<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Norway_Oslo_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import time

import pandas as pd
import requests

# --- CONFIGURATION ---
# Frost API client ID; it is sent as the HTTP basic-auth username (with an
# empty password) on every request made below.
# NOTE(review): a credential embedded in a shared notebook is readable by
# anyone with access to it. Set the FROST_CLIENT_ID environment variable to
# override the embedded value, and consider rotating the embedded one.
client_id = os.environ.get('FROST_CLIENT_ID') or '95b97914-f898-42d6-b1de-b1557d61357d'

# 1. DEFINE STATIONS
# Station used for wind speed and global radiation.
station_blindern = 'SN18700'
# Station name that is looked up to obtain the temperature station ID.
target_name = "Sofienberg"
# Station ID used for temperature when the name lookup yields nothing.
fallback_station = "SN18210" # Oslo - Hovin
# 2. HELPER: FIND STATION ID
def find_station(name):
    """Look up a Frost station ID by name, falling back to ``fallback_station``.

    Queries the Frost ``sources`` endpoint with ``name`` and returns the ID of
    the first entry in the response's ``data`` list. Any failure (network
    error or timeout, non-200 status, malformed or empty response) is printed
    and the module-level ``fallback_station`` is returned instead, so the
    caller always receives a station ID.

    Args:
        name: Station name to search for.

    Returns:
        The ``id`` of the first matching source, or ``fallback_station`` when
        no match could be obtained.
    """
    print(f"Searching for station named '{name}'...")
    try:
        r = requests.get(
            'https://frost.met.no/sources/v0.jsonld',
            {'name': name},
            auth=(client_id, ''),
            timeout=30,  # never hang the notebook on an unresponsive server
        )
        if r.status_code == 200:
            data = r.json().get('data', [])
            if data:
                found_id = data[0]['id']
                # The display name is only used for the log line, so a missing
                # 'name' field must not discard an otherwise valid ID.
                found_name = data[0].get('name', name)
                print(f"   > Found: {found_name} ({found_id})")
                return found_id
        else:
            print(f"   > Lookup returned HTTP {r.status_code}.")
    except (requests.RequestException, ValueError, LookupError,
            TypeError, AttributeError) as e:
        # Best-effort lookup: report the reason, then use the fallback below.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print(f"   > Lookup error: {e}")
    print(f"   > Station '{name}' not found. Using fallback: {fallback_station}")
    return fallback_station

# 3. HELPER: DOWNLOAD & PROCESS RAW DATA
def get_hourly_data(station_id, elements, name_map, year=2024):
    """Download one year of observations from Frost and resample to hourly means.

    The year is requested in twelve month-sized chunks from the Frost
    ``observations`` endpoint. Each returned item becomes one row keyed by its
    ``referenceTime``; every observation whose ``elementId`` contains a key of
    ``name_map`` is stored under the mapped column name.

    Args:
        station_id: Frost source ID passed as the ``sources`` parameter.
        elements: Comma-separated element names passed as ``elements``.
        name_map: Mapping from element name (matched as a substring of each
            observation's ``elementId``) to the output column name.
        year: Calendar year to download, as an int. Defaults to 2024.

    Returns:
        A DataFrame indexed by ``Time`` holding the hourly mean of each
        mapped column. If no rows were downloaded at all, an empty DataFrame
        with the columns ``['Time', *name_map.values()]`` is returned.
    """
    all_rows = []

    # Monthly chunks: first of each month to first of the following month.
    months = []
    for month in range(1, 13):
        start = f"{year:04d}-{month:02d}-01"
        if month == 12:
            end = f"{year + 1:04d}-01-01"
        else:
            end = f"{year:04d}-{month + 1:02d}-01"
        months.append((start, end))

    print(f"Fetching {list(name_map.values())} from {station_id}...")

    for start, end in months:
        try:
            r = requests.get(
                'https://frost.met.no/observations/v0.jsonld',
                {'sources': station_id, 'elements': elements, 'referencetime': f"{start}/{end}"},
                auth=(client_id, ''),
                timeout=60,  # a stalled request must not block the whole download
            )
            if r.status_code == 200:
                for item in r.json().get('data', []):
                    row = {'Time': item['referenceTime']}
                    # Extract values
                    for obs in item['observations']:
                        for k, v in name_map.items():
                            if k in obs['elementId']:
                                row[v] = obs['value']
                    all_rows.append(row)
            else:
                # Report skipped months so gaps in the result are explainable.
                print(f"   Warning: HTTP {r.status_code} for chunk {start} from {station_id}; skipping.")
        except Exception as e:
            # Per-chunk best effort: one bad month must not abort the others.
            print(f"   Error fetching chunk {start}: {e}")
        time.sleep(0.2)  # brief pause between consecutive requests

    if not all_rows:
        print(f"   Warning: No data found for {station_id}")
        # Return empty DataFrame with expected columns to prevent crashes
        return pd.DataFrame(columns=['Time'] + list(name_map.values()))

    # Process to DataFrame
    df = pd.DataFrame(all_rows)
    df['Time'] = pd.to_datetime(df['Time'])

    # Ensure all requested columns exist, even if data was missing.
    # NOTE(review): a column that never appeared is filled with 0.0, which is
    # indistinguishable from a measured zero downstream - confirm that this
    # is intended rather than NaN.
    for target_col in name_map.values():
        if target_col not in df.columns:
            print(f"   WARNING: Column '{target_col}' missing for {station_id}. Filling with 0.")
            df[target_col] = 0.0

    # Resample to Hourly Mean
    return df.set_index('Time').resample('h').mean()

# --- MAIN EXECUTION ---

# Step A: resolve the station that supplies the temperature series
station_temp_id = find_station(target_name)

# Step B: hourly temperature from the resolved station
df_temp = get_hourly_data(
    station_temp_id,
    'air_temperature',
    {'air_temperature': 'Temperature_C'},
)

# Step C: hourly wind speed and global radiation from Blindern
df_blindern = get_hourly_data(
    station_blindern,
    'wind_speed,surface_downwelling_shortwave_flux_in_air',
    {
        'wind_speed': 'Wind_Speed_ms',
        'surface_downwelling_shortwave_flux_in_air': 'Global_Radiation_Wm2',
    },
)

# Step D: merge both series, clean them up and export to CSV.
# Both downloads must have produced data; otherwise nothing is written.
if df_temp.empty or df_blindern.empty:
    print("Failed to get data.")
else:
    print("Merging datasets...")
    df_final = pd.concat([df_temp, df_blindern], axis=1)

    # Guarantee the radiation column exists before it is cleaned below.
    if 'Global_Radiation_Wm2' not in df_final.columns:
        df_final['Global_Radiation_Wm2'] = 0.0

    df_final = (
        df_final
        # Bridge gaps of at most two consecutive hours.
        .interpolate(method='linear', limit=2)
        # Remaining radiation gaps become 0, and negative readings are zeroed.
        .assign(
            Global_Radiation_Wm2=lambda frame: (
                frame['Global_Radiation_Wm2']
                .fillna(0)
                .mask(lambda values: values < 0, 0)
            )
        )
        .dropna(how='all')
        # Turn the 'Time' index back into a column and derive Date/Hour text.
        .reset_index()
        .assign(
            Date=lambda frame: frame['Time'].dt.strftime('%Y-%m-%d'),
            Hour=lambda frame: frame['Time'].dt.strftime('%H:%M'),
        )
    )

    # Preferred column order, restricted to the columns actually present.
    cols = [
        c
        for c in ['Date', 'Hour', 'Temperature_C', 'Global_Radiation_Wm2', 'Wind_Speed_ms']
        if c in df_final.columns
    ]
    df_final = df_final[cols]

    print(df_final.head())

    filename = "Oslo_Hybrid_2024.csv"
    df_final.to_csv(filename, index=False)
    print(f"\nSuccess! Hybrid weather data saved to {filename}")

Searching for station named 'Sofienberg'...
   > Station 'Sofienberg' not found. Using fallback: SN18210
Fetching ['Temperature_C'] from SN18210...
Fetching ['Wind_Speed_ms', 'Global_Radiation_Wm2'] from SN18700...
Merging datasets...
         Date   Hour  Temperature_C  Global_Radiation_Wm2  Wind_Speed_ms
0  2024-01-01  00:00           -4.1                   0.0       7.566667
1  2024-01-01  01:00           -4.1                   0.0       7.150000
2  2024-01-01  02:00           -4.3                   0.0       7.916667
3  2024-01-01  03:00           -4.6                   0.0       8.566667
4  2024-01-01  04:00           -4.9                   0.0       9.500000

Success! Hybrid weather data saved to Oslo_Hybrid_2024.csv
