<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Sweden_Stockholm_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
import io

# --- CONFIGURATION ---
start_date = "2024-01-01"
end_date = "2025-01-01"

# 1. SETUP TASKS
# Param 1 = Temperature (Station 98230 - Stockholm City)
# Param 4 = Wind Speed (Station 97200 - Bromma Airport)
# Param 11 = Global Irradiance (Station 98230 - Stockholm City) **CORRECTED FROM 10**
tasks = [
    {"station": "98230", "param": "1",  "name": "Temperature_C"},
    {"station": "97200", "param": "4",  "name": "Wind_Speed_ms"},
    {"station": "98230", "param": "11", "name": "Global_Radiation_Wm2"}
]

dfs = []
print("Fetching 2024 Data (Reading SMHI CSV Archives)...")

for task in tasks:
    print(f"   > Requesting {task['name']} (Param {task['param']}) from Station {task['station']}...")

    # SMHI "Corrected Archive" is ONLY available as CSV, not JSON.
    url = f"https://opendata-download-metobs.smhi.se/api/version/1.0/parameter/{task['param']}/station/{task['station']}/period/corrected-archive/data.csv"

    try:
        r = requests.get(url)
        if r.status_code == 200:
            # SMHI CSVs have ~10 lines of metadata. We need to find the header row.
            # The header usually starts with "Datum" or "Date".
            content = r.text

            # Find the header row index dynamically
            header_row = 0
            lines = content.splitlines()
            for i, line in enumerate(lines[:20]): # Check first 20 lines
                if "Datum" in line or "Date" in line:
                    header_row = i
                    break

            # Read CSV using pandas
            # SMHI uses semicolon (;) separator
            df_raw = pd.read_csv(io.StringIO(content), sep=';', header=header_row)

            # Rename columns (SMHI headers are Swedish: 'Datum', 'Tid (UTC)', 'Lufttemperatur', etc.)
            # The 3rd column is usually the value
            val_col = df_raw.columns[2]

            # Parse Date and Time
            df_raw['Date'] = df_raw.iloc[:, 0] # 1st col is Date
            df_raw['TimeStr'] = df_raw.iloc[:, 1] # 2nd col is Time

            # Combine Date+Time
            df_raw['FullTime'] = pd.to_datetime(df_raw['Date'] + ' ' + df_raw['TimeStr'])

            # Extract Value
            df_clean = pd.DataFrame({
                'Time': df_raw['FullTime'],
                task['name']: df_raw[val_col]
            })

            # Filter for 2024
            mask = (df_clean['Time'] >= pd.Timestamp(start_date)) & (df_clean['Time'] <= pd.Timestamp(end_date))
            df_clean = df_clean.loc[mask]

            if not df_clean.empty:
                # Resample to hourly (averaging)
                df_clean = df_clean.set_index('Time').resample('h').mean()
                print(f"     Success: Got {len(df_clean)} hourly rows.")
                dfs.append(df_clean)
            else:
                print(f"     WARNING: Data downloaded, but 2024 range is empty.")
        else:
            print(f"     Error {r.status_code}: Could not download CSV (Check if Param/Station is valid).")

    except Exception as e:
        print(f"     Failed: {e}")

# --- MERGE & FORMAT ---
if dfs:
    print("Merging datasets...")
    df_final = pd.concat(dfs, axis=1)
    df_final = df_final.sort_index()

    # Fill gaps
    df_final = df_final.interpolate(method='linear', limit=2)

    if 'Global_Radiation_Wm2' in df_final.columns:
        df_final['Global_Radiation_Wm2'] = df_final['Global_Radiation_Wm2'].fillna(0)
        df_final.loc[df_final['Global_Radiation_Wm2'] < 0, 'Global_Radiation_Wm2'] = 0

    df_final = df_final.dropna(how='all')

    # Formatting
    df_final = df_final.reset_index()
    df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
    df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')

    cols = ['Date', 'Hour', 'Temperature_C', 'Global_Radiation_Wm2', 'Wind_Speed_ms']
    cols = [c for c in cols if c in df_final.columns]
    df_final = df_final[cols]

    print(df_final.head())

    filename = "Stockholm_2024_Corrected.csv"
    df_final.to_csv(filename, index=False)
    print(f"Success! Saved to {filename}")
else:
    print("No data retrieved.")

Fetching 2024 Data (Reading SMHI CSV Archives)...
   > Requesting Temperature_C (Param 1) from Station 98230...


  df_raw = pd.read_csv(io.StringIO(content), sep=';', header=header_row)
  df_raw = pd.read_csv(io.StringIO(content), sep=';', header=header_row)


     Success: Got 8785 hourly rows.
   > Requesting Wind_Speed_ms (Param 4) from Station 97200...
     Success: Got 8785 hourly rows.
   > Requesting Wind_Speed_ms (Param 4) from Station 97200...


  df_raw = pd.read_csv(io.StringIO(content), sep=';', header=header_row)
  df_raw = pd.read_csv(io.StringIO(content), sep=';', header=header_row)


     Success: Got 8785 hourly rows.
   > Requesting Global_Radiation_Wm2 (Param 11) from Station 98230...
     Success: Got 8785 hourly rows.
   > Requesting Global_Radiation_Wm2 (Param 11) from Station 98230...
     Error 404: Could not download CSV (Check if Param/Station is valid).
Merging datasets...
         Date   Hour  Temperature_C  Wind_Speed_ms
0  2024-01-01  00:00           -2.2            4.0
1  2024-01-01  01:00           -2.0            4.0
2  2024-01-01  02:00           -1.9            5.0
3  2024-01-01  03:00           -1.7            5.0
4  2024-01-01  04:00           -1.5            4.0
Success! Saved to Stockholm_2024_Corrected.csv
     Error 404: Could not download CSV (Check if Param/Station is valid).
Merging datasets...
         Date   Hour  Temperature_C  Wind_Speed_ms
0  2024-01-01  00:00           -2.2            4.0
1  2024-01-01  01:00           -2.0            4.0
2  2024-01-01  02:00           -1.9            5.0
3  2024-01-01  03:00           -1.7        