<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Sweden_Stockholm_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
import requests
import pandas as pd
import io

# --- CONFIGURATION ---
# Inclusive date window (ISO "YYYY-MM-DD") covered by the exported dataset.
START_DATE = "2024-01-01"
END_DATE = "2024-12-31"

# GEOGRAPHY (Stockholm Observatoriekullen)
# Kept as strings because they are interpolated verbatim into a request URL.
LAT = "59.3417"
LON = "18.0549"

# TASKS
# One entry per station series to download. Each entry names the SMHI station
# id, the SMHI parameter id and the column the series gets in the final table.
# Stations are mixed to get the best available data for Stockholm City.
tasks = [
    # 1. Temperature from Observatoriekullen (City Center)
    dict(station="98230", param="1", name="Air_Temperature_C"),
    # 2. Wind from Bromma (nearest reliable wind station, ~7km away);
    #    Observatoriekullen (98230) does not report reliable wind in 2024.
    dict(station="97200", param="4", name="Wind_Speed_ms"),
]

def fetch_station_data(task):
    """Fetch one SMHI station parameter and return it as an hourly series.

    Two endpoints of the SMHI metobs API are queried and stitched together:
    the ``corrected-archive`` period (CSV) and the ``latest-months`` period
    (JSON). Fetching is best-effort: if one source cannot be downloaded or
    parsed, a warning is printed and the other source is still used.

    Args:
        task: Mapping with the keys ``station`` (station id), ``param``
            (parameter id) and ``name`` (column name for the result).

    Returns:
        A DataFrame indexed by timezone-naive ``Time`` at hourly resolution
        (hourly mean) with a single column named ``task['name']``. An empty
        DataFrame is returned when neither source yielded a usable row.
    """
    print(f" > Fetching {task['name']} from Station {task['station']}...")

    base_url = (
        "https://opendata-download-metobs.smhi.se/api/version/1.0"
        f"/parameter/{task['param']}/station/{task['station']}/period"
    )
    # Errors that mean "this source is unusable" (network trouble, malformed
    # payload, unexpected schema). Anything else is a programming error and
    # is allowed to propagate instead of being silently swallowed.
    recoverable = (requests.RequestException, ValueError, KeyError, TypeError)
    timeout_s = 60  # without a timeout a stalled connection hangs forever
    frames = []

    # 1. Archive (CSV)
    try:
        r = requests.get(f"{base_url}/corrected-archive/data.csv", timeout=timeout_s)
        if r.status_code == 200:
            content = r.text
            # The CSV starts with a metadata preamble; the data table begins
            # at the first line (within the first 50) mentioning the date
            # column. Falls back to row 0 when no such line is found.
            skip_rows = next(
                (i for i, line in enumerate(content.splitlines()[:50])
                 if "Datum" in line or "Date" in line),
                0,
            )

            # Read columns [0,1,2] -> [Date, Time, Value]
            archive = pd.read_csv(
                io.StringIO(content), sep=';', skiprows=skip_rows,
                dtype=str, usecols=[0, 1, 2],
            )
            archive.columns = ['d', 't', 'v']
            frames.append(pd.DataFrame({
                'Time': pd.to_datetime(archive['d'] + ' ' + archive['t'], errors='coerce'),
                'Value': pd.to_numeric(archive['v'], errors='coerce'),
            }))
        else:
            print(f"    ! Warning: archive request for {task['name']} returned HTTP {r.status_code}")
    except recoverable as exc:
        print(f"    ! Warning: archive fetch failed for {task['name']}: {exc}")

    # 2. Latest (JSON)
    try:
        r = requests.get(f"{base_url}/latest-months/data.json", timeout=timeout_s)
        if r.status_code == 200:
            data = r.json()
            if 'value' in data and data['value']:
                latest = pd.DataFrame.from_records(data['value'])
                # Coerce instead of float(): a single unparsable value must
                # not discard the whole latest-months payload.
                frames.append(pd.DataFrame({
                    'Time': pd.to_datetime(latest['date'], unit='ms'),
                    'Value': pd.to_numeric(latest['value'], errors='coerce'),
                }))
        else:
            print(f"    ! Warning: latest-months request for {task['name']} returned HTTP {r.status_code}")
    except recoverable as exc:
        print(f"    ! Warning: latest-months fetch failed for {task['name']}: {exc}")

    # 3. Clean and Resample
    if frames:
        combined_df = pd.concat(frames, ignore_index=True)

        # Drop rows whose timestamp or value could not be parsed
        combined_df = combined_df.dropna(subset=['Time', 'Value'])

        # Deduplicate BEFORE sorting: frames are concatenated archive-first,
        # so keep='first' deterministically prefers the archive row whenever
        # both sources report the same timestamp.
        combined_df = combined_df.drop_duplicates(subset=['Time'], keep='first')
        combined_df = combined_df.sort_values('Time')

        if not combined_df.empty:
            # Resample to Hourly Mean
            combined_df = combined_df.set_index('Time').resample('h').mean()

            # Remove Timezone to ensure clean merge
            if combined_df.index.tz is not None:
                combined_df.index = combined_df.index.tz_localize(None)

            return combined_df.rename(columns={'Value': task['name']})

    print(f"    ! Warning: No data found for {task['name']}")
    return pd.DataFrame()

def fetch_strang_solar(lat, lon, start, end):
    """Fetch hourly solar data for one point from the SMHI STRÅNG model.

    Requests STRÅNG parameter 117 for the given coordinates and date window
    and returns it under the column name ``Global_Horizon_Irradiation_Wm2``.
    Fetching is best-effort: on any download or parsing problem a warning is
    printed and an empty DataFrame is returned.

    Args:
        lat: Latitude, interpolated verbatim into the request URL.
        lon: Longitude, interpolated verbatim into the request URL.
        start: First date as ``"YYYY-MM-DD"``; sent with the dashes removed.
        end: Last date as ``"YYYY-MM-DD"``; sent with the dashes removed.

    Returns:
        A DataFrame indexed by timezone-naive ``Time`` (hourly mean) with the
        single column ``Global_Horizon_Irradiation_Wm2``, or an empty
        DataFrame when nothing usable was retrieved.
    """
    print(" > Fetching Global_Horizon_Irradiation_Wm2 from STRÅNG Model...")
    url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{lon}/lat/{lat}/parameter/117/data.json"
    params = {'from': start.replace("-", ""), 'to': end.replace("-", ""), 'interval': 'hourly'}
    column = 'Global_Horizon_Irradiation_Wm2'

    # Errors that mean "the source is unusable"; anything else propagates.
    recoverable = (requests.RequestException, ValueError, KeyError, TypeError)

    try:
        # Without a timeout a stalled connection would hang the script.
        r = requests.get(url, params=params, timeout=60)
        if r.status_code != 200:
            print(f"    ! Warning: STRÅNG request returned HTTP {r.status_code}")
            return pd.DataFrame()

        data = r.json()
        # The parsing below expects a non-empty list of records carrying
        # 'date_time' and 'value'; anything else is treated as "no data".
        if not isinstance(data, list) or not data:
            print("    ! Warning: STRÅNG response contained no records")
            return pd.DataFrame()

        raw = pd.DataFrame.from_records(data)
        df = pd.DataFrame({
            # Convert to UTC first and only then drop the tz info, so a
            # timestamp carrying a non-UTC offset is shifted, not mislabeled.
            'Time': pd.to_datetime(raw['date_time'], utc=True).dt.tz_localize(None),
            column: pd.to_numeric(raw['value'], errors='coerce'),
        })
        return df.set_index('Time').resample('h').mean()
    except recoverable as exc:
        print(f"    ! Warning: STRÅNG fetch failed: {exc}")
    return pd.DataFrame()

# --- MAIN RUN ---
# Downloads every configured series, merges them on their hourly timestamp,
# restricts the result to the configured date window, patches short gaps and
# writes the table to "Stockholm_Weather_2024_1H.csv".
dfs = []

# Fetch Station Data (Temp & Wind)
for task in tasks:
    dfs.append(fetch_station_data(task))

# Fetch Solar Data
dfs.append(fetch_strang_solar(LAT, LON, START_DATE, END_DATE))

# Merge
print("Merging data...")
# A failed fetch yields an empty frame with an unnamed, non-datetime index.
# Leave those out of the merge so they cannot disturb the shared 'Time' index,
# and stop with a clear message when nothing at all was retrieved (otherwise
# the script would fail later with an opaque KeyError on 'Time').
non_empty = [frame for frame in dfs if not frame.empty]
if not non_empty:
    raise RuntimeError("No data could be retrieved from any source; nothing to merge.")
df_final = pd.concat(non_empty, axis=1).rename_axis('Time')

# Filter to the configured window (END_DATE is inclusive up to 23:59:59)
mask = (df_final.index >= pd.Timestamp(START_DATE)) & (df_final.index <= pd.Timestamp(f"{END_DATE} 23:59:59"))
df_final = df_final.loc[mask]

# Interpolate small gaps: time-weighted, at most 2 consecutive missing rows
df_final = df_final.interpolate(method='time', limit=2)

# Solar: remaining NaNs become 0 and negative values are clamped to 0
if 'Global_Horizon_Irradiation_Wm2' in df_final.columns:
    df_final['Global_Horizon_Irradiation_Wm2'] = (
        df_final['Global_Horizon_Irradiation_Wm2'].fillna(0).clip(lower=0)
    )

# Final Cleanup: split the timestamp into separate Date / Hour text columns
df_final = df_final.reset_index()
df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')

# Keep a fixed column order, skipping any series that could not be fetched
cols = ['Date', 'Hour', 'Air_Temperature_C', 'Wind_Speed_ms', 'Global_Horizon_Irradiation_Wm2']
df_final = df_final[[c for c in cols if c in df_final.columns]]

print(f"Success! Retrieved {len(df_final)} rows.")
print(df_final.head())
df_final.to_csv("Stockholm_Weather_2024_1H.csv", index=False)

 > Fetching Air_Temperature_C from Station 98230...
 > Fetching Wind_Speed_ms from Station 97200...
 > Fetching Global_Horizon_Irradiation_Wm2 from STRÅNG Model...
Merging data...
Success! Retrieved 8784 rows.
         Date   Hour  Air_Temperature_C  Wind_Speed_ms  \
0  2024-01-01  00:00               -2.2            4.0   
1  2024-01-01  01:00               -2.0            4.0   
2  2024-01-01  02:00               -1.9            5.0   
3  2024-01-01  03:00               -1.7            5.0   
4  2024-01-01  04:00               -1.5            4.0   

   Global_Horizon_Irradiation_Wm2  
0                             0.0  
1                             0.0  
2                             0.0  
3                             0.0  
4                             0.0  
