<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Sweden_Stockholm_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import requests
import pandas as pd
import io

# --- CONFIGURATION ---
# Date range of the export, as ISO "YYYY-MM-DD" strings.
START_DATE = "2024-01-01"
END_DATE = "2025-01-01"

# COORDINATES (Stockholm City - Observatoriekullen), kept as strings because
# they are interpolated directly into a request URL.
LAT, LON = "59.3417", "18.0549"

# TASKS
# One entry per observed series: the station to read from, the parameter id
# to request, and the column name the series gets in the final table.
tasks = [
    # 1. Temperature: Observatoriekullen (Station 98230) - Best for City Temp
    dict(station="98230", param="1", name="Air_Temperature_C"),
    # 2. Wind: Bromma Airport (Station 97200) - Best for Wind
    dict(station="97200", param="4", name="Wind_Speed_ms"),
]

def _parse_smhi_archive_csv(content):
    """Turn the text of an archive CSV download into a Time/Value frame."""
    # The data table does not start on the first line: look in the first 50
    # lines for the header row (it mentions "Datum" or "Date") and skip
    # everything before it. Fall back to the top of the file if none is found.
    header_row = next(
        (
            i
            for i, line in enumerate(content.splitlines()[:50])
            if "Datum" in line or "Date" in line
        ),
        0,
    )

    # Only the first three columns are used: date, time of day, value.
    df = pd.read_csv(io.StringIO(content), sep=';', skiprows=header_row, dtype=str, usecols=[0, 1, 2])
    df.columns = ['d', 't', 'v']
    return pd.DataFrame({
        'Time': pd.to_datetime(df['d'] + ' ' + df['t'], errors='coerce'),
        'Value': pd.to_numeric(df['v'], errors='coerce'),
    })


def _parse_smhi_latest_json(data):
    """Turn a decoded latest-months JSON payload into a Time/Value frame."""
    records = data['value']
    if not records:
        return pd.DataFrame(columns=['Time', 'Value'])

    raw = pd.DataFrame(records)
    # Coerce instead of calling float() per record, so one null or malformed
    # value becomes NaN rather than discarding the whole payload.
    return pd.DataFrame({
        'Time': pd.to_datetime(raw['date'], unit='ms', errors='coerce'),
        'Value': pd.to_numeric(raw['value'], errors='coerce'),
    })


def fetch_smhi_data(task, timeout=30):
    """Fetch one observed series (archive + latest months) as hourly means.

    Both sources are best-effort: a source that cannot be downloaded or parsed
    is reported on stdout and skipped, and whatever was retrieved is returned.

    Args:
        task: Mapping with the keys 'station' and 'param' (interpolated into
            the request URL) and 'name' (the output column name).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A DataFrame indexed by naive hourly timestamps ('Time') with a single
        column named task['name'], or an empty DataFrame if nothing usable was
        retrieved.
        NOTE(review): timestamps are presumably UTC - confirm against the SMHI
        API documentation.
    """
    print(f" > Fetching {task['name']} from Station {task['station']}...")
    base_url = (
        "https://opendata-download-metobs.smhi.se/api/version/1.0"
        f"/parameter/{task['param']}/station/{task['station']}/period"
    )
    # Failures that mean "this source is unavailable"; anything else is a bug
    # and is allowed to propagate.
    recoverable = (requests.RequestException, ValueError, KeyError, TypeError)
    frames = []

    # A. Try Archive (CSV)
    try:
        r = requests.get(f"{base_url}/corrected-archive/data.csv", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_archive_csv(r.text))
        else:
            print(f"   ! Archive request returned HTTP {r.status_code}; skipping.")
    except recoverable as exc:
        print(f"   ! Archive data unavailable ({type(exc).__name__}: {exc}); skipping.")

    # B. Try Latest (JSON)
    try:
        r = requests.get(f"{base_url}/latest-months/data.json", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_latest_json(r.json()))
        else:
            print(f"   ! Latest-months request returned HTTP {r.status_code}; skipping.")
    except recoverable as exc:
        print(f"   ! Latest-months data unavailable ({type(exc).__name__}: {exc}); skipping.")

    # C. Process
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame()

    combined_df = pd.concat(frames).dropna(subset=['Time'])
    if combined_df.empty:
        return pd.DataFrame()

    # Stable sort: for a timestamp present in both sources the archive row was
    # appended first, so it is the one drop_duplicates keeps.
    combined_df = (
        combined_df
        .sort_values('Time', kind='mergesort')
        .drop_duplicates(subset=['Time'])
        .set_index('Time')
        .resample('h')
        .mean()
    )
    # Guard so the result can always be merged with other naive-indexed frames.
    if combined_df.index.tz is not None:
        combined_df.index = combined_df.index.tz_localize(None)
    return combined_df.rename(columns={'Value': task['name']})

def fetch_strang_solar(lat, lon, start, end, timeout=30):
    """Fetch hourly solar data for one point from the STRÅNG model.

    The call is best-effort: any download or parsing failure is reported on
    stdout and an empty DataFrame is returned instead of raising.

    Args:
        lat: Latitude, interpolated into the request URL.
        lon: Longitude, interpolated into the request URL.
        start: First day as "YYYY-MM-DD"; sent with the dashes removed.
        end: Last day as "YYYY-MM-DD"; sent with the dashes removed.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame indexed by naive hourly timestamps ('Time') with the single
        column 'Global_Horizon_Irradiation_Wm2' (STRÅNG parameter 117), or an
        empty DataFrame if nothing usable was retrieved.
    """
    print(" > Fetching Global_Horizon_Irradiation_Wm2 from STRÅNG Model...")
    url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{lon}/lat/{lat}/parameter/117/data.json"
    params = {'from': start.replace("-", ""), 'to': end.replace("-", ""), 'interval': 'hourly'}

    try:
        r = requests.get(url, params=params, timeout=timeout)
        if r.status_code != 200:
            print(f"   ! STRÅNG request returned HTTP {r.status_code}; skipping.")
            return pd.DataFrame()

        data = r.json()
        if not data:
            print("   ! STRÅNG returned no records for the requested period.")
            return pd.DataFrame()

        raw = pd.DataFrame(data)
        df = pd.DataFrame({
            # Normalise to UTC before dropping the timezone so the index can be
            # aligned with other naive-indexed frames.
            # NOTE(review): assumes the other series are UTC as well - confirm.
            'Time': pd.to_datetime(raw['date_time'], utc=True, errors='coerce').dt.tz_localize(None),
            # Coerce so a null or malformed value becomes NaN, not an object column.
            'Global_Horizon_Irradiation_Wm2': pd.to_numeric(raw['value'], errors='coerce'),
        })
        # Rows whose timestamp could not be parsed cannot be resampled.
        df = df.dropna(subset=['Time'])
        return df.set_index('Time').resample('h').mean()
    except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
        print(f"   ! STRÅNG data unavailable ({type(exc).__name__}: {exc}); skipping.")
    return pd.DataFrame()

# --- MAIN EXECUTION ---
# Download every configured series; each fetcher returns an hourly frame, or
# an empty frame when its source could not be retrieved.
dfs = [fetch_smhi_data(task) for task in tasks]
dfs.append(fetch_strang_solar(LAT, LON, START_DATE, END_DATE))

# Merge only the sources that actually delivered data, and fail with a clear
# message (instead of an obscure pandas error) when none did.
available = [frame for frame in dfs if not frame.empty]
if not available:
    raise RuntimeError("No data could be retrieved from any source; nothing to export.")

print("Merging data...")
df_final = pd.concat(available, axis=1)

# Filter Range (inclusive on both ends; covers all of END_DATE)
mask = (df_final.index >= pd.Timestamp(START_DATE)) & (df_final.index <= pd.Timestamp(f"{END_DATE} 23:59:59"))
df_final = df_final.loc[mask]

# Cleanup
# Bridge short gaps only: at most 2 consecutive missing hours are interpolated.
df_final = df_final.interpolate(method='time', limit=2)
solar_column = 'Global_Horizon_Irradiation_Wm2'
# Runs only when solar data was fetched, so a failed solar download is never
# turned into fabricated zeros.
if solar_column in df_final.columns:
    # Missing and negative irradiation values are both set to 0.
    df_final[solar_column] = df_final[solar_column].fillna(0).clip(lower=0)

# Final Format
output_columns = [task['name'] for task in tasks] + [solar_column]
missing_columns = [col for col in output_columns if col not in df_final.columns]
if missing_columns:
    print(f"Warning: no data for {', '.join(missing_columns)}; column(s) left empty.")

df_final = df_final.rename_axis('Time').reset_index()
df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')
# reindex (rather than plain selection) keeps the CSV layout stable even when a
# series is missing: the column is emitted empty instead of raising KeyError.
df_final = df_final.reindex(columns=['Date', 'Hour'] + output_columns)

print(f"Success! Retrieved {len(df_final)} rows.")
print(df_final.tail()) # Check the end to see 2025-01-01
df_final.to_csv("Stockholm_Weather_2024_Jan2025.csv", index=False)

 > Fetching Air_Temperature_C from Station 98230...
 > Fetching Wind_Speed_ms from Station 97200...
 > Fetching Global_Horizon_Irradiation_Wm2 from STRÅNG Model...
Merging data...
Success! Retrieved 8808 rows.
            Date   Hour  Air_Temperature_C  Wind_Speed_ms  \
8803  2025-01-01  19:00               -2.5            3.0   
8804  2025-01-01  20:00               -2.6            4.0   
8805  2025-01-01  21:00               -2.8            4.0   
8806  2025-01-01  22:00               -3.4            4.0   
8807  2025-01-01  23:00               -4.3            4.0   

      Global_Horizon_Irradiation_Wm2  
8803                             0.0  
8804                             0.0  
8805                             0.0  
8806                             0.0  
8807                             0.0  
