<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Sweden_Stockholm_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import requests
import pandas as pd
import io

# --- CONFIGURATION ---
# Date window of the export, as ISO "YYYY-MM-DD" strings.
start_date, end_date = "2024-01-01", "2024-12-31"

# Stockholm coordinates (latitude, longitude); kept as strings because they
# are interpolated directly into the request URL.
lat, lon = "59.33", "18.06"

# 1. OBSERVATION TASKS (Temp & Wind)
# One entry per observed series: the station id and parameter id to request,
# and the column name the series gets in the merged table.
obs_tasks = [
    dict(station="98230", param="1", name="Temperature_C"),  # Stockholm City
    dict(station="97200", param="4", name="Wind_Speed_ms"),  # Bromma
]

def _parse_smhi_archive_csv(content):
    """Parse a corrected-archive CSV payload into a Time/Value DataFrame."""
    skip_rows = 0
    # The payload starts with a metadata preamble of varying length; the data
    # table begins at the first line that mentions the date column.
    for i, line in enumerate(content.splitlines()[:50]):
        if "Datum" in line or "Date" in line:
            skip_rows = i
            break

    # Columns [0, 1, 2] -> [date, time, value]. Everything is read as text so
    # malformed cells turn into NaT/NaN below instead of aborting the parse.
    raw = pd.read_csv(io.StringIO(content), sep=';', skiprows=skip_rows,
                      dtype=str, usecols=[0, 1, 2])
    raw.columns = ['d', 't', 'v']
    return pd.DataFrame({
        'Time': pd.to_datetime(raw['d'] + ' ' + raw['t'], errors='coerce'),
        'Value': pd.to_numeric(raw['v'], errors='coerce'),
    })


def _parse_smhi_latest_json(data):
    """Parse a latest-months JSON payload into a Time/Value DataFrame."""
    records = data.get('value') or []
    if not records:
        return pd.DataFrame(columns=['Time', 'Value'])
    raw = pd.DataFrame(records)
    return pd.DataFrame({
        # 'date' holds epoch milliseconds.
        'Time': pd.to_datetime(raw['date'], unit='ms', errors='coerce'),
        # Coerce instead of float(): one null/bad value must not discard
        # the whole payload.
        'Value': pd.to_numeric(raw['value'], errors='coerce'),
    })


def fetch_smhi_obs(task, timeout=60):
    """Fetch one observed series (Temp/Wind) from Archive CSV + Latest JSON.

    Both sources are best-effort: a failed download or parse is reported on
    stdout and skipped, so one source can still provide data when the other
    is unavailable.

    Args:
        task: dict with keys 'station' (station id), 'param' (parameter id)
            and 'name' (column name for the returned series).
        timeout: seconds to wait for each HTTP request before giving up.

    Returns:
        DataFrame with an hourly, timezone-naive 'Time' index and a single
        column named task['name'] holding hourly means; an empty DataFrame
        when no data could be retrieved.
    """
    print(f" > Fetching {task['name']} (Station {task['station']})...")
    base_url = (
        "https://opendata-download-metobs.smhi.se/api/version/1.0"
        f"/parameter/{task['param']}/station/{task['station']}/period"
    )
    frames = []

    # A. Try Archive (CSV)
    try:
        r = requests.get(f"{base_url}/corrected-archive/data.csv", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_archive_csv(r.text))
        else:
            print(f"    ! Archive Error {r.status_code} ({task['name']})")
    except Exception as e:
        print(f"    ! Archive Exception ({task['name']}): {e}")

    # B. Try Latest (JSON)
    try:
        r = requests.get(f"{base_url}/latest-months/data.json", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_latest_json(r.json()))
        else:
            print(f"    ! Latest Error {r.status_code} ({task['name']})")
    except Exception as e:
        print(f"    ! Latest Exception ({task['name']}): {e}")

    frames = [f for f in frames if not f.empty]
    if not frames:
        return pd.DataFrame()

    combined_df = pd.concat(frames, ignore_index=True).dropna(subset=['Time'])
    # De-duplicate BEFORE sorting: frames are ordered archive-first, so on an
    # overlapping timestamp the archive row is kept deterministically.
    combined_df = combined_df.drop_duplicates(subset=['Time'], keep='first')
    combined_df = combined_df.sort_values('Time')
    if combined_df.empty:
        return pd.DataFrame()

    combined_df = combined_df.set_index('Time').resample('h').mean()

    # *** CRITICAL: Ensure index is Timezone Naive (to match STRÅNG) ***
    if combined_df.index.tz is not None:
        combined_df.index = combined_df.index.tz_localize(None)

    return combined_df.rename(columns={'Value': task['name']})

def fetch_strang_solar(lat, lon, start, end, timeout=60):
    """Fetch hourly global radiation for one point from SMHI STRÅNG (model).

    Failures (HTTP error, empty response, malformed payload, network error)
    are reported on stdout and produce an empty DataFrame rather than raising.

    Args:
        lat: latitude of the point, interpolated into the request URL.
        lon: longitude of the point, interpolated into the request URL.
        start: first date as "YYYY-MM-DD"; sent with the dashes removed.
        end: last date as "YYYY-MM-DD"; sent with the dashes removed.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        DataFrame with an hourly, timezone-naive 'Time' index and one column
        'Global_Radiation_Wm2'; an empty DataFrame on any failure.
    """
    print(" > Fetching Global_Radiation_Wm2 (STRÅNG Model)...")

    # Param 117 = Global Irradiance (Hourly)
    url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{lon}/lat/{lat}/parameter/117/data.json"
    params = {
        'from': start.replace("-", ""),
        'to': end.replace("-", ""),
        'interval': 'hourly',
    }

    try:
        r = requests.get(url, params=params, timeout=timeout)
        if r.status_code != 200:
            print(f"    ! STRÅNG Error {r.status_code}")
            return pd.DataFrame()

        data = r.json()
        if not data:
            # Say so explicitly instead of failing later on a missing column.
            print("    ! STRÅNG returned no records")
            return pd.DataFrame()

        raw = pd.DataFrame(data)
        # Normalise every timestamp to UTC first, then strip the timezone so
        # the index is timezone-naive and can be aligned with other
        # timezone-naive hourly frames.
        times = pd.to_datetime(raw['date_time'], utc=True).dt.tz_localize(None)
        df = pd.DataFrame({
            'Time': times,
            # Coerce so null/non-numeric entries become NaN, not a crash.
            'Global_Radiation_Wm2': pd.to_numeric(raw['value'], errors='coerce'),
        })
        return df.set_index('Time').resample('h').mean()
    except Exception as e:
        print(f"    ! STRÅNG Exception: {e}")

    return pd.DataFrame()

# --- MAIN EXECUTION ---
# Columns of the exported table, in output order.
cols = ['Date', 'Hour', 'Temperature_C', 'Global_Radiation_Wm2', 'Wind_Speed_ms']
value_cols = cols[2:]

# 1. Get Observations
dfs = [fetch_smhi_obs(task) for task in obs_tasks]

# 2. Get Solar (STRÅNG)
df_solar = fetch_strang_solar(lat, lon, start_date, end_date)
dfs.append(df_solar)

# 3. Merge
print("Merging data...")
# The fetchers return an empty DataFrame on failure. Such a frame has no
# 'Time' index, so it is left out of the merge; its column is re-created
# (all NaN) by the reindex below instead of breaking the column selection.
available = [df for df in dfs if not df.empty]
if not available:
    raise RuntimeError("No data could be fetched; nothing to merge.")

# All remaining indexes are TZ-Naive, so they align on the hourly timestamps.
df_final = pd.concat(available, axis=1).rename_axis('Time')
missing_cols = [c for c in value_cols if c not in df_final.columns]
if missing_cols:
    print(f"    ! No data for: {', '.join(missing_cols)}")
df_final = df_final.reindex(columns=value_cols)

# Filter to the configured date range (whole end day included)
in_range = (df_final.index >= pd.Timestamp(start_date)) & (
    df_final.index <= pd.Timestamp(f"{end_date} 23:59:59")
)
df_final = df_final[in_range]

# Cleanup
df_final = df_final.interpolate(method='time', limit=2)  # Fill small gaps
# Remaining solar NaNs are 0 (night). Only the radiation column is filled:
# a missing temperature or wind reading is not a 0.0 measurement. Skipped
# when the solar fetch failed, so an outage is not exported as darkness.
if not df_solar.empty:
    df_final['Global_Radiation_Wm2'] = df_final['Global_Radiation_Wm2'].fillna(0)

remaining_gaps = df_final[value_cols].isna().sum()
remaining_gaps = remaining_gaps[remaining_gaps > 0]
if not remaining_gaps.empty:
    print(f"    ! Unfilled gaps left as NaN: {remaining_gaps.to_dict()}")

# Final Formatting
df_final = df_final.reset_index()
df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')
df_final = df_final[cols]

print(f"Success! Got {len(df_final)} rows.")
print(df_final.head())
df_final.to_csv("Stockholm_2024_Final.csv", index=False)
print("Saved to Stockholm_2024_Final.csv")

 > Fetching Temperature_C (Station 98230)...
 > Fetching Wind_Speed_ms (Station 97200)...
 > Fetching Global_Radiation_Wm2 (STRÅNG Model)...
Merging data...
Success! Got 8784 rows.
         Date   Hour  Temperature_C  Global_Radiation_Wm2  Wind_Speed_ms
0  2024-01-01  00:00           -2.2                   0.0            4.0
1  2024-01-01  01:00           -2.0                   0.0            4.0
2  2024-01-01  02:00           -1.9                   0.0            5.0
3  2024-01-01  03:00           -1.7                   0.0            5.0
4  2024-01-01  04:00           -1.5                   0.0            4.0
Saved to Stockholm_2024_Final.csv
