<a href="https://colab.research.google.com/github/simulate111/Climatic_Data/blob/main/Stockholm_Sweden_meteorological_data_SMHI(2015_2024)10minutes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
import io

# --- CONFIGURATION ---
# Date window (ISO "YYYY-MM-DD" strings) for the download and the final filter.
START_DATE = "2015-01-01"
END_DATE = "2024-12-31"

# COORDINATES (Stockholm)
# Kept as strings: they are substituted verbatim into the request URL.
LAT = "59.3417"
LON = "18.0549"

# TASKS
# One entry per observed series:
#   station - station id placed in the request URL
#   param   - parameter id placed in the request URL
#   name    - column name given to the resulting series
tasks = [
    # 1. Temperature: Observatoriekullen (Station 98230)
    dict(station="98230", param="1", name="Air_Temperature_C"),
    # 2. Wind: Bromma Airport (Station 97200)
    dict(station="97200", param="4", name="Wind_Speed_ms"),
]

def _parse_smhi_archive_csv(content):
    """Parse a ``corrected-archive`` CSV payload into Time/Value columns.

    Args:
        content: Full text of the semicolon-separated CSV response.

    Returns:
        DataFrame with ``Time`` (NaT where unparsable) and ``Value``
        (NaN where unparsable) columns.
    """
    # The data table starts at the first line (within the first 50) that
    # mentions "Datum" or "Date"; everything before it is skipped.
    skip_rows = 0
    for i, line in enumerate(content.splitlines()[:50]):
        if "Datum" in line or "Date" in line:
            skip_rows = i
            break

    # Explicit ``names`` means the header line is read as a data row; it fails
    # datetime parsing (-> NaT) and is dropped by fetch_smhi_data's NaT filter.
    df = pd.read_csv(io.StringIO(content), sep=';', skiprows=skip_rows,
                     usecols=[0, 1, 2], names=['d', 't', 'v'], dtype=str)
    return pd.DataFrame({
        'Time': pd.to_datetime(df['d'] + ' ' + df['t'],
                               format='%Y-%m-%d %H:%M:%S', errors='coerce'),
        'Value': pd.to_numeric(df['v'], errors='coerce'),
    })


def _parse_smhi_latest_json(data):
    """Parse a ``latest-months`` JSON payload into Time/Value columns.

    Args:
        data: Decoded JSON object; its ``'value'`` entry is a list of records
            carrying ``'date'`` (epoch milliseconds) and ``'value'``.

    Returns:
        DataFrame with ``Time`` and ``Value`` (NaN where not numeric) columns.
    """
    records = data['value'] or []
    raw_values = pd.Series([rec['value'] for rec in records], dtype=object)
    return pd.DataFrame({
        'Time': pd.to_datetime([rec['date'] for rec in records], unit='ms'),
        # Coerce instead of float(): one malformed value must not discard
        # the whole payload.
        'Value': pd.to_numeric(raw_values, errors='coerce').astype(float).to_numpy(),
    })


def fetch_smhi_data(task, timeout=60):
    """Fetch one observed series and resample it to 10-minute intervals.

    Downloads the station's corrected archive (CSV) and latest months (JSON),
    merges them, resamples to a 10-minute grid and time-interpolates gaps.
    Each download is best-effort: a failure is reported on stdout and the
    other source is still used.

    Args:
        task: Mapping with ``'station'``, ``'param'`` and ``'name'`` keys.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the run.

    Returns:
        DataFrame indexed by naive timestamps with a single column named
        ``task['name']``, or an empty DataFrame when nothing could be fetched.
    """
    print(f" > Fetching {task['name']} from Station {task['station']}...")
    base_url = (
        "https://opendata-download-metobs.smhi.se/api/version/1.0"
        f"/parameter/{task['param']}/station/{task['station']}/period"
    )
    frames = []

    # A. Try Archive (CSV)
    try:
        r = requests.get(f"{base_url}/corrected-archive/data.csv", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_archive_csv(r.text))
        else:
            print(f"   Archive request returned HTTP {r.status_code}")
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print(f"   Error fetching archive: {e}")

    # B. Try Latest (JSON)
    try:
        r = requests.get(f"{base_url}/latest-months/data.json", timeout=timeout)
        if r.status_code == 200:
            frames.append(_parse_smhi_latest_json(r.json()))
        else:
            print(f"   Latest-months request returned HTTP {r.status_code}")
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print(f"   Error fetching latest months: {e}")

    # Concatenate once, and only non-empty frames (concatenating empty frames
    # is deprecated in recent pandas).
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame()

    # C. Process: Resample to 10 Minutes and Interpolate
    # A stable sort keeps archive rows ahead of latest-months rows for equal
    # timestamps, so drop_duplicates deterministically keeps the archive value.
    combined_df = (
        pd.concat(frames, ignore_index=True)
        .dropna(subset=['Time'])
        .sort_values('Time', kind='mergesort')
        .drop_duplicates(subset=['Time'])
        .set_index('Time')
    )
    if combined_df.empty:
        return pd.DataFrame()

    # limit=6 fills at most six consecutive 10-minute slots per gap; note that
    # the leading slots of a longer gap are still filled.
    combined_df = combined_df.resample('10min').mean().interpolate(method='time', limit=6)

    combined_df.index = combined_df.index.tz_localize(None)
    return combined_df.rename(columns={'Value': task['name']})

def fetch_strang_solar_multiyear(lat, lon, start, end, timeout=120):
    """Fetch hourly solar data year by year and resample to 10 minutes.

    The requested window is split into one request per calendar year, clamped
    to ``[start, end]`` so a partial first or last year is still covered.
    Each request is best-effort: a failure is reported on stdout and the
    remaining years are still fetched.

    Args:
        lat: Latitude, substituted verbatim into the request URL.
        lon: Longitude, substituted verbatim into the request URL.
        start: First day of the window (anything ``pd.to_datetime`` accepts).
        end: Last day of the window (anything ``pd.to_datetime`` accepts).
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the run.

    Returns:
        DataFrame indexed by naive timestamps with a ``Global_Solar_Wm2``
        column on a 10-minute grid, or an empty DataFrame when nothing could
        be fetched.
    """
    print(" > Fetching Solar Data (STRÅNG)... This may take a moment.")

    start_dt = pd.to_datetime(start).normalize()
    end_dt = pd.to_datetime(end).normalize()
    if start_dt > end_dt:
        return pd.DataFrame()

    # Loop-invariant: only the query parameters change per year.
    url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{lon}/lat/{lat}/parameter/117/data.json"
    all_solar = []

    # Iterate calendar years directly; a 'YS' date_range would skip the first
    # year whenever `start` is not January 1st.
    for year in range(start_dt.year, end_dt.year + 1):
        y_start = max(start_dt, pd.Timestamp(year=year, month=1, day=1))
        y_end = min(end_dt, pd.Timestamp(year=year, month=12, day=31))
        print(f"   ...fetching solar for {year}")

        params = {
            'from': y_start.strftime('%Y%m%d'),
            'to': y_end.strftime('%Y%m%d'),
            'interval': 'hourly',
        }

        try:
            r = requests.get(url, params=params, timeout=timeout)
            if r.status_code != 200:
                print(f"   Solar request for {year} returned HTTP {r.status_code}")
                continue
            records = r.json()
            if not records:
                continue
            raw_values = pd.Series([rec['value'] for rec in records], dtype=object)
            all_solar.append(pd.DataFrame({
                # NOTE(review): assumes 'date_time' is ISO-8601 in UTC; utc=True
                # normalizes any offset to UTC - confirm against the API.
                'Time': pd.to_datetime([rec['date_time'] for rec in records], utc=True),
                # Coerce so missing/non-numeric entries become NaN rather than
                # turning the column into object dtype.
                'Global_Solar_Wm2': pd.to_numeric(raw_values, errors='coerce').astype(float).to_numpy(),
            }))
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            print(f"   Error fetching solar for {year}: {e}")

    if not all_solar:
        return pd.DataFrame()

    full_df = pd.concat(all_solar, ignore_index=True)
    # Drop the timezone so the index matches the other naive-timestamp series.
    full_df['Time'] = full_df['Time'].dt.tz_localize(None)
    full_df = full_df.dropna(subset=['Time']).set_index('Time').sort_index()

    # Resample to 10min and interpolate (at most six consecutive slots per gap).
    full_df = full_df.resample('10min').mean().interpolate(method='time', limit=6)
    return full_df

# --- MAIN EXECUTION ---

# 1. Fetch Data
dfs = [fetch_smhi_data(task) for task in tasks]
dfs.append(fetch_strang_solar_multiyear(LAT, LON, START_DATE, END_DATE))

# The fetchers return an empty DataFrame on failure. Drop those before merging:
# concatenating an empty, non-datetime-indexed frame on axis=1 can degrade the
# merged index, which breaks the time interpolation and the .month/.day
# accessors used below.
dfs = [df for df in dfs if not df.empty]
if not dfs:
    raise RuntimeError("No data could be fetched from any source; nothing to average.")

print("Merging data...")
df_raw = pd.concat(dfs, axis=1)

# 2. Filter strict range (END_DATE is included through 23:59:59)
mask = (df_raw.index >= pd.Timestamp(START_DATE)) & (df_raw.index <= pd.Timestamp(f"{END_DATE} 23:59:59"))
df_raw = df_raw.loc[mask]

# 3. Cleanup (Interpolate small gaps in the merged set)
df_raw = df_raw.interpolate(method='time', limit=3)
if 'Global_Solar_Wm2' in df_raw.columns:
    # Remaining solar gaps are treated as zero and negative values are clipped.
    df_raw['Global_Solar_Wm2'] = df_raw['Global_Solar_Wm2'].fillna(0).clip(lower=0)

# 4. Calculate 10-Year 10-Minute Average
print("Calculating 10-year 10-Minute Averages...")

# Group by Month, Day, Hour, AND Minute so each 10-minute slot of the calendar
# year is averaged across all years in the window.
grouped = df_raw.groupby([df_raw.index.month, df_raw.index.day, df_raw.index.hour, df_raw.index.minute]).mean()
grouped.index.names = ['Month', 'Day', 'Hour', 'Minute']
df_avg = grouped.reset_index()

# Create dummy timestamp for display (Using 2024 for Leap Year support).
# Assembled from numeric components rather than parsed from non-zero-padded
# strings, so no row can be lost to format inference under errors='coerce'.
df_avg['Dummy_Timestamp'] = pd.to_datetime(
    pd.DataFrame({
        'year': 2024,
        'month': df_avg['Month'],
        'day': df_avg['Day'],
        'hour': df_avg['Hour'],
        'minute': df_avg['Minute'],
    }),
    errors='coerce',
)
df_avg = df_avg.dropna(subset=['Dummy_Timestamp']).sort_values('Dummy_Timestamp')

# Format Output
df_avg['Display_Date'] = df_avg['Dummy_Timestamp'].dt.strftime('%m-%d')
df_avg['Display_Hour'] = df_avg['Dummy_Timestamp'].dt.strftime('%H:%M')

# A source that failed to download has no column here. Report it and emit the
# column as NaN instead of failing with a bare KeyError after all the work.
value_columns = [task['name'] for task in tasks] + ['Global_Solar_Wm2']
missing_columns = [col for col in value_columns if col not in df_avg.columns]
if missing_columns:
    print(f"Warning: no data for {', '.join(missing_columns)}; column(s) left empty.")
final_output = df_avg.reindex(columns=['Display_Date', 'Display_Hour', *value_columns])

print(f"Success! Created {len(final_output)} rows (10-minute resolution).")
print(final_output.head())
final_output.to_csv("Stockholm_10Year_10Min_Average_2015-2024.csv", index=False)

 > Fetching Air_Temperature_C from Station 98230...
 > Fetching Wind_Speed_ms from Station 97200...
 > Fetching Solar Data (STRÅNG)... This may take a moment.
   ...fetching solar for 2015
   ...fetching solar for 2016
   ...fetching solar for 2017
   ...fetching solar for 2018
   ...fetching solar for 2019
   ...fetching solar for 2020
   ...fetching solar for 2021
   ...fetching solar for 2022
   ...fetching solar for 2023
   ...fetching solar for 2024
Merging data...
Calculating 10-year 10-Minute Averages...
Success! Created 52704 rows (10-minute resolution).
  Display_Date Display_Hour  Air_Temperature_C  Wind_Speed_ms  \
0        01-01        00:00              2.790       3.100000   
1        01-01        00:10              2.795       3.150000   
2        01-01        00:20              2.800       3.200000   
3        01-01        00:30              2.805       3.250000   
4        01-01        00:40              2.810       3.444444   

   Global_Solar_Wm2  
0               0.