<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Turku_Finland_Meteorological_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta

# --- CONFIGURATION ---
# Station identifier and the date window (YYYY-MM-DD strings) to download.
FMISID = "100949"  # Turku Artukainen
START_DATE = "2024-01-01"
END_DATE = "2025-01-01"

# Define Tasks
# Each task names a stored query, the parameters to request from it, and
# how to rename those parameters into output column names.
# Weather is 10-min.
WEATHER_TASK = {
    "name": "Weather",
    "stored_query": "fmi::observations::weather::simple",
    "parameters": "t2m,ws_10min",
    "col_map": {
        "t2m": "Temperature_C",
        "ws_10min": "Wind_Speed_ms",
    },
}

# Solar is 1-min (needs resampling).
SOLAR_TASK = {
    "name": "Solar",
    "stored_query": "fmi::observations::radiation::simple",
    "parameters": "GLOB_1MIN",
    "col_map": {
        "GLOB_1MIN": "Global_Radiation_W/m2",
    },
}

tasks = [WEATHER_TASK, SOLAR_TASK]

# Helper: Chunk dates to avoid API limits (7 days per chunk by default)
def get_chunks(start_str, end_str, chunk_days=7):
    """Split a date range into consecutive request windows.

    Args:
        start_str: Start date formatted as ``YYYY-MM-DD``.
        end_str: End date formatted as ``YYYY-MM-DD``.
        chunk_days: Maximum length of one window, in days. Defaults to 7.

    Returns:
        A list of ``(window_start, window_end)`` tuples, each formatted as
        ``%Y-%m-%dT%H:%M:%SZ``. Windows are contiguous (one window's end is
        the next one's start) and the last window is clipped to ``end_str``.
        The list is empty when ``end_str`` is not after ``start_str``.

    Raises:
        ValueError: If ``chunk_days`` is not positive, or if either date
            string does not match ``YYYY-MM-DD``.
    """
    # A zero or negative step would never advance the cursor below.
    if chunk_days <= 0:
        raise ValueError(f"chunk_days must be positive, got {chunk_days!r}")

    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    step = timedelta(days=chunk_days)
    start = datetime.strptime(start_str, "%Y-%m-%d")
    end = datetime.strptime(end_str, "%Y-%m-%d")

    chunks = []
    curr = start
    while curr < end:
        nxt = min(curr + step, end)
        chunks.append((curr.strftime(stamp_format), nxt.strftime(stamp_format)))
        curr = nxt
    return chunks

# Request windows are computed once here; fetch_fmi_data iterates over them.
chunks = get_chunks(start_str=START_DATE, end_str=END_DATE)

print(f"Fetching 2024 Data for Turku (FMISID: {FMISID})...")

# --- FETCH FUNCTION ---
def _parse_fmi_rows(xml_bytes, col_map):
    """Turn one WFS response body into long-format observation rows.

    Args:
        xml_bytes: Raw XML payload of a single response.
        col_map: Mapping from API parameter name to output column name.
            Observations whose parameter is not in the mapping are dropped.

    Returns:
        A list of ``{'Time', 'Type', 'Value'}`` dicts, one per usable
        observation. Observations with a missing or non-numeric value are
        skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the payload is not valid XML.
    """
    rows = []
    root = ET.fromstring(xml_bytes)
    for member in root:
        for element in member.iter():
            # ElementTree prefixes namespaced tags with "{namespace}", so
            # elements are recognised by substring rather than equality.
            if 'BsWfsElement' not in element.tag:
                continue

            timestamp = None
            param = None
            val = None
            for child in element:
                if 'Time' in child.tag:
                    timestamp = child.text
                elif 'ParameterName' in child.tag:
                    param = child.text
                elif 'ParameterValue' in child.tag:
                    val = child.text

            # Handle NaNs from API, plus absent or unparsable values.
            if val == 'NaN':
                val = None
            else:
                try:
                    val = float(val)
                except (TypeError, ValueError):
                    val = None

            if timestamp and param and val is not None:
                friendly_name = col_map.get(param)
                if friendly_name:
                    rows.append({'Time': timestamp, 'Type': friendly_name, 'Value': val})
    return rows


def fetch_fmi_data(task):
    """Download one task's observations for every window in ``chunks``.

    Args:
        task: Dict with the keys ``name``, ``stored_query``, ``parameters``
            and ``col_map`` (API parameter name -> output column name).

    Returns:
        A DataFrame indexed by ``Time`` with one column per mapped parameter,
        or an empty DataFrame when no usable observation was received.
        A window that fails (network error, timeout, non-200 status or
        malformed XML) is reported on stdout and skipped, so the result can
        have gaps.
    """
    all_rows = []
    print(f" > Requesting {task['name']}...")

    url = "http://opendata.fmi.fi/wfs"
    # Without a timeout a single stalled connection blocks the run forever.
    timeout_seconds = 60

    for start, end in chunks:
        params = {
            "service": "WFS",
            "version": "2.0.0",
            "request": "getFeature",
            "storedquery_id": task['stored_query'],
            "fmisid": FMISID,
            "parameters": task['parameters'],
            "starttime": start,
            "endtime": end
        }

        try:
            r = requests.get(url, params=params, timeout=timeout_seconds)
            if r.status_code == 200:
                all_rows.extend(_parse_fmi_rows(r.content, task['col_map']))
            else:
                # Surface rejected windows instead of dropping them silently.
                print(f"    HTTP {r.status_code} for {start} to {end}, chunk skipped")
        except (requests.RequestException, ET.ParseError) as e:
            # Best effort per window: report and continue with the next one.
            print(f"    Error: {e}")
        time.sleep(0.1)

    if not all_rows:
        return pd.DataFrame()

    df = pd.DataFrame(all_rows)
    df['Time'] = pd.to_datetime(df['Time'])
    # Pivot to get columns: Time | Temperature | Wind ...
    # pivot_table averages rows that share a timestamp and type.
    return df.pivot_table(index='Time', columns='Type', values='Value')

# --- EXECUTION ---
# Run every configured task and collect the non-empty results.
dfs = []
for task in tasks:
    df = fetch_fmi_data(task)
    if df.empty:
        continue

    # Solar comes in at 1-min resolution; average it into 10-min blocks
    # before it is collected alongside the other results.
    has_solar = "Global_Radiation_W/m2" in df.columns
    if has_solar:
        print("   > Resampling Solar to 10-min averages...")
        df = df.resample('10min').mean()

    dfs.append(df)

# --- MERGE & INTERPOLATE ---
if dfs:
    print("Merging and filling gaps...")

    # 1. Outer join keeps every timestamp present in any of the results.
    df_final = pd.concat(dfs, axis=1, join='outer')
    df_final = df_final.sort_index()

    # 2. Regular 10-minute UTC grid over the configured period, so a
    # timestamp that is missing from every result still gets a row.
    full_range = pd.date_range(start=START_DATE, end=END_DATE, freq='10min', tz='UTC')
    df_final = df_final.reindex(full_range)

    # 3. Linear interpolation in both directions, including the leading and
    # trailing edges. No 'limit' is given, so gaps of any length are filled.
    # NOTE(review): long outages therefore become straight lines - confirm
    # that this is acceptable for the radiation column.
    df_final = df_final.interpolate(method='linear', limit_direction='both')

    # 4. Formatting
    df_final = df_final.reset_index().rename(columns={'index': 'Time'})

    # The grid includes END_DATE itself; the exported period is half-open.
    # Compare timestamps rather than matching a hard-coded year prefix, so
    # the trim keeps working when START_DATE / END_DATE are changed.
    in_period = df_final['Time'] < pd.Timestamp(END_DATE, tz='UTC')

    df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
    df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')

    cols = ['Date', 'Hour', 'Temperature_C', 'Global_Radiation_W/m2', 'Wind_Speed_ms']
    df_final = df_final.loc[in_period, [c for c in cols if c in df_final.columns]]

    print(df_final.head())
    print(f"Total Rows: {len(df_final)}")

    filename = "Turku_Finland_2024_Filled.csv"
    df_final.to_csv(filename, index=False)
    print(f"\nSuccess! Saved to {filename}")
else:
    print("Failed to retrieve data.")

Fetching 2024 Data for Turku (FMISID: 100949)...
 > Requesting Weather...
 > Requesting Solar...
   > Resampling Solar to 10-min averages...
Merging and filling gaps...
Type        Date   Hour  Temperature_C  Global_Radiation_W/m2  Wind_Speed_ms
0     2024-01-01  00:00          -15.8                    0.0            1.9
1     2024-01-01  00:10          -15.8                    0.0            1.9
2     2024-01-01  00:20          -15.7                    0.0            1.9
3     2024-01-01  00:30          -15.8                    0.0            1.9
4     2024-01-01  00:40          -15.9                    0.0            1.9
Total Rows: 52704

Success! Saved to Turku_Finland_2024_Filled.csv
