<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Turku_Finland_Meteorological_Institute.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta, UTC

# --- CONFIGURATION ---
# Station identifier (FMISID) and the date window to download.
STATION_ID = "100949"  # Turku Artukainen
START_DATE, END_DATE = "2024-01-01", "2025-01-01"

print(f"--- Fetching 2024 Data for Turku (FMISID: {STATION_ID}) ---")

# Define Fetch Tasks
# Each task carries: a display name, the stored query to call, the
# comma-separated parameter list to request, and a map from the parameter
# names in the response to the output column names.
weather_task = {
    "name": "Weather",
    "query": "fmi::observations::weather::simple",
    "params": "t2m,ws_10min",
    "map": {
        "t2m": "Temperature_C",
        "ws_10min": "Wind_Speed_ms",
    },
}

solar_task = {
    "name": "Solar",
    "query": "fmi::observations::radiation::simple",
    "params": "GLOB_1MIN",  # 1-minute resolution GHI
    "map": {
        "GLOB_1MIN": "GHI_Wm2",
    },
}

tasks = [weather_task, solar_task]

# Helper: Chunk dates (7 days per chunk to stay within FMI limits)
def get_chunks(start, end):
    s = datetime.strptime(start, "%Y-%m-%d").replace(tzinfo=UTC)
    e = datetime.strptime(end, "%Y-%m-%d").replace(tzinfo=UTC)
    chunks = []
    curr = s
    while curr < e:
        nxt = min(curr + timedelta(days=7), e)
        chunks.append((curr.strftime('%Y-%m-%dT%H:%M:%SZ'), nxt.strftime('%Y-%m-%dT%H:%M:%SZ')))
        curr = nxt
    return chunks

chunks = get_chunks(START_DATE, END_DATE)

# --- FETCHING LOOP ---
# For each task, request every time window from the WFS endpoint, flatten the
# returned XML members into (Time, Type, Value) rows and pivot them into one
# DataFrame per task. The per-task frames are collected in `dfs`.
dfs = []

# Loop invariants: endpoint, XML namespaces and chunk count.
url = "http://opendata.fmi.fi/wfs"
ns = {'wfs': 'http://www.opengis.net/wfs/2.0', 'BsWfs': 'http://xml.fmi.fi/schema/wfs/2.0'}
total = len(chunks)

for task in tasks:
    print(f"\n> Fetching {task['name']}...")
    all_rows = []

    for i, (start_str, end_str) in enumerate(chunks):
        # Progress line on every 10th chunk
        if i % 10 == 0:
            print(f"   Chunk {i+1}/{total}...")

        params = {
            "service": "WFS", "version": "2.0.0", "request": "getFeature",
            "storedquery_id": task['query'],
            "fmisid": STATION_ID,
            "parameters": task['params'],
            "starttime": start_str, "endtime": end_str
        }

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            r = requests.get(url, params=params, timeout=60)
            if r.status_code != 200:
                # Report the lost window instead of dropping it silently.
                print(f"   Warning: chunk {i+1} returned HTTP {r.status_code}, skipped")
            else:
                root = ET.fromstring(r.content)

                for member in root.findall('.//wfs:member', ns):
                    elm = member.find('.//BsWfs:BsWfsElement', ns)
                    if elm is None:
                        continue

                    # findtext returns None for a missing child, so a
                    # malformed member is skipped on its own instead of
                    # raising and discarding the rest of the chunk.
                    t = elm.findtext('BsWfs:Time', namespaces=ns)
                    p = elm.findtext('BsWfs:ParameterName', namespaces=ns)
                    raw = elm.findtext('BsWfs:ParameterValue', default='NaN', namespaces=ns)
                    if not t or not p or raw == 'NaN':
                        continue

                    try:
                        v = float(raw)
                    except ValueError:
                        # Empty or non-numeric value: treat as missing.
                        continue

                    # Keep only parameters this task knows how to name.
                    friendly = task['map'].get(p)
                    if friendly:
                        all_rows.append({'Time': t, 'Type': friendly, 'Value': v})
        except Exception as e:
            # Best effort: one failed window must not abort the whole download.
            # Chunk number is 1-based to match the progress line above.
            print(f"   Error in chunk {i+1}: {e}")
        # Short pause between requests.
        time.sleep(0.1)

    # Process Data
    if all_rows:
        df = pd.DataFrame(all_rows)
        # utc=True guarantees a tz-aware index regardless of the offset
        # notation used in the response timestamps.
        df['Time'] = pd.to_datetime(df['Time'], utc=True)
        # One column per friendly parameter name; duplicate timestamps are
        # averaged by pivot_table.
        df = df.pivot_table(index='Time', columns='Type', values='Value')

        # If Solar, resample 1-min -> 10-min to match weather
        if 'GHI_Wm2' in df.columns:
            print("   > Resampling Solar (1-min) to 10-min averages...")
            df = df.resample('10min').mean()

        dfs.append(df)
    else:
        print(f"   Warning: no data retrieved for {task['name']}")

# --- MERGE & SAVE ---
# Join the per-task frames on their time index, put them on a regular
# 10-minute grid, fill gaps, and write the result to CSV.
if dfs:
    print("\nMerging datasets...")
    df_final = pd.concat(dfs, axis=1).sort_index()

    # 1. Create perfect time grid (avoids missing rows)
    full_range = pd.date_range(START_DATE, END_DATE, freq='10min', tz='UTC')
    df_final = df_final.reindex(full_range)

    # 2. Interpolate gaps
    # NOTE: limit_direction='both' with no `limit` fills gaps of any length,
    # including the leading and trailing edges of the grid.
    df_final = df_final.interpolate(method='linear', limit_direction='both')

    # 3. Clean Radiation (No negative values at night)
    if 'GHI_Wm2' in df_final.columns:
        df_final['GHI_Wm2'] = df_final['GHI_Wm2'].clip(lower=0).fillna(0)

    # 4. Filter: drop the closing END_DATE timestamp so the output covers
    #    START_DATE up to, but not including, END_DATE. This is derived from
    #    the configuration rather than a fixed year, so changing the dates
    #    above cannot silently empty the result. The END_DATE row is kept
    #    until after interpolation so it still anchors the last gap.
    end_ts = pd.Timestamp(END_DATE, tz='UTC')
    df_final = df_final[df_final.index < end_ts]

    # 5. Format
    df_final = df_final.reset_index().rename(columns={'index': 'Time'})
    df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
    df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')

    # 6. Select Columns (only those that were actually retrieved)
    cols = ['Date', 'Hour', 'Temperature_C', 'Wind_Speed_ms', 'GHI_Wm2']
    df_final = df_final[[c for c in cols if c in df_final.columns]]

    print("-" * 30)
    print(df_final.head())
    print("-" * 30)

    filename = "Turku_Artukainen_2024.csv"
    df_final.to_csv(filename, index=False)
    print(f"Success! Saved to {filename}")
else:
    print("Failed to retrieve data.")

--- Fetching 2024 Data for Turku (FMISID: 100949) ---

> Fetching Weather...
   Chunk 1/53...
   Chunk 11/53...
   Chunk 21/53...
   Chunk 31/53...
   Chunk 41/53...
   Chunk 51/53...

> Fetching Solar...
   Chunk 1/53...
   Chunk 11/53...
   Chunk 21/53...
   Chunk 31/53...
   Chunk 41/53...
   Chunk 51/53...
   > Resampling Solar (1-min) to 10-min averages...

Merging datasets...
------------------------------
Type        Date   Hour  Temperature_C  Wind_Speed_ms  GHI_Wm2
0     2024-01-01  00:00          -15.8            1.9      0.0
1     2024-01-01  00:10          -15.8            1.9      0.0
2     2024-01-01  00:20          -15.7            1.9      0.0
3     2024-01-01  00:30          -15.8            1.9      0.0
4     2024-01-01  00:40          -15.9            1.9      0.0
------------------------------
Success! Saved to Turku_Artukainen_2024.csv
