<a href="https://colab.research.google.com/github/simulate111/Climatic_Data/blob/main/turku_finland_meteorological_data_FMI_hourly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta

# --- CONFIGURATION ---
# Station identifier sent to the API as the `fmisid` query parameter.
FMISID = "100949"  # Turku Artukainen

# Requested period, as YYYY-MM-DD strings (parsed by get_chunks).
START_DATE = "2024-01-01"
END_DATE = "2025-01-01"

# One entry per stored query to download (weather + radiation):
#   name         - label printed while the task is being fetched
#   stored_query - value sent as the WFS `storedquery_id` parameter
#   parameters   - comma-separated parameter names to request
#   col_map      - requested parameter name -> output column name
tasks = [
    {
        "name": "Weather",
        "stored_query": "fmi::observations::weather::simple",
        "parameters": "t2m,ws_10min",
        "col_map": {"t2m": "Temperature_C", "ws_10min": "Wind_Speed_ms"},
    },
    {
        "name": "Solar",
        "stored_query": "fmi::observations::radiation::simple",
        "parameters": "GLOB_1MIN",
        "col_map": {"GLOB_1MIN": "Global_Radiation_W/m2"},
    },
]

# --- HELPER: API TIME CHUNKS (7 days max per request) ---
def get_chunks(start_str, end_str):
    """Split a date range into consecutive request windows of at most 7 days.

    Args:
        start_str: First day of the range, formatted as YYYY-MM-DD.
        end_str: End of the range, formatted as YYYY-MM-DD.

    Returns:
        A list of (window_start, window_end) tuples formatted as
        'YYYY-MM-DDTHH:MM:SSZ'. Each window starts where the previous one
        ended; the last window is shortened so it stops at end_str. The list
        is empty when start_str is not earlier than end_str.

    Raises:
        ValueError: If either argument does not match YYYY-MM-DD.
    """
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    window_length = timedelta(days=7)

    cursor = datetime.strptime(start_str, "%Y-%m-%d")
    range_end = datetime.strptime(end_str, "%Y-%m-%d")

    windows = []
    while cursor < range_end:
        # Clamp the final window so it never runs past the requested end.
        upper = min(cursor + window_length, range_end)
        windows.append(
            (cursor.strftime(stamp_format), upper.strftime(stamp_format))
        )
        cursor = upper
    return windows

# Build the request windows once at module level; fetch_fmi_data() iterates
# over this list for every task.
chunks = get_chunks(START_DATE, END_DATE)

print(f"Fetching hourly data for Turku (FMISID: {FMISID})...")

# --- FETCH FUNCTION ---
def _parse_fmi_rows(xml_bytes, col_map):
    """Extract observation rows from one WFS "simple" feature response.

    Args:
        xml_bytes: Raw XML payload of the response.
        col_map: Mapping from requested parameter name to output column name.
            Observations whose parameter is not in the mapping are dropped.

    Returns:
        A list of dicts with the keys "Time" (timestamp text as received),
        "Type" (mapped column name) and "Value" (float).

    Raises:
        xml.etree.ElementTree.ParseError: If the payload is not well-formed XML.
    """
    rows = []
    for element in ET.fromstring(xml_bytes).iter():
        if 'BsWfsElement' not in element.tag:
            continue

        timestamp = None
        param = None
        raw_value = None
        # Tags carry an XML namespace prefix, hence substring matching.
        for child in element:
            if 'Time' in child.tag:
                timestamp = child.text
            elif 'ParameterName' in child.tag:
                param = child.text
            elif 'ParameterValue' in child.tag:
                raw_value = child.text

        # Skip missing observations: absent value element or the 'NaN' marker.
        if raw_value is None or raw_value == 'NaN':
            continue
        try:
            value = float(raw_value)
        except ValueError:
            # Non-numeric value text: drop this observation only.
            continue

        friendly_name = col_map.get(param)
        if timestamp and friendly_name:
            rows.append({
                "Time": timestamp,
                "Type": friendly_name,
                "Value": value
            })
    return rows


def fetch_fmi_data(task):
    """Download one task's observations and return them as hourly means.

    One request is sent per (start, end) window in the module-level `chunks`
    list, for the station in the module-level `FMISID`. A window that fails
    (network error, non-200 status, malformed XML) is reported on stdout and
    skipped, so a partial result is still returned.

    Args:
        task: Dict with the keys "name", "stored_query", "parameters" and
            "col_map" (requested parameter name -> output column name).

    Returns:
        A DataFrame indexed by UTC time with one column per mapped parameter,
        resampled to hourly means. An empty DataFrame if no row was retrieved.
    """
    all_rows = []
    print(f" > Requesting {task['name']}...")

    url = "http://opendata.fmi.fi/wfs"
    # Without a timeout a stalled connection would block the run forever.
    request_timeout_s = 60

    for start, end in chunks:
        params = {
            "service": "WFS",
            "version": "2.0.0",
            "request": "getFeature",
            "storedquery_id": task['stored_query'],
            "fmisid": FMISID,
            "parameters": task['parameters'],
            "starttime": start,
            "endtime": end
        }

        try:
            r = requests.get(url, params=params, timeout=request_timeout_s)
            if r.status_code == 200:
                all_rows.extend(_parse_fmi_rows(r.content, task['col_map']))
            else:
                # Report the gap instead of dropping the window silently.
                print(
                    f"Warning: HTTP {r.status_code} for {start} to {end}; "
                    "chunk skipped."
                )
        except (requests.RequestException, ET.ParseError) as e:
            # Best effort: report the failed window and carry on.
            print(f"Error: {e}")

        # Short pause between consecutive requests.
        time.sleep(0.1)

    if not all_rows:
        return pd.DataFrame()

    df = pd.DataFrame(all_rows)
    # utc=True keeps the index timezone-aware (UTC) regardless of input form.
    df["Time"] = pd.to_datetime(df["Time"], utc=True)
    # pivot_table averages duplicate (Time, Type) pairs by default, which
    # also merges any timestamp returned by two adjacent windows.
    df = df.pivot_table(index="Time", columns="Type", values="Value")

    # ---- KEY STEP: RESAMPLE TO HOURLY ----
    # "1h" replaces the deprecated uppercase "1H" alias.
    df = df.resample("1h").mean()

    return df

# --- EXECUTION ---
# Fetch every configured task; tasks that returned no data are left out.
dfs = []

for task in tasks:
    df = fetch_fmi_data(task)
    if not df.empty:
        dfs.append(df)

if not dfs:
    print("Failed to retrieve data.")
    # exit() is the site-module helper meant for the interactive shell;
    # raise SystemExit directly and signal failure with a non-zero status.
    raise SystemExit(1)

print("Merging datasets...")

# Place the per-task frames side by side on their shared time index.
df_final = pd.concat(dfs, axis=1).sort_index()

# Ensure perfect hourly timeline ("1h" replaces the deprecated "1H" alias).
# END_DATE itself stays in the range for now so interpolation can use it.
full_range = pd.date_range(start=START_DATE, end=END_DATE, freq="1h", tz="UTC")
df_final = df_final.reindex(full_range)

# Fill missing values smoothly.
# NOTE: limit_direction="both" with no limit fills gaps of any length,
# including leading and trailing ones.
df_final = df_final.interpolate(method="linear", limit_direction="both")

# Trim to the requested period [START_DATE, END_DATE). Comparing timestamps
# replaces the former hard-coded "2024" prefix filter, which produced an
# empty export for any other configured year.
df_final = df_final[df_final.index < pd.Timestamp(END_DATE, tz="UTC")]

# Format output
df_final = df_final.reset_index().rename(columns={"index": "Time"})
df_final["Date"] = df_final["Time"].dt.strftime("%Y-%m-%d")
df_final["Hour"] = df_final["Time"].dt.strftime("%H:00")

cols = [
    "Date",
    "Hour",
    "Temperature_C",
    "Global_Radiation_W/m2",
    "Wind_Speed_ms"
]

# Keep only the columns that were actually retrieved, in the order above.
df_final = df_final[[c for c in cols if c in df_final.columns]]

print(df_final.head())
print(f"Total hourly rows: {len(df_final)}")

filename = "Turku_Finland_2024_Hourly.csv"
df_final.to_csv(filename, index=False)

print(f"\nSuccess! Saved to {filename}")

Fetching hourly data for Turku (FMISID: 100949)...
 > Requesting Weather...


  df = df.resample("1H").mean()


 > Requesting Solar...


  df = df.resample("1H").mean()
  full_range = pd.date_range(start=START_DATE, end=END_DATE, freq="1H", tz="UTC")


Merging datasets...
Type        Date   Hour  Temperature_C  Global_Radiation_W/m2  Wind_Speed_ms
0     2024-01-01  00:00     -15.850000                    0.0            2.2
1     2024-01-01  01:00     -16.216667                    0.0            2.2
2     2024-01-01  02:00     -16.366667                    0.0            2.2
3     2024-01-01  03:00     -16.316667                    0.0            2.2
4     2024-01-01  04:00     -16.116667                    0.0            2.2
Total hourly rows: 8784

Success! Saved to Turku_Finland_2024_Hourly.csv
