<a href="https://colab.research.google.com/github/simulate111/Climatic_Data/blob/main/turku_finland_meteorological_institute_FMI(2015_2024)10minutes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta, timezone

# --- CONFIGURATION ---
STATION_ID = "100949"  # Turku Artukainen
LAT = "60.45"          # Turku Latitude
LON = "22.25"          # Turku Longitude

YEARS = range(2015, 2025)
OUTPUT_FILE = "Turku_10Yr_10Min_Averages_Hybrid.csv"

# --- PART 1: FMI WEATHER (Temp + Wind) ---
def get_fmi_chunks(start_date, end_date):
    s = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    e = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
    chunks = []
    curr = s
    while curr < e:
        nxt = min(curr + timedelta(days=7), e)
        chunks.append((curr.strftime('%Y-%m-%dT%H:%M:%SZ'), nxt.strftime('%Y-%m-%dT%H:%M:%SZ')))
        curr = nxt
    return chunks

def fetch_fmi_weather(year):
    print(f"   > Fetching FMI Weather (Temp/Wind)...", end=" ", flush=True)

    start_str = f"{year}-01-01"
    end_str = f"{year+1}-01-01"
    chunks = get_fmi_chunks(start_str, end_str)

    all_rows = []
    params = {
        "service": "WFS", "version": "2.0.0", "request": "getFeature",
        "storedquery_id": "fmi::observations::weather::simple",
        "fmisid": STATION_ID,
        "parameters": "t2m,ws_10min",
    }

    for i, (s_ch, e_ch) in enumerate(chunks):
        params["starttime"] = s_ch
        params["endtime"] = e_ch

        try:
            r = requests.get("http://opendata.fmi.fi/wfs", params=params, timeout=20)
            if r.status_code == 200:
                root = ET.fromstring(r.content)
                ns = {'wfs': 'http://www.opengis.net/wfs/2.0', 'BsWfs': 'http://xml.fmi.fi/schema/wfs/2.0'}
                for member in root.findall('.//wfs:member', ns):
                    elm = member.find('.//BsWfs:BsWfsElement', ns)
                    # FIX 1: Explicit 'is not None' check avoids DeprecationWarning
                    if elm is not None:
                        t = elm.find('BsWfs:Time', ns).text
                        p = elm.find('BsWfs:ParameterName', ns).text
                        v_node = elm.find('BsWfs:ParameterValue', ns)
                        try:
                            val = float(v_node.text)
                            p_name = "Temperature_C" if p == "t2m" else "Wind_Speed_ms"
                            all_rows.append({'Time': t, 'Type': p_name, 'Value': val})
                        except: continue
        except Exception: pass
        if i % 10 == 0: print(".", end="", flush=True)
        time.sleep(0.05)

    if all_rows:
        df = pd.DataFrame(all_rows)
        # FIX 2: Handle Timezones correctly
        # FMI returns UTC strings with 'Z'. We convert to datetime, then STRIP timezone to match STRÅNG.
        df['Time'] = pd.to_datetime(df['Time'])
        if df['Time'].dt.tz is not None:
             df['Time'] = df['Time'].dt.tz_convert(None)

        df = df.pivot_table(index='Time', columns='Type', values='Value', aggfunc='mean')

        # Resample to 10min
        df = df.resample('10min').mean().interpolate(limit=6)
        print(f" Done ({len(df)} rows)")
        return df
    print(" Failed.")
    return pd.DataFrame()

# --- PART 2: STRÅNG SOLAR (Model) ---
def fetch_strang_solar(year):
    print(f"   > Fetching STRÅNG Solar...", end=" ", flush=True)

    y_start = f"{year}-01-01"
    y_end = f"{year}-12-31"

    url = f"https://opendata-download-metanalys.smhi.se/api/category/strang1g/version/1/geotype/point/lon/{LON}/lat/{LAT}/parameter/117/data.json"
    params = {'from': y_start.replace("-", ""), 'to': y_end.replace("-", ""), 'interval': 'hourly'}

    try:
        r = requests.get(url, params=params, timeout=30)
        if r.status_code == 200:
            data = r.json()
            records = [{'Time': pd.to_datetime(i['date_time']), 'Global_Radiation_Wm2': i['value']} for i in data]
            df = pd.DataFrame(records)

            # STRÅNG is also UTC, we make it naive to match FMI
            if df['Time'].dt.tz is not None:
                df['Time'] = df['Time'].dt.tz_convert(None)
            else:
                 # If it parsed as naive but is actually UTC, just leave it as naive UTC
                 pass

            df = df.set_index('Time').sort_index()

            # UPSAMPLE: Hourly -> 10 Minute (Interpolated)
            df = df.resample('10min').interpolate(method='time')
            print(f" Done (Upsampled to {len(df)} rows)")
            return df
    except Exception as e: print(f"Error: {e}")
    return pd.DataFrame()

# --- MAIN EXECUTION ---
all_years_data = []

print(f"Starting Hybrid Extraction for Turku ({YEARS[0]}-{YEARS[-1]})...")

for year in YEARS:
    print(f"\n--- Processing Year {year} ---")

    # 1. Get FMI Weather
    df_weather = fetch_fmi_weather(year)

    # 2. Get STRÅNG Solar
    df_solar = fetch_strang_solar(year)

    # 3. Merge
    if not df_weather.empty and not df_solar.empty:
        # Both indices are now Timezone-Naive. Safe to merge.
        df_merged = pd.merge(df_weather, df_solar, left_index=True, right_index=True, how='inner')
        all_years_data.append(df_merged)

if all_years_data:
    print("\nMerging all years...")
    master_df = pd.concat(all_years_data)

    # Solar Cleanup
    if 'Global_Radiation_Wm2' in master_df.columns:
        master_df['Global_Radiation_Wm2'] = master_df['Global_Radiation_Wm2'].fillna(0).clip(lower=0)

    print("Calculating 10-Year 10-Minute Averages...")

    grouped = master_df.groupby([
        master_df.index.month,
        master_df.index.day,
        master_df.index.hour,
        master_df.index.minute
    ]).mean()

    grouped.index.names = ['Month', 'Day', 'Hour', 'Minute']
    df_avg = grouped.reset_index()

    # Dummy Timestamp for Sorting (using 2024)
    df_avg['Dummy_Timestamp'] = pd.to_datetime(
        '2024-' + df_avg['Month'].astype(str) + '-' + df_avg['Day'].astype(str) + ' ' +
        df_avg['Hour'].astype(str) + ':' + df_avg['Minute'].astype(str) + ':00',
        errors='coerce'
    )
    df_avg = df_avg.dropna(subset=['Dummy_Timestamp']).sort_values('Dummy_Timestamp')

    df_avg['Display_Date'] = df_avg['Dummy_Timestamp'].dt.strftime('%m-%d')
    df_avg['Display_Time'] = df_avg['Dummy_Timestamp'].dt.strftime('%H:%M')

    final_cols = ['Display_Date', 'Display_Time', 'Temperature_C', 'Wind_Speed_ms', 'Global_Radiation_Wm2']
    final_output = df_avg[[c for c in final_cols if c in df_avg.columns]]

    print(f"Success! Generated {len(final_output)} rows.")
    print(final_output.head())
    final_output.to_csv(OUTPUT_FILE, index=False)
else:
    print("No data retrieved.")

Starting Hybrid Extraction for Turku (2015-2024)...

--- Processing Year 2015 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52561 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52417 rows)

--- Processing Year 2016 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52705 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52561 rows)

--- Processing Year 2017 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52561 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52417 rows)

--- Processing Year 2018 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52561 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52417 rows)

--- Processing Year 2019 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52561 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52417 rows)

--- Processing Year 2020 ---
   > Fetching FMI Weather (Temp/Wind)... ...... Done (52705 rows)
   > Fetching STRÅNG Solar...  Done (Upsampled to 52561 rows)
