<a href="https://colab.research.google.com/github/simulate111/Climatic_Data/blob/main/Turku_finland_meteorological_institute_FMI(2015_2024).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
import time
from datetime import datetime, timedelta, UTC

# --- CONFIGURATION ---
# FMI station identifier passed as the `fmisid` request parameter.
STATION_ID = "100949"  # Turku Artukainen
# Calendar years to download: 2015 through 2024 (the stop value is exclusive).
YEARS = range(2015, 2024 + 1)
# Destination of the averaged table written at the end of the script.
OUTPUT_FILE = "Turku_Artukainen_10Yr_Averages.csv"

# Maps each requested FMI parameter name to the column name used in the output.
_weather_columns = dict(t2m="Temperature_C", ws_10min="Wind_Speed_ms")

# Simplified task list: only the weather query is kept (no solar task).
tasks = [
    dict(
        name="Weather",
        query="fmi::observations::weather::simple",
        params="t2m,ws_10min",
        map=_weather_columns,
    ),
]

def get_chunks(start_date, end_date):
    s = datetime.strptime(start_date, "%Y-%m-%d").replace(tzinfo=UTC)
    e = datetime.strptime(end_date, "%Y-%m-%d").replace(tzinfo=UTC)
    chunks = []
    curr = s
    while curr < e:
        nxt = min(curr + timedelta(days=7), e)
        chunks.append((curr.strftime('%Y-%m-%dT%H:%M:%SZ'), nxt.strftime('%Y-%m-%dT%H:%M:%SZ')))
        curr = nxt
    return chunks

# One DataFrame per successfully downloaded year (columns: Time + one column
# per mapped parameter); consumed by the averaging step further down.
all_years_data = []

# Only a single task is configured, so resolve it once instead of per request.
task = tasks[0]
# XML namespaces needed to address the elements of the WFS response.
ns = {'wfs': 'http://www.opengis.net/wfs/2.0', 'BsWfs': 'http://xml.fmi.fi/schema/wfs/2.0'}

# --- FETCHING LOOP ---
# For every year: request the observations window by window, collect the
# values as long-format rows, then pivot them into one column per parameter.
for year in YEARS:
    print(f"\n--- Fetching Year {year} ---")
    start_str = f"{year}-01-01"
    end_str = f"{year+1}-01-01"
    chunks = get_chunks(start_str, end_str)

    all_rows = []
    for i, (start_ch, end_ch) in enumerate(chunks):
        if i % 15 == 0:
            print(f"  Progress: {i}/{len(chunks)} chunks")

        params = {
            "service": "WFS", "version": "2.0.0", "request": "getFeature",
            "storedquery_id": task['query'],
            "fmisid": STATION_ID,
            "parameters": task['params'],
            "starttime": start_ch, "endtime": end_ch
        }

        # Best effort per window: a failed window is reported and skipped so
        # that one bad response does not abort the whole multi-year download.
        # `Exception` does not cover KeyboardInterrupt, so Ctrl-C still stops
        # the script.
        try:
            r = requests.get("http://opendata.fmi.fi/wfs", params=params, timeout=30)
            if r.status_code != 200:
                # Make dropped windows visible instead of losing them silently.
                print(f"  HTTP {r.status_code} for chunk {start_ch} to {end_ch}; chunk skipped")
            else:
                root = ET.fromstring(r.content)
                for member in root.findall('.//wfs:member', ns):
                    elm = member.find('.//BsWfs:BsWfsElement', ns)
                    if elm is None:
                        continue

                    t_node = elm.find('BsWfs:Time', ns)
                    p_node = elm.find('BsWfs:ParameterName', ns)
                    v_node = elm.find('BsWfs:ParameterValue', ns)
                    # Skip just this element when a field is missing rather
                    # than abandoning the rest of the window.
                    if t_node is None or p_node is None or v_node is None:
                        continue
                    if not t_node.text:
                        continue

                    # Parameters without a configured output column are ignored.
                    column = task['map'].get(p_node.text)
                    if column is None:
                        continue

                    try:
                        v = float(v_node.text)
                    except (TypeError, ValueError):
                        # Empty or non-numeric value: nothing usable to store.
                        continue
                    if pd.isna(v):
                        # The literal 'NaN' parses to a float NaN; drop it.
                        continue

                    all_rows.append({'Time': t_node.text, 'Type': column, 'Value': v})
        except Exception as e:
            print(f"  Error in chunk: {e}")
        # Short pause between requests to go easy on the service.
        time.sleep(0.05)

    if all_rows:
        df_year = pd.DataFrame(all_rows)
        df_year['Time'] = pd.to_datetime(df_year['Time'])
        # Long -> wide: one row per timestamp, one column per parameter.
        # pivot_table averages duplicate (Time, Type) pairs, so a timestamp
        # returned by two adjacent windows collapses into a single row.
        df_year = df_year.pivot_table(index='Time', columns='Type', values='Value')
        # Fill short runs of missing values: at most 6 consecutive rows, which
        # is one hour if observations arrive every 10 minutes (TODO confirm the
        # step). Timestamps absent from the response are not re-created here.
        df_year = df_year.interpolate(method='linear', limit=6)
        all_years_data.append(df_year.reset_index())
    else:
        print(f"  No observations retrieved for {year}")

# --- PROCESSING AVERAGES ---
# Combine the yearly frames and average every (Month, Day, Hour) slot across
# all downloaded years, then write the resulting table to OUTPUT_FILE.
if all_years_data:
    master_df = pd.concat(all_years_data, ignore_index=True)

    # Each year is requested up to 00:00 on 1 January of the following year.
    # If the service includes the end time (not verifiable from this script),
    # that timestamp lands in two yearly frames and would be weighted twice,
    # and the very last one lies outside YEARS. Both steps are no-ops otherwise.
    master_df = master_df.drop_duplicates(subset='Time')
    master_df = master_df[master_df['Time'].dt.year.isin(list(YEARS))]

    # 1. Remove Leap Year (Feb 29) so every year contributes the same days
    is_leap_day = (master_df['Time'].dt.month == 2) & (master_df['Time'].dt.day == 29)
    master_df = master_df[~is_leap_day].copy()

    # 2. Add Keys
    master_df['Month'] = master_df['Time'].dt.month
    master_df['Day'] = master_df['Time'].dt.day
    master_df['Hour'] = master_df['Time'].dt.hour

    # 3. Group by Month, Day, Hour to get the multi-year average.
    #    numeric_only=True leaves the 'Time' column out of the mean.
    climate_avg = master_df.groupby(['Month', 'Day', 'Hour']).mean(numeric_only=True).reset_index()

    # 4. Final Sort and Save
    climate_avg = climate_avg.sort_values(['Month', 'Day', 'Hour'])
    climate_avg.to_csv(OUTPUT_FILE, index=False)

    print(f"\nSuccess! File saved as {OUTPUT_FILE}")
    print(f"Final dataset contains {len(climate_avg)} rows (8760 = full year).")
    print(climate_avg.head())
else:
    print("No data retrieved.")


--- Fetching Year 2015 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2016 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2017 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2018 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2019 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2020 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2021 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progress: 45/53 chunks

--- Fetching Year 2022 ---
  Progress: 0/53 chunks
  Progress: 15/53 chunks
  Progress: 30/53 chunks
  Progres