<a href="https://colab.research.google.com/github/simulate111/Climatic_Data/blob/main/Copenhagen_Denmark_meteorological_data_DMI(2015_2024)10minutes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import requests
import pandas as pd
import time

# --- CONFIGURATION ---
# First and last calendar year to download (both are fetched).
START_YEAR, END_YEAR = 2015, 2024
# Paste your DMI API Key here if you encounter 401 errors
API_KEY = ""
BASE_URL = (
    "https://opendataapi.dmi.dk/v2/metObs/collections/observation/items"
)

# TASKS
# One entry per series to download:
#   station -> station id sent to the API
#   param   -> parameter id sent to the API
#   name    -> column name used for the series in the output
tasks = [
    # Station 06180 (Copenhagen Airport)
    dict(station="06180", param="temp_dry", name="Temperature_C"),
    dict(station="06180", param="wind_speed", name="Wind_Speed_ms"),
    # Station 06187 (Copenhagen Toldbod)
    dict(station="06187", param="radia_glob_past1h", name="Global_Radiation_Wm2"),
]

def fetch_dmi_year(year, task, timeout=60):
    """Fetch one year of one parameter and condense it to 10-minute means.

    Args:
        year: Calendar year to request. The requested window runs from
            ``{year}-01-01T00:00:00Z`` to ``{year+1}-01-01T00:00:00Z``.
        task: Mapping with the keys ``'station'`` (sent as ``stationId``),
            ``'param'`` (sent as ``parameterId``) and ``'name'`` (used as
            the name of the value column in the result).
        timeout: Seconds to wait for the server before giving up on a
            request. Without a timeout ``requests`` waits indefinitely, so
            a single stalled connection would block the whole extraction.

    Returns:
        A DataFrame indexed by ``Time`` with a single column named
        ``task['name']``, resampled to 10-minute bins (mean per bin).
        An empty DataFrame is returned when the response status is not
        200, when the response contains no features, or when the request
        or the parsing raises.
    """
    start_str = f"{year}-01-01T00:00:00Z"
    end_str = f"{year + 1}-01-01T00:00:00Z"

    params = {
        "stationId": task['station'],
        "datetime": f"{start_str}/{end_str}",
        "parameterId": task['param'],
        "limit": 300000,
    }

    headers = {'X-Gravitee-Api-Key': API_KEY} if API_KEY else {}
    print(f"   > Fetching {year}...", end=" ", flush=True)

    try:
        r = requests.get(BASE_URL, params=params, headers=headers,
                         timeout=timeout)

        # Fallback for Radiation: retry once with the alternative
        # parameter id when the first request did not succeed.
        if r.status_code != 200 and task['name'] == "Global_Radiation_Wm2":
            params['parameterId'] = 'radia_glob'
            r = requests.get(BASE_URL, params=params, headers=headers,
                             timeout=timeout)

        if r.status_code != 200:
            print(f"Error {r.status_code}")
            return pd.DataFrame()

        data = r.json().get('features', [])
        if not data:
            print("No data.")
            return pd.DataFrame()

        records = [{
            'Time': item['properties']['observed'],
            task['name']: item['properties']['value']
        } for item in data]

        df = pd.DataFrame(records)
        # Force a UTC-aware index: the cleanup step later compares the
        # index against tz-aware UTC timestamps, which fails on a naive
        # index.
        df['Time'] = pd.to_datetime(df['Time'], utc=True)

        # '10min' snaps timestamps to 00:00, 00:10, 00:20...
        df = df.set_index('Time').resample('10min').mean()
        print(f"OK ({len(df)} rows)")
        return df

    except Exception as e:
        # Deliberately best-effort: one failed year (network error,
        # timeout, malformed payload) must not abort the multi-year run.
        print(f"Failed: {e}")
        return pd.DataFrame()

# --- 1. FETCH DATA ---
# Download every configured series year by year, stitch the years of each
# series together, then place the series side by side in `df_raw`.
all_data = []
print(f"Starting Extraction ({START_YEAR}-{END_YEAR})...")

for task in tasks:
    print(f"\nProcessing {task['name']} (Station {task['station']}):")
    task_dfs = []

    for year in range(START_YEAR, END_YEAR + 1):
        df_year = fetch_dmi_year(year, task)
        if not df_year.empty:
            task_dfs.append(df_year)
        time.sleep(0.2)  # short pause between consecutive API requests

    if task_dfs:
        full_task_df = pd.concat(task_dfs)
        # Remove duplicates caused by year overlap: consecutive request
        # windows share the 1 January 00:00 instant. The earlier year's
        # copy of that 10-minute bin can only hold observations up to the
        # window end, whereas the later year's copy covers the whole bin,
        # so the later one is kept.
        full_task_df = full_task_df[~full_task_df.index.duplicated(keep='last')]
        all_data.append(full_task_df)

if not all_data:
    print("No data retrieved.")
    # `exit()` is the interactive helper installed by `site`; in a
    # notebook it shuts the kernel down. Raising SystemExit stops the
    # run without relying on that helper.
    raise SystemExit(1)

print("\nMerging all parameters...")
df_raw = pd.concat(all_data, axis=1)

# --- 2. CLEANUP ---
# Keep only rows whose timestamp falls inside the configured years (UTC).
window_start = pd.Timestamp(f"{START_YEAR}-01-01", tz='UTC')
window_end = pd.Timestamp(f"{END_YEAR}-12-31 23:59:59", tz='UTC')
mask = (df_raw.index >= window_start) & (df_raw.index <= window_end)
df_raw = df_raw.loc[mask]

# Interpolate missing 10-min slots (time-weighted, at most 6 consecutive
# missing slots are filled per gap).
df_raw = df_raw.interpolate(method='time', limit=6)

# Radiation only: whatever is still missing becomes 0, and negative
# readings are raised to 0.
if 'Global_Radiation_Wm2' in df_raw.columns:
    radiation = df_raw['Global_Radiation_Wm2'].fillna(0)
    df_raw['Global_Radiation_Wm2'] = radiation.clip(lower=0)

# --- 3. CALCULATE MULTI-YEAR 10-MINUTE AVERAGES ---
# Number of calendar years covered by the configuration (10 for 2015-2024).
n_years = END_YEAR - START_YEAR + 1
print(f"Calculating {n_years}-year 10-Minute Averages...")

# Drop the timezone (values stay in UTC) so the calendar fields used for
# grouping are read from a naive index.
if df_raw.index.tz is not None:
    df_raw.index = df_raw.index.tz_convert(None)

# Group by Month, Day, Hour, AND MINUTE: every 10-minute slot of the
# calendar year is averaged across all years that contain it
# (29 February slots therefore only average leap-year samples).
grouped = df_raw.groupby([
    df_raw.index.month,
    df_raw.index.day,
    df_raw.index.hour,
    df_raw.index.minute
]).mean()

grouped.index.names = ['Month', 'Day', 'Hour', 'Minute']
df_avg = grouped.reset_index()

# Create dummy timestamp for display (using 2024 for Leap Year support).
# The timestamp is assembled from numeric components instead of parsing
# unpadded strings such as '2024-1-1 0:0:00', so it does not depend on
# pandas' date-format inference.
df_avg['Dummy_Timestamp'] = pd.to_datetime(
    pd.DataFrame({
        'year': 2024,
        'month': df_avg['Month'],
        'day': df_avg['Day'],
        'hour': df_avg['Hour'],
        'minute': df_avg['Minute'],
    }),
    errors='coerce'
)
df_avg = df_avg.dropna(subset=['Dummy_Timestamp']).sort_values('Dummy_Timestamp')

# Format Output
df_avg['Display_Date'] = df_avg['Dummy_Timestamp'].dt.strftime('%m-%d')
df_avg['Display_Time'] = df_avg['Dummy_Timestamp'].dt.strftime('%H:%M')

# Only export the series that were actually retrieved: a task whose
# downloads all failed has no column in df_avg, and selecting it by name
# would raise KeyError after all the fetching work is done.
value_columns = [t['name'] for t in tasks if t['name'] in df_avg.columns]
final_output = df_avg[['Display_Date', 'Display_Time'] + value_columns]

print(f"Success! Created {len(final_output)} rows (10-min resolution).")
print(final_output.head())
# File name follows the configured range; for 2015-2024 this is
# 'Copenhagen_10Year_10Min_Average_2015-2024.csv'.
final_output.to_csv(
    f"Copenhagen_{n_years}Year_10Min_Average_{START_YEAR}-{END_YEAR}.csv",
    index=False
)

Starting Extraction (2015-2024)...

Processing Temperature_C (Station 06180):
   > Fetching 2015... OK (52561 rows)
   > Fetching 2016... OK (52705 rows)
   > Fetching 2017... OK (52561 rows)
   > Fetching 2018... OK (52561 rows)
   > Fetching 2019... OK (52561 rows)
   > Fetching 2020... OK (52705 rows)
   > Fetching 2021... OK (52561 rows)
   > Fetching 2022... OK (52561 rows)
   > Fetching 2023... OK (52561 rows)
   > Fetching 2024... OK (52704 rows)

Processing Wind_Speed_ms (Station 06180):
   > Fetching 2015... OK (52561 rows)
   > Fetching 2016... OK (52705 rows)
   > Fetching 2017... OK (52561 rows)
   > Fetching 2018... OK (52561 rows)
   > Fetching 2019... OK (52561 rows)
   > Fetching 2020... OK (52705 rows)
   > Fetching 2021... OK (52561 rows)
   > Fetching 2022... OK (52561 rows)
   > Fetching 2023... OK (52561 rows)
   > Fetching 2024... OK (52704 rows)

Processing Global_Radiation_Wm2 (Station 06187):
   > Fetching 2015... OK (52561 rows)
   > Fetching 2016... OK (52705