<a href="https://colab.research.google.com/github/simulate111/General/blob/main/T%C3%B3rshavn_Faroe_Islands.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import requests
import pandas as pd
import time

# --- CONFIGURATION ---
base_url = "https://opendataapi.dmi.dk/v2/metObs/collections/observation/items"

# TRICK: Start fetching from Dec 31st to ensure we catch Jan 1st 00:00
start_date_fetch = "2023-12-31T12:00:00Z"
end_date = "2025-01-01T00:00:00Z"
limit = 300000

# Target Station: TÃ³rshavn (06011) or Vagar (06010)
station_id = "06011"

tasks = [
    {"param": "temp_dry",   "name": "Temperature_C"},
    {"param": "wind_speed", "name": "Wind_Speed_ms"},
    {"param": "radia_glob", "name": "Global_Radiation_Wm2"}
]

dfs = []
print(f"Fetching data... (Starting from {start_date_fetch} to catch midnight data)")

for task in tasks:
    params = {
        "stationId": station_id,
        "datetime": f"{start_date_fetch}/{end_date}",
        "parameterId": task['param'],
        "limit": limit
    }

    try:
        r = requests.get(base_url, params=params)

        # Fallback for Radiation
        if r.status_code != 200 and "radia" in task['param']:
             params['parameterId'] = 'radia_glob_past1h'
             r = requests.get(base_url, params=params)

        if r.status_code == 200:
            data = r.json().get('features', [])
            records = [{
                'Time': item['properties']['observed'],
                task['name']: item['properties']['value']
            } for item in data]

            df_temp = pd.DataFrame(records)
            if not df_temp.empty:
                # Ensure UTC
                df_temp['Time'] = pd.to_datetime(df_temp['Time']).dt.tz_convert(None) # Remove TZ info to keep it pure UTC

                # Resample to Hourly
                df_temp = df_temp.set_index('Time').resample('h').mean()
                dfs.append(df_temp)
                print(f" > Found {len(df_temp)} records for {task['name']}")
    except Exception as e:
        print(f" > Error: {e}")
    time.sleep(0.5)

if dfs:
    # Merge
    df_final = pd.concat(dfs, axis=1).sort_index()

    # Fill Gaps
    df_final = df_final.interpolate(method='linear')

    # --- FILTER TO EXACTLY 2024 ---
    # Now that we fetched early data, we clip it strictly to 2024-01-01 00:00
    df_final = df_final[(df_final.index >= "2024-01-01 00:00:00") & (df_final.index < "2025-01-01 00:00:00")]

    # Format
    df_final = df_final.reset_index()
    df_final['Date'] = df_final['Time'].dt.strftime('%Y-%m-%d')
    df_final['Hour'] = df_final['Time'].dt.strftime('%H:%M')

    # Output
    cols = ['Date', 'Hour', 'Temperature_C', 'Wind_Speed_ms', 'Global_Radiation_Wm2']
    df_final = df_final[[c for c in cols if c in df_final.columns]]

    print("-" * 30)
    print(df_final.head())
    print("-" * 30)

    filename = "Torshavn_Corrected_Midnight_Start.csv"
    df_final.to_csv(filename, index=False)
    print(f"Saved to {filename}")
else:
    print("No data found.")

Fetching data... (Starting from 2023-12-31T12:00:00Z to catch midnight data)
 > Found 4866 records for Temperature_C
 > Found 4866 records for Wind_Speed_ms
------------------------------
         Date   Hour  Temperature_C  Wind_Speed_ms
0  2024-06-12  07:00       9.500000       3.100000
1  2024-06-12  08:00       9.916667       3.916667
2  2024-06-12  09:00      10.233333       4.766667
3  2024-06-12  10:00      11.166667       4.750000
4  2024-06-12  11:00      11.983333       3.050000
------------------------------
Saved to Torshavn_Corrected_Midnight_Start.csv
