<a href="https://colab.research.google.com/github/simulate111/General/blob/main/Turku_ERA5_CDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install "cdsapi>=0.7.7"



In [2]:
!pip install netcdf4



In [3]:
import os

# Updated configuration: removed the UID prefix as per the error instructions
content = """url: https://cds.climate.copernicus.eu/api
key: c025f203-5930-4d9c-acd6-699c46be7fd8"""

with open(os.path.expanduser('~/.cdsapirc'), 'w') as f:
    f.write(content)

print("Configuration updated! Now attempting to update the library...")

# Also update your library to the latest version to match the new API
!pip install --upgrade cdsapi

Configuration updated! Now attempting to update the library...


In [None]:
import cdsapi
import os
import calendar

# --- Request configuration -------------------------------------------------
DATASET = 'reanalysis-era5-land'
VARIABLES = [
    '2m_temperature',
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    'surface_solar_radiation_downwards',
]
# CDS bounding box order is [North, West, South, East]. North == South and
# West == East here, i.e. the box collapses to the single point 60.5N, 22.5E.
AREA = [60.5, 22.5, 60.5, 22.5]
HOURS = [f"{h:02d}:00" for h in range(24)]

# CRITICAL FIX: Set progress=False to prevent widget metadata creation
c = cdsapi.Client(progress=False)

# Range: every month of 2024, plus 1 January 2025 only (see `days` below).
# NOTE(review): presumably the extra day is there so that the 00:00 value
# closing 31 December 2024 is available - confirm.
tasks = [('2024', f"{m:02d}") for m in range(1, 13)] + [('2025', '01')]
total = len(tasks)
failed = []

print("--- Data Download Started (GitHub-Safe Mode) ---")

for i, (year, month) in enumerate(tasks, start=1):
    filename = f'turku_{year}_{month}.nc'

    # Completed downloads are kept between runs so the loop can be resumed.
    if os.path.exists(filename):
        print(f"[{i}/{total}] {filename} already exists. Skipping.")
        continue

    # Determine days: Full month for 2024, but only Jan 1st for 2025
    if year == '2025':
        days = ['01']
    else:
        last_day = calendar.monthrange(int(year), int(month))[1]
        days = [f"{d:02d}" for d in range(1, last_day + 1)]

    print(f"[{i}/{total}] Requesting {month}/{year}...")

    # Download under a temporary name and rename only on success; otherwise an
    # interrupted transfer would leave a truncated file that the
    # "already exists" check above would skip on every later run.
    partial = filename + '.part'
    try:
        c.retrieve(
            DATASET,
            {
                'variable': VARIABLES,
                'year': year,
                'month': month,
                'day': days,
                'time': HOURS,
                'area': AREA,
                # Request keys of the current CDS API. 'unarchived' asks for
                # the NetCDF file itself instead of a ZIP wrapped around it.
                'data_format': 'netcdf',
                'download_format': 'unarchived',
            },
            partial)
        os.replace(partial, filename)
        # We print a success message that will stay in your GitHub logs
        print(f"      ✅ {filename} successfully downloaded and saved.")
    except Exception as e:
        # Best effort: report the failure and continue with the next month.
        failed.append(filename)
        if os.path.exists(partial):
            os.remove(partial)
        print(f"      ❌ Failed {year}-{month}: {e}")

if failed:
    print(f"--- Finished with {len(failed)} failed download(s): {', '.join(failed)} ---")
else:
    print("--- All Downloads Complete ---")

2025-12-20 05:48:21,144 INFO [2025-12-03T00:00:00Z] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.
INFO:ecmwf.datastores.legacy_client:[2025-12-03T00:00:00Z] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.


--- Data Download Started (GitHub-Safe Mode) ---
[1/13] Requesting 01/2024...


2025-12-20 05:48:21,585 INFO [2025-12-11T00:00:00] Please note that a dedicated catalogue entry for this dataset, post-processed and stored in Analysis Ready Cloud Optimized (ARCO) format (Zarr), is available for optimised time-series retrievals (i.e. for retrieving data from selected variables for a single point over an extended period of time in an efficient way). You can discover it [here](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-land-timeseries?tab=overview)
INFO:ecmwf.datastores.legacy_client:[2025-12-11T00:00:00] Please note that a dedicated catalogue entry for this dataset, post-processed and stored in Analysis Ready Cloud Optimized (ARCO) format (Zarr), is available for optimised time-series retrievals (i.e. for retrieving data from selected variables for a single point over an extended period of time in an efficient way). You can discover it [here](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-land-timeseries?tab=overview)
2025-12-20 05:48:21,587

In [None]:
!pip install netcdf4 h5netcdf

In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import glob
import tempfile
import zipfile
import os


def read_era5_file(path, extract_dir):
    """Load one downloaded file into a flat DataFrame.

    The file may be a plain NetCDF file or a ZIP archive that contains one
    (detected from the 'PK' magic bytes, not from the file extension).

    Parameters
    ----------
    path : str
        File to read.
    extract_dir : str
        Existing directory in which a private scratch folder is created when
        an archive has to be unpacked. The scratch folder is removed again
        before the function returns.

    Returns
    -------
    pandas.DataFrame
        ``Dataset.to_dataframe().reset_index()`` of the file content.

    Raises
    ------
    ValueError
        If ``path`` is a ZIP archive without any ``*.nc`` member.
    """
    # Check if the file is actually a ZIP (first two bytes are 'PK')
    with open(path, 'rb') as fh:
        is_zip = fh.read(2) == b'PK'

    if not is_zip:
        # The context manager closes the file handle once the data is copied.
        with xr.open_dataset(path, engine='netcdf4') as ds:
            frame = ds.to_dataframe().reset_index()
        print(f"✅ Successfully read normal NetCDF: {path}")
        return frame

    print(f"📦 {path} is a ZIP archive. Extracting...")
    # A fresh scratch folder per archive guarantees that the glob below can
    # only see members of *this* archive, never leftovers of an earlier one.
    with tempfile.TemporaryDirectory(dir=extract_dir) as scratch:
        with zipfile.ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(scratch)

        inner_files = sorted(glob.glob(os.path.join(scratch, "*.nc")))
        if not inner_files:
            raise ValueError("ZIP archive contains no .nc file")
        if len(inner_files) > 1:
            print(f"⚠️ {path} contains {len(inner_files)} .nc files; only "
                  f"{os.path.basename(inner_files[0])} is read.")

        # Close the dataset before the scratch folder (and the file) is deleted.
        with xr.open_dataset(inner_files[0], engine='netcdf4') as ds:
            frame = ds.to_dataframe().reset_index()

    print(f"✅ Successfully read data from inside {path}")
    return frame


def deaccumulate_hourly(times, accumulated):
    """Turn a since-00-UTC running total into per-hour increments.

    ERA5-Land accumulations restart every day: the 01:00 value covers
    00:00-01:00, the 02:00 value covers 00:00-02:00, and so on up to the
    00:00 value of the next day, which holds the full 24 h total. The hourly
    amount is therefore the difference to the previous hour, except at 01:00
    where the value already is the hourly amount.

    Parameters
    ----------
    times : pandas.Series of datetime64
        Timestamps in ascending order.
    accumulated : pandas.Series
        Accumulated values aligned with ``times``.

    Returns
    -------
    pandas.Series
        Increment for the hour ending at each timestamp. NaN where the
        previous hour is missing (first row, or after a gap in the data).
    """
    step = times.diff()
    # Only difference against a row that is exactly one hour older.
    increment = accumulated.diff().where(step == pd.Timedelta(hours=1))
    # 01:00 is the first step after the daily reset: keep the raw value.
    return increment.where(times.dt.hour != 1, accumulated)


def add_derived_columns(df):
    """Return a time-sorted copy of ``df`` with the converted weather columns.

    Adds ``Air_Temp_C`` (t2m, K to degC), ``Wind_Speed_ms`` (magnitude of the
    u10/v10 components) and ``GHI_Wm2`` (mean irradiance over the hour ending
    at the timestamp, from the de-accumulated ssrd in J/m2 divided by 3600 s).

    NOTE(review): the de-accumulation assumes one row per timestamp, i.e. a
    single grid point - confirm that the downloaded area yields exactly one.
    """
    out = df.sort_values('valid_time').reset_index(drop=True)
    times = pd.to_datetime(out['valid_time'])

    out['Air_Temp_C'] = out['t2m'] - 273.15
    out['Wind_Speed_ms'] = np.sqrt(out['u10']**2 + out['v10']**2)

    hourly_energy = deaccumulate_hourly(times, out['ssrd'])
    # Differences of packed/rounded values can come out marginally negative
    # at night; irradiance cannot be below zero.
    out['GHI_Wm2'] = (hourly_energy / 3600).clip(lower=0)
    return out


# 1. Setup paths
file_list = sorted(glob.glob('turku_*.nc'))
extract_dir = 'extracted_temp'
os.makedirs(extract_dir, exist_ok=True)
all_data = []

print(f"Checking {len(file_list)} files for ZIP compression...")

for f in file_list:
    try:
        all_data.append(read_era5_file(f, extract_dir))
    except Exception as e:
        # Best effort: one unreadable month must not abort the others.
        print(f"❌ Error processing {f}: {e}")

# 2. Final Merge and Conversion
if all_data:
    df_combined = add_derived_columns(pd.concat(all_data, ignore_index=True))

    # Clean up and Export (rename by name so column order cannot mislabel data)
    final_output = df_combined[
        ['valid_time', 'Air_Temp_C', 'Wind_Speed_ms', 'GHI_Wm2']
    ].rename(columns={'valid_time': 'Timestamp', 'GHI_Wm2': 'GHI_W_m2'})

    final_output.to_csv('Turku_Final_Weather_2024.csv', index=False)

    print("\n" + "="*30)
    print("SUCCESS! Final CSV created.")
    print(f"Total rows: {len(final_output)}")
    print("="*30)
    print(final_output.head())
else:
    print("🛑 No data frames were created. Check if files are 0 KB.")

In [None]:
# Attach the user's Google Drive to the Colab file system.
# Only available on a Colab runtime; an authorisation prompt may appear.
import google.colab.drive as drive

drive.mount(mountpoint='/content/drive')

In [None]:
import json


def strip_widget_metadata(path):
    """Remove the top-level ``metadata.widgets`` entry from a notebook file.

    Parameters
    ----------
    path : str
        Path of the ``.ipynb`` file; it is rewritten in place when changed.

    Returns
    -------
    bool
        True if the entry was present and the file was rewritten,
        False if there was nothing to remove (the file is left untouched).

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        nb_data = json.load(f)

    metadata = nb_data.get('metadata') if isinstance(nb_data, dict) else None
    if not isinstance(metadata, dict) or 'widgets' not in metadata:
        return False

    del metadata['widgets']

    # ensure_ascii=False keeps non-ASCII text (emoji, accents) readable instead
    # of rewriting every such character in the notebook as a \uXXXX escape.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(nb_data, f, indent=1, ensure_ascii=False)
        f.write('\n')
    return True


# 1. Get the name of your current notebook
# If you are in Colab, it is usually 'Untitled.ipynb' unless you renamed it
notebook_path = 'your_notebook_name.ipynb'

try:
    # 2./3. Remove the 'widgets' metadata key and save the notebook back to disk.
    # Success is reported only after the file has actually been written.
    if strip_widget_metadata(notebook_path):
        print("✅ Success: Broken widget metadata removed.")
        print("✨ Notebook is now GitHub-safe. You can now push/upload it.")
    else:
        print("ℹ️ No widget metadata found. The error might be in the file name or a specific cell's output.")

except FileNotFoundError:
    print(f"❌ Error: Could not find '{notebook_path}'. Please check the filename in the sidebar.")
except json.JSONDecodeError as e:
    print(f"❌ Error: '{notebook_path}' is not valid notebook JSON: {e}")