In [None]:
import xarray as xr
import numpy as np
import glob
import os
import pandas as pd
import matplotlib.pyplot as plt



In [None]:
# Bounding box used to crop the data to Germany (coordinate units as stored in the files)
LAT_MIN = 47  # lower latitude bound
LAT_MAX = 55  # upper latitude bound
LON_MIN = 5   # lower longitude bound
LON_MAX = 15  # upper longitude bound

def _ordered_slice(coord_values, low, high):
    """Return a label slice covering ``low..high`` that follows the coordinate's sort order.

    Label-based slicing on a monotonic coordinate only selects data when the
    slice runs in the same direction as the coordinate, so a descending
    coordinate needs ``slice(high, low)`` and an ascending one ``slice(low, high)``.
    """
    descending = len(coord_values) > 1 and coord_values[0] > coord_values[-1]
    return slice(high, low) if descending else slice(low, high)


def load_and_filter_precipitation(nc_files):
    """Load precipitation from NetCDF files and crop it to Germany's bounding box.

    Each file is opened with xarray, its ``precip`` variable is cropped to
    ``LAT_MIN..LAT_MAX`` / ``LON_MIN..LON_MAX`` and flattened into a DataFrame.

    Parameters
    ----------
    nc_files : iterable of str
        Paths of the NetCDF files to read.

    Returns
    -------
    list of pandas.DataFrame
        One frame per file that had data inside the bounding box. Files
        without a ``precip`` variable, or with no data in the box, are
        skipped with a printed warning.
    """
    all_data = []

    for nc_file in nc_files:
        print(f"Processing: {nc_file}")  # Debug print

        # The context manager releases the file handle on every path,
        # including the early `continue` and any exception.
        with xr.open_dataset(nc_file) as ds:
            # Print dataset layout for debugging
            print(f"Dataset dimensions: {ds.dims}")
            print(f"Dataset variables: {list(ds.variables)}")

            # Ensure the dataset contains the 'precip' variable
            if 'precip' not in ds:
                print(f"Warning: 'precip' variable not found in {nc_file}")
                continue

            precip = ds['precip']

            # Files name their spatial dimensions either lat/lon or latitude/longitude
            lat_name = "lat" if "lat" in ds.dims else "latitude"
            lon_name = "lon" if "lon" in ds.dims else "longitude"

            # Orient each slice to the stored coordinate order; a slice running
            # against the coordinate direction would silently select nothing.
            precip_germany = precip.sel(**{
                lat_name: _ordered_slice(precip[lat_name].values, LAT_MIN, LAT_MAX),
                lon_name: _ordered_slice(precip[lon_name].values, LON_MIN, LON_MAX),
            })

            # Materialise as a DataFrame before the file is closed
            df = precip_germany.to_dataframe().reset_index()

        if df.empty:
            print(f"Warning: No data for Germany in {nc_file}")
        else:
            all_data.append(df)

    return all_data

In [None]:
DATA_FOLDER = "unzipped_nc_files"  # Folder where .nc files are stored

# Step 1: Collect the NetCDF files.
# glob returns paths in arbitrary order; sorting keeps the row order of the
# concatenated data reproducible across runs and machines.
nc_files = sorted(glob.glob(os.path.join(DATA_FOLDER, "*.nc")))
if not nc_files:
    # Stop here with a clear message; otherwise the failure only surfaces
    # later when an empty list of frames is concatenated.
    raise FileNotFoundError(f"No NetCDF files found in {DATA_FOLDER}")
print(f"Found {len(nc_files)} NetCDF files.")

# Step 2: Load & filter precipitation data for Germany
data_frames = load_and_filter_precipitation(nc_files)

In [None]:
# Stack the per-file frames into one table
if not data_frames:
    # pd.concat would raise the same exception type with a less helpful message
    raise ValueError("No precipitation data was loaded; nothing to concatenate.")
df_all = pd.concat(data_frames, ignore_index=True)

# Drop rows without a precipitation value and make sure 'time' is datetime.
# .assign builds a new frame, so later cells can add columns to
# df_all_cleaned without triggering pandas' SettingWithCopyWarning.
df_all_cleaned = (
    df_all
    .dropna(subset=['precip'])
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)

# Calculate the mean precipitation for each calendar month
df_monthly_mean = (
    df_all_cleaned
    .groupby(df_all_cleaned['time'].dt.to_period('M'))['precip']
    .mean()
    .reset_index()
)

# Convert the monthly period back to a string (e.g. "2001-03") for clarity
df_monthly_mean['time'] = df_monthly_mean['time'].astype(str)



In [None]:
# Define a function to determine the season based on the month
def get_season(month):
    """Return the season name for a month number.

    Months 12, 1 and 2 map to "Winter", 3-5 to "Spring" and 6-8 to "Summer".
    Every other value (including 9-11) maps to "Autumn".
    """
    season_months = (
        ("Winter", (12, 1, 2)),
        ("Spring", (3, 4, 5)),
        ("Summer", (6, 7, 8)),
    )
    for season_name, months in season_months:
        if month in months:
            return season_name
    # Anything not matched above falls through to autumn
    return "Autumn"

# Add calendar year and season columns.
# .assign rebinds df_all_cleaned to a new frame instead of writing columns
# into the existing one, which avoids SettingWithCopyWarning and keeps the
# cell safe to re-run.
df_all_cleaned = (
    df_all_cleaned
    .assign(time=lambda frame: pd.to_datetime(frame['time']))  # no-op if already datetime
    .assign(
        year=lambda frame: frame['time'].dt.year,
        season=lambda frame: frame['time'].dt.month.map(get_season),
    )
)

# NOTE(review): 'year' is the calendar year, so "Winter" of year Y combines
# Jan/Feb of Y with Dec of Y (two different meteorological winters) — confirm
# this is the intended definition.

# Group once by (year, season) and derive mean / max / min from the same groups
seasonal_precip = df_all_cleaned.groupby(['year', 'season'])['precip']
df_seasonal_yearly_mean = seasonal_precip.mean().reset_index()
# NOTE(review): df_seasonal_max / df_seasonal_min are not referenced by any
# later cell visible in this notebook.
df_seasonal_max = seasonal_precip.max().reset_index()
df_seasonal_min = seasonal_precip.min().reset_index()

# Display the result


In [None]:
# Winter vs. summer: yearly mean precipitation, one line per season
is_winter = df_seasonal_yearly_mean["season"] == "Winter"
is_summer = df_seasonal_yearly_mean["season"] == "Summer"
df_winter = df_seasonal_yearly_mean[is_winter]
df_summer = df_seasonal_yearly_mean[is_summer]

fig, ax = plt.subplots(figsize=(12, 6))
for season_label, season_frame, line_color in (
    ("Winter", df_winter, "b"),
    ("Summer", df_summer, "r"),
):
    ax.plot(
        season_frame["year"],
        season_frame["precip"],
        marker="o",
        linestyle="-",
        label=season_label,
        color=line_color,
    )
ax.set_xlabel("Year")
ax.set_ylabel("Precipitation")
ax.set_title("Winter and Summer Seasonal Yearly Mean Precipitation")
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Make sure 'time' is datetime and (re)derive the calendar year.
# .assign rebinds df_all_cleaned to a new frame rather than writing columns
# in place, which avoids SettingWithCopyWarning and keeps the cell re-runnable.
df_all_cleaned = (
    df_all_cleaned
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .assign(year=lambda frame: frame['time'].dt.year)
)

# Yearly mean over all individual precipitation values
df_yearly_mean = df_all_cleaned.groupby('year')['precip'].mean().reset_index()

# Yearly max / min are taken over the per-season MEANS in
# df_seasonal_yearly_mean, i.e. the wettest and driest seasonal mean of each
# year — not the extreme individual values in df_all_cleaned.
# NOTE(review): confirm this is intended; the plot labels them just "Max"/"Min".
seasonal_means_by_year = df_seasonal_yearly_mean.groupby('year')['precip']
df_yearly_max = seasonal_means_by_year.max().reset_index()
df_yearly_min = seasonal_means_by_year.min().reset_index()


# Display the result


In [None]:
import matplotlib.pyplot as plt

# Yearly mean, max and min precipitation on a single set of axes
fig, ax = plt.subplots(figsize=(12, 6))

# (frame, line colour, legend label) for each series, drawn in this order
yearly_series = (
    (df_yearly_mean, 'b', "Mean"),
    (df_yearly_max, 'r', "Max"),
    (df_yearly_min, 'g', "Min"),
)
for yearly_frame, line_color, series_label in yearly_series:
    ax.plot(
        yearly_frame['year'],
        yearly_frame['precip'],
        marker='o',
        linestyle='-',
        color=line_color,
        label=series_label,
    )

ax.set_xlabel("Year")
ax.set_ylabel("Precipitation")
ax.set_title("Yearly Mean, Max, and Min Precipitation")
ax.legend()
ax.grid(True)
plt.show()
