In [27]:
import pandas as pd
import numpy as np
import xarray as xr
import cdsapi 

import glob
import sys
import os
import datetime

import matplotlib.path as mpath
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import cartopy.feature as cfeature
import geopandas as gpd
from shapely.geometry import mapping



In [50]:
# check the area

import folium

# Opposite corners of the bounding box to inspect, as (latitude, longitude) pairs.
top_left = (46.5, -72.7)
bottom_right = (46.2, -72.2)

# Centre the map on the midpoint between the two corners.
centre_lat = (top_left[0] + bottom_right[0]) / 2
centre_lon = (top_left[1] + bottom_right[1]) / 2
m = folium.Map(location=[centre_lat, centre_lon], zoom_start=6)

# Overlay the box as a translucent red rectangle.
area_outline = folium.Rectangle(
    bounds=[top_left, bottom_right],
    color='red',
    fill=True,
    fill_color='red',
    fill_opacity=0.2,
)
area_outline.add_to(m)

# Leave the map as the last expression so the notebook renders it.
m

In [3]:
# Bounding box for the north-east Atlantic.
# Presumably ordered [north, west, south, east] like the CDS `area` field — TODO confirm.
northeast_atlantic = [60, -40, 0, 0]
# Path of the NetCDF file that the `xr.open_dataset(target)` cell reads.
# NOTE(review): the file name mentions albedo while that cell selects 'sst', and `target`
# is reassigned in a later cell — confirm which file is intended before a Run All.
target = '../data/global_temperatures/era5/era5_global_albedo.nc'




In [52]:
# Request one '00:00' field per day, 2009-2024, of 2 m temperature and 2 m dew point
# from the ERA5 single-levels dataset, restricted to a small box, and save it to `target`.
# NOTE(review): this rebinds `target`, which the cell calling `xr.open_dataset(target)`
# also reads — confirm the intended file before running the notebook top to bottom.
target = '../data/global_temperatures/era5/becancour.nc'
dataset = "reanalysis-era5-single-levels"

# Zero-padded calendar selectors, generated instead of typed out by hand.
request_years = [str(year) for year in range(2009, 2025)]
request_months = [f'{month:02d}' for month in range(1, 13)]
request_days = [f'{day:02d}' for day in range(1, 32)]

request = dict(
    product_type=['reanalysis'],
    variable=['2m_dewpoint_temperature', '2m_temperature'],
    year=request_years,
    month=request_months,
    day=request_days,
    time=['00:00'],
    data_format='netcdf',
    download_format='unarchived',
    area=[46.5, -72.7, 46.2, -72.2],
)

client = cdsapi.Client()
client.retrieve(dataset, request, target)

2024-08-28 10:03:58,333 INFO Request ID is 1fde0546-a6d0-46cf-8217-10125bb26c7e
2024-08-28 10:03:58,487 INFO status has been updated to accepted


KeyboardInterrupt: 

In [4]:
# Open the NetCDF file that `target` currently points to.
# NOTE(review): `target` is assigned in more than one cell, so the file opened here
# depends on which of those cells ran last — confirm it is the SST download.
ne_atlantic_sst = xr.open_dataset(target)
# Select the 'sst' variable; the following cells derive everything from it.
sst = ne_atlantic_sst['sst']
sst

Could not load the ecCodes library!


In [29]:
# Shift the values by -273.15 and carry the source attributes over, relabelling
# the units as degrees Celsius.
celsius_attrs = {**sst.attrs, 'units': '° C'}
sst_degc = (sst - 273.15).assign_attrs(celsius_attrs)
sst_degc

In [6]:
# yearly_mean = sst_degc.groupby('valid_time.year').mean(keep_attrs=True)
# ref = yearly_mean.where((yearly_mean.year > 1990) & (yearly_mean.year < 2021), drop=True)
# ref_mean = ref.mean(dim="year", keep_attrs=True)

In [179]:
# Weight every grid row by the cosine of its latitude, then average over the
# two horizontal dimensions to get one value per time step.
latitude_radians = np.deg2rad(sst_degc.latitude)
weights = np.cos(latitude_radians).rename("weights")
sst_degc_weighted = sst_degc.weighted(weights)
sst_mean = sst_degc_weighted.mean(dim=["longitude", "latitude"])

In [184]:

def smooth_daily_values(data, n_wavenumbers=8):
    """Low-pass filter a periodic series by keeping only its lowest Fourier wavenumbers.

    The series is transformed with a real FFT, every coefficient from index
    ``n_wavenumbers`` upward is set to zero (so the mean and wavenumbers
    ``1 .. n_wavenumbers - 1`` survive), and the result is transformed back.

    A real FFT is used on purpose: truncating the *full* complex spectrum with
    ``coeffs[8:] = 0`` also discards the negative-frequency partners of the
    retained wavenumbers, and taking ``.real`` of the inverse then returns every
    retained harmonic at half its true amplitude.

    Parameters
    ----------
    data : array-like
        Real-valued samples; the transform is applied along the last axis.
    n_wavenumbers : int, optional
        Number of leading wavenumbers (including the mean) to keep. Default 8.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``data`` holding the smoothed series.

    Raises
    ------
    ValueError
        If ``n_wavenumbers`` is negative.
    """
    if n_wavenumbers < 0:
        raise ValueError("n_wavenumbers must be non-negative")

    data = np.asarray(data)

    # Nothing to transform; np.fft raises on zero-length input.
    if data.size == 0:
        return data.astype(float)

    n_samples = data.shape[-1]

    # One-sided spectrum of the real input: index k is wavenumber k.
    coeffs = np.fft.rfft(data, axis=-1)

    # Drop everything above the retained wavenumbers.
    coeffs[..., n_wavenumbers:] = 0

    # Passing n restores the original length for odd-length inputs as well.
    return np.fft.irfft(coeffs, n=n_samples, axis=-1)

# Step 1: Calculate mean daily values for the reference period (1990-2021).
# The slice keeps 1990-01-01 through 2021-12-31; each timestamp is tagged with its
# 'MM-DD' string and the series is averaged per calendar day.
reference_period = sst_mean.sel(valid_time=slice('1990-01-01', '2021-12-31'))
reference_period['month_day'] = reference_period['valid_time'].dt.strftime('%m-%d')
daily_means = reference_period.groupby('month_day').mean()

# Step 2: Smooth the per-calendar-day means with smooth_daily_values, applied along
# the 'month_day' dimension.
smoothed_daily_values = xr.apply_ufunc(
    smooth_daily_values,
    daily_means,
    input_core_dims=[['month_day']],
    output_core_dims=[['month_day']],
    vectorize=True
)

# Step 3: Tag the full series with its month and 'MM-DD' label (this adds both to
# `sst_mean` itself), then look up the smoothed reference value for every timestamp.
sst_mean['month'] = sst_mean['valid_time'].dt.month
sst_mean['month_day'] = sst_mean['valid_time'].dt.strftime('%m-%d')
aligned_smoothed_values = smoothed_daily_values.sel(month_day=sst_mean['month_day'])

# Additive offset per calendar month, keyed by zero-padded month number.
# NOTE(review): the origin of these values is not shown in this notebook — presumably
# an adjustment to a different baseline period; confirm and cite the source.
offset_dict = {
    '01': 0.96, '02': 0.96, '03': 0.95, '04': 0.91, '05': 0.87, '06': 0.83, 
    '07': 0.80, '08': 0.80, '09': 0.81, '10': 0.85, '11': 0.89, '12': 0.93
}

# Create an array of offsets corresponding to each month
# (one entry per timestamp, in the same order as sst_mean's 'valid_time' axis).
offsets = xr.DataArray(
    [offset_dict[str(month).zfill(2)] for month in sst_mean['month'].values],
    dims='valid_time'
)

# Step 4: Anomaly = series minus smoothed reference; then add the monthly offset.
sst_anom = sst_mean - aligned_smoothed_values
sst_anom_adjusted = sst_anom + offsets
# Step 5: Subtract, per day of year, the mean of the *unadjusted* anomaly taken over
# every timestamp in the series (not only the reference period).
# NOTE(review): this removes whatever day-of-year mean is left in `sst_anom`, computed
# over the whole record — confirm that is the intended normalisation.
sst_anom_normalized = sst_anom_adjusted.groupby('valid_time.dayofyear') - sst_anom.groupby('valid_time.dayofyear').mean()

In [185]:
# Name the series so the value column of the resulting frame is called 'sst'.
sst_anom_normalized.name = 'sst'

# Flatten to a table, then turn the index level(s) into ordinary columns.
sst_anom_indexed = sst_anom_normalized.to_dataframe()
sst_anom_df = sst_anom_indexed.reset_index()
sst_anom_df

Unnamed: 0,valid_time,number,expver,month,month_day,dayofyear,sst
0,1979-01-01,0,0001,1,01-01,1,0.649379
1,1979-01-02,0,0001,1,01-02,2,0.596371
2,1979-01-03,0,0001,1,01-03,3,0.567981
3,1979-01-04,0,0001,1,01-04,4,0.567181
4,1979-01-05,0,0001,1,01-05,5,0.588516
...,...,...,...,...,...,...,...
16651,2024-08-03,0,0005,8,08-03,216,1.590116
16652,2024-08-04,0,0005,8,08-04,217,1.624340
16653,2024-08-05,0,0005,8,08-05,218,1.638725
16654,2024-08-06,0,0005,8,08-06,219,1.643109


In [186]:
# Keep only what the pivot needs: the calendar-day label, the year taken from
# 'valid_time', and the anomaly value. `sst_anom_df` itself is left untouched.
sst_anom_clean = (
    sst_anom_df
    .assign(year=lambda frame: pd.to_datetime(frame['valid_time']).dt.year)
    .loc[:, ['month_day', 'year', 'sst']]
)
sst_anom_clean


Unnamed: 0,month_day,year,sst
0,01-01,1979,0.649379
1,01-02,1979,0.596371
2,01-03,1979,0.567981
3,01-04,1979,0.567181
4,01-05,1979,0.588516
...,...,...,...
16651,08-03,2024,1.590116
16652,08-04,2024,1.624340
16653,08-05,2024,1.638725
16654,08-06,2024,1.643109


In [187]:
# Reshape to one row per calendar day ('MM-DD') and one column per year.
pivot_df = sst_anom_clean.pivot(index='month_day', columns='year', values='sst')

# Label the columns with the years actually present in the data. A hard-coded
# range(1979, 2025) raises a length mismatch (or silently mislabels columns)
# as soon as the download covers a different span of years.
year_strings = [str(year) for year in pivot_df.columns]

pivot_df.columns = year_strings
pivot_df = pivot_df.reset_index()
pivot_df.to_csv('../data/damien_slides_data/era5_northeast_atlantic_sst_anom.csv', index=False)

In [1]:
import xarray as xr
import numpy as np
import os

big_file = '../data/global_temperatures/era5/temp_era5_global_coords_t2m_20240814.nc'
small_file = '../data/global_temperatures/era5/era5_global_coords_t2m_20240813.nc'
variable = 't2m'

# Open the download lazily in chunks. The context manager guarantees the file handle
# is released before the file is deleted below.
with xr.open_dataset(big_file, chunks={'time': 1, 'latitude': 100, 'longitude': 100}) as data:
    temp = data[variable]

    # Weight each grid row by the cosine of its latitude.
    weights = np.cos(np.deg2rad(temp.latitude))
    weights.name = "weights"

    # Area-weighted mean over both horizontal dimensions. `.load()` evaluates the lazy
    # result while the source is still open, so `temp_mean` stays usable after the
    # source file has been removed.
    temp_weighted = temp.weighted(weights)
    temp_mean = temp_weighted.mean(["longitude", "latitude"]).load()

# Save the reduced series; the large download is removed only after a successful write.
temp_mean.to_netcdf(small_file)
os.remove(big_file)

Could not load the ecCodes library!


In [None]:
# MDK: I figured it out. The weighting code above needs to be built into the scraper script,
# and all monthly preprocessing should then be removed from the preprocessor.

In [46]:
# Bounding boxes keyed by region name, in the order the CDS `area` field expects:
# [north, west, south, east].
# NOTE(review): the three full-longitude boxes list 180 before -180. They are left
# untouched because the 'northern_latitudes' request completed with that ordering.
coordinates_dict = {
    "global_coords": [90, 180, -90, -180],
    "non_polar_seas": [60, 180, -60, -180],
    "northeast_atlantic": [60, -40, 0, 0],
    # West (-96) must precede east (-79), as in 'canada' and 'northeast_atlantic';
    # the reversed order described a box running the long way round the globe.
    'ontario': [57, -96, 42, -79],
    'canada': [70, -141, 43, -51],
    'northern_latitudes': [90, 180, 49, -180],
}

# Short variable names (as they appear in the downloaded files) mapped to the
# variable names used in CDS requests.
variables_dict = {
    "sst": "sea_surface_temperature",
    "t2m": "2m_temperature",
    "forecast_albedo": "forecast_albedo"
}



In [35]:
# Ontario bounding box as [north, west, south, east]; west (-96) must come before
# east (-79), otherwise the box runs the long way round the globe.
ontario = [57, -96, 42, -79]

def get_shape_ca(shapefile, province_name=False):
    """Return the geometry to clip with, reprojected to EPSG:4326.

    Parameters
    ----------
    shapefile : object with ``to_crs``, ``PRENAME`` and ``geometry``
        Boundary layer to select from (a GeoDataFrame in this notebook).
    province_name : str or False, optional
        Value to match against the ``PRENAME`` column. When falsy, all
        geometries are merged into a single outline instead.

    Returns
    -------
    The ``geometry`` of the rows whose ``PRENAME`` equals ``province_name``, or,
    when no name is given, a one-row ``GeoDataFrame`` holding the union of
    every geometry in the layer.
    """
    reprojected = shapefile.to_crs("EPSG:4326")

    # No province requested: dissolve the whole layer into one outline.
    if not province_name:
        merged_outline = reprojected.geometry.unary_union
        return gpd.GeoDataFrame(geometry=[merged_outline], crs="EPSG:4326")

    is_requested = reprojected.PRENAME == province_name
    return reprojected[is_requested].geometry

def mask_xr_dataset(xr_data, shape):
    """Clip a gridded dataset to the outline(s) in ``shape``.

    Longitudes are first wrapped into the [-180, 180) range and sorted, the
    data is tagged as EPSG:4326, and it is then clipped to the geometries of
    ``shape`` using ``shape.crs``.

    NOTE(review): the ``.rio`` accessor is presumably provided by rioxarray,
    which is not imported in the visible cells — confirm it is imported before
    this function is called.

    Parameters
    ----------
    xr_data : xarray object with a ``longitude`` coordinate
    shape : object exposing ``geometry`` and ``crs`` (e.g. the result of ``get_shape_ca``)

    Returns
    -------
    The clipped copy of ``xr_data``; the object passed in is not modified.
    """
    wrapped_longitude = ((xr_data.longitude + 180) % 360) - 180
    recentred = xr_data.assign_coords(longitude=wrapped_longitude).sortby('longitude')

    # Record the CRS on the re-centred copy so the clip can interpret the geometries.
    recentred.rio.write_crs("EPSG:4326", inplace=True)

    clip_geometries = shape.geometry.apply(mapping)
    return recentred.rio.clip(clip_geometries, shape.crs)

# Load the Canadian boundary shapefile from disk into a GeoDataFrame.
# Presumably one row per province/territory, given the PRENAME filter in get_shape_ca — TODO confirm.
ca_shapefile = gpd.read_file('../data/shapefiles/canada/lpr_000b16a_e.shp')



In [47]:
def pull_era5_daily(variable, start_date, end_date, out_dir, lat_lon_dimensions, existing_file=None):
    """Download ERA5 fields from CDS and store their area-weighted mean time series.

    One '00:00' field per day is requested for every year from ``start_date.year``
    to ``end_date.year`` over the chosen bounding box. The gridded download is
    reduced to a cosine-latitude weighted mean per time step, optionally merged
    with a previously stored series, written to
    ``<out_dir>/era5_<lat_lon_dimensions>_<variable>_<YYYYMMDD>.nc`` and the
    gridded download is then deleted.

    Parameters
    ----------
    variable : str
        Key of ``variables_dict``; also the variable name read from the download.
    start_date, end_date : datetime.datetime
        Only the years are used for the request; ``end_date`` also stamps the
        output file name.
    out_dir : str
        Directory for both the temporary download and the reduced output.
    lat_lon_dimensions : str
        Key of ``coordinates_dict`` selecting the bounding box.
    existing_file : xarray.Dataset, str, os.PathLike or None, optional
        Previously reduced series to extend, given either as an open dataset or
        as a path to its NetCDF file. Where timestamps overlap, the newly
        downloaded values win.

    Raises
    ------
    KeyError
        If ``variable`` or ``lat_lon_dimensions`` is not a known key.
    """
    # Fail fast on unknown keys, before a potentially hours-long CDS request is queued.
    if variable not in variables_dict:
        raise KeyError(f"Unknown variable {variable!r}; expected one of {sorted(variables_dict)}")
    if lat_lon_dimensions not in coordinates_dict:
        raise KeyError(
            f"Unknown lat_lon_dimensions {lat_lon_dimensions!r}; "
            f"expected one of {sorted(coordinates_dict)}"
        )

    print('Pulling data from CDS API...')
    dataset = "reanalysis-era5-single-levels"
    request = {
        'product_type': ['reanalysis'],
        'variable': variables_dict[variable],
        'year': [str(i) for i in range(start_date.year, end_date.year + 1)],
        'month': [f'{i:02d}' for i in range(1, 13)],
        'day': [f'{i:02d}' for i in range(1, 32)],
        'time': ['00:00'],
        'data_format': 'netcdf',
        'download_format': 'unarchived',
        'area': coordinates_dict[lat_lon_dimensions]
    }

    # Build both paths the same way so a trailing slash on out_dir does not matter.
    small_file = f"era5_{lat_lon_dimensions}_{variable}_{end_date.strftime('%Y%m%d')}.nc"
    big_file = os.path.join(out_dir, f'temp_{small_file}')
    outfile = os.path.join(out_dir, small_file)

    # Accept the previous series as a path as well as a dataset. It is read fully into
    # memory and closed right away, so writing `outfile` works even if it is the same file.
    previous = existing_file
    if isinstance(previous, (str, os.PathLike)) and str(previous):
        with xr.open_dataset(previous) as stored:
            previous = stored.load()

    client = cdsapi.Client()
    client.retrieve(dataset, request, big_file)

    with xr.open_dataset(big_file, chunks={'time': 1, 'latitude': 100, 'longitude': 100}) as data:
        field = data[variable]
        # Weight each grid row by the cosine of its latitude.
        weights = np.cos(np.deg2rad(field.latitude))
        weights.name = "weights"
        area_mean = field.weighted(weights).mean(["longitude", "latitude"])
        # Evaluate the lazy reduction while the source file is still open. The result
        # is only one value per time step, so holding it in memory is cheap.
        temp_mean_ds = area_mean.to_dataset(name=variable).load()

    # Same truthiness test as before: None (or an empty dataset) means "nothing to merge".
    if previous:
        combined_data = xr.concat([previous, temp_mean_ds], dim='valid_time')
        combined_data = combined_data.sortby('valid_time').drop_duplicates('valid_time', keep='last')
    else:
        combined_data = temp_mean_ds

    combined_data.to_netcdf(outfile)
    print(f"Data written to {outfile}.\n")
    # The large download is removed only after the reduced file was written successfully.
    os.remove(big_file)

In [48]:
# Pull 2 m temperature for the 'northern_latitudes' box from 1979 up to today,
# starting a fresh series (no existing file to extend).
pull_era5_daily(
    't2m',
    datetime.datetime(1979, 1, 1),
    datetime.datetime.today(),
    '../data/global_temperatures/era5/',
    lat_lon_dimensions='northern_latitudes',
    existing_file=None,
)

Pulling data from CDS API...


2024-08-27 09:41:18,003 INFO Request ID is 03d7b4fd-b59f-4d4d-946f-4fd0babd10c7
2024-08-27 09:41:18,147 INFO status has been updated to accepted
2024-08-27 09:49:37,978 INFO status has been updated to running
2024-08-27 10:39:48,961 INFO Creating download object as as_source with files:
['data_stream-oper.nc']
2024-08-27 11:05:54,860 INFO status has been updated to successful


f631cc732f719d25db99bdf807f79a0.nc:   0%|          | 0.00/5.64G [00:00<?, ?B/s]



Data written to ../data/global_temperatures/era5/era5_northern_latitudes_t2m_20240827.nc.



In [None]:
# `lat_lon_dimensions` must be a key of coordinates_dict; passing None raises KeyError.
# NOTE(review): 'global_coords' is inferred from the existing
# 'era5_global_coords_t2m_*.nc' file names — confirm this is the intended region.
pull_era5_daily(
    't2m',
    datetime.datetime(1979, 1, 1),
    datetime.datetime.today(),
    '../data/global_temperatures/era5/',
    lat_lon_dimensions='global_coords',
    existing_file=None,
)