In [None]:
### First, we downloaded a NetCDF file from the Copernicus Climate Data Store (CDS): https://cds.climate.copernicus.eu

In [None]:
# Manually downloaded NetCDF file (see the note above). The path is specific to
# one machine; no later cell visible in this notebook reads `file`.
file = (
    '/Users/gbenz/Downloads/'
    'tx10pETCCDI_mon_HadGEM3-GC31-LL_historical_r1i1p1f3_b1981-2010_v20190624_185001-201412_v2-0.nc'
)

In [None]:
import pandas as pd
import xarray as xr
from rasterstats import zonal_stats
import numpy as np

from utils.unzip import unzip_etccdi_package
from utils.correct_longitude import transform_longitudinal_values
from utils.give_metadata import give_metadata
from utils.etccdi_to_pg import generate_etccdi_temporal_tables
from utils.temporal_index import find_etccdi_timeindex
from utils.define_request import generate_and_validate_request

## Access with Copernicus Data Store API:

#### Objective by Oct. 30 is to have this process begin by retrieving (predefined and approved) ETCCDI data parameters from an API

- Works on Mon 28 GB

The following code provides the correct output but requires a unique user API key. This cannot be simplified much further.

The desirable output is to exclusively change:
- 'variable' 
- product_type
- period
#### ------------------
- start_year = '1995'
- start_month = '01'
- end_year = '2000'
- end_month = '12'

Then, if you select 'cold days', a decision tree will be printed showing the optional parameters that can be selected for product type and period.
Other parameters will be kept at their standard values.

In [None]:


# Generate and validate the CDS request from the options chosen below.
request = generate_and_validate_request(
    **{
        "variable": "consecutive_wet_days",
        "product_type": "base_independent",
        "experiment": "historical",
        "temporal_aggregation": "yearly",
    }
)

# Show the resulting request for inspection.
display(request)


In [None]:
import cdsapi

# CDS dataset that the request is sent to.
dataset = "sis-extreme-indices-cmip6"

# Take the first entry of each request field that goes into the archive name.
variable, temporal_aggregation, period = (
    request[field][0]
    for field in ("variable", "temporal_aggregation", "period")
)

# Archive name: <variable>_<temporal_aggregation>_<period>.zip
zip_file_name = f"{variable}_{temporal_aggregation}_{period}.zip"

# Download the requested data into the archive (a personal CDS API key is required).
client = cdsapi.Client()
client.retrieve(dataset, request, target=zip_file_name)


In [None]:
# Unpack the archive downloaded above. The helper returns two values:
# `netcdf_file` and `etccdi_index`.
# NOTE(review): presumably the extracted NetCDF file and the ETCCDI index name
# (e.g. 'tx10pETCCDI') — confirm against utils.unzip.
netcdf_file, etccdi_index = unzip_etccdi_package(zip_file_name)

### Report Metadata from the selected ETCCDI netCDF file:

Move the pg shapefile to the github repo so this can be accessed without references to local paths

Accomplishes:
- checks to ensure the correct netcdf file is being processed
- provides spatial and temporal metadata

From preprocessing, we know that the ETCCDI climate data is not packaged in a desirable format, that is, the original longitudinal range is: 0.9375 to 359.0625
- Adjust the Longitude range 
- save an 'adjusted netcdf' file.


28-10 -- It would perhaps be most desirable to first transform and then report metadata, using two separate functions.

In [None]:
# Adjust the longitude range of the selected file (the raw data spans
# 0.9375 to 359.0625, per the notes above) and keep the result as `etccdi`.
# NOTE(review): the notes also mention saving an 'adjusted netcdf' file —
# confirm in utils.correct_longitude whether this call writes to disk.
etccdi = transform_longitudinal_values(etccdi_index, netcdf_file)

In [None]:
# Report metadata for the longitude-adjusted dataset.
# NOTE(review): expected to cover the spatial and temporal metadata described
# in the notes above — confirm in utils.give_metadata.
give_metadata(etccdi)

In [None]:
# Requested time window; year and month are given as strings.
start_year, start_month = '1990', '01'
end_year, end_month = '2014', '12'

# Look up the dataset time index for each end of the window.
start_index_val = find_etccdi_timeindex(start_year, start_month, etccdi)
end_index_val = find_etccdi_timeindex(end_year, end_month, etccdi)

print(f'The start index is: {start_index_val}')
print(f'The end index is: {end_index_val}')

# Every time index from start to end, with the end index included.
index_list = [*range(start_index_val, end_index_val + 1)]

### Testing on the first n elements:

In [None]:
# Trial run: keep only the first two time indices.
sub_index = index_list[:2]

# Size of the trial subset and of the full selection, printed one per line.
time_length_subset, time_length = len(sub_index), len(index_list)
print(time_length_subset, time_length, sep='\n')


### Puts it all together

Parameters:
1. references the sub_index which supplies the list (or sublist) of indexes to iterate over. Index specifically references time. This can be confusing because the ETCCDI variables are themselves climate indices.
2. Creates a single geotiff from the current time selection. We do this because the NetCDF itself is not a format that can be incorporated into rigorous analysis so as we iterate through the time series we convert the working item to a geotiff which is a format that can be operated on.


#### Parameters:

1. NetCDF file
2. (TEMPORAL) sub_index or full index (specify index to loop over)
3. etccdi index ex(tx10pETCCDI)


ADF -- Decision to just save to ONE WORKING raster that will continuously be rewritten 
rationale: The purpose of having unique tifs is to visualize holes in the data. However, this is not worth the space. If holes appear in the tabular dataset, a new geotiff corresponding to that month / year can quickly be produced!

ADF -- Rationalize why this is best:
    # Resample the raster data to the new resolution
    resampled_raster = raster_data.rio.reproject(
        raster_data.rio.crs,
        shape=(
            int(raster_data.shape[1] * 10),  # Increase number of rows by a factor of 10
            int(raster_data.shape[2] * 10)   # Increase number of columns by a factor of 10
        ),
        resampling=Resampling.bilinear  # Use the correct resampling method
    )


### Params:

- time_index_list,
- netcdf, climate_index, 
- shapefile_path


In [None]:
# Build the output tables for the time indices in `sub_index` only (the first
# two entries of `index_list`); pass `index_list` instead for the full window.
# Arguments, in order: time index list, dataset, ETCCDI index name, shapefile path.
# NOTE(review): the shapefile path is machine-specific; the notes above plan to
# move the shapefile into the repository.
generate_etccdi_temporal_tables(sub_index, etccdi, etccdi_index, '/Users/gbenz/Downloads/pg_extent/pgm_viewser_extent.shp')

### Review:

#### Validate completeness of the output index at PG resolution:

- Temporally: check that the total number of time periods matches
- Spatially: full extent of PG (for each temporal unit!)

How to do this:

1. Load the 'compiled' etccdi index .csv
2. check for null values

Total length should be: X

check length of temporal units should be: X (dependent on input parameters)
check length of spatial units should be: X

3. Plot the data

### What we need:

A complete 'clean' dataframe to reference from VIEWSER! 11.11


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_monthly_average_tx10pETCCDI(dataframe, index_column='tx10pETCCDI'):
    """Plot the mean of an ETCCDI index column for each value of 'date'.

    The values of ``index_column`` are averaged over all rows sharing the same
    'date', and the averages are drawn as a line chart so that gaps in the
    time series become visible.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing a 'date' column and the column named by
        ``index_column``.
    index_column : str, default 'tx10pETCCDI'
        Name of the column to average and plot. The default keeps the
        original behaviour; pass another name (e.g. a different ETCCDI index)
        to reuse the plot for other outputs.

    Raises
    ------
    KeyError
        If 'date' or ``index_column`` is not a column of ``dataframe``.
    """
    # Fail early with a message that names the missing column(s).
    missing = [col for col in ('date', index_column) if col not in dataframe.columns]
    if missing:
        raise KeyError(f'dataframe is missing required column(s): {missing}')

    # One averaged value per distinct date.
    monthly_avg = dataframe.groupby('date')[index_column].mean().reset_index()

    plt.figure(figsize=(12, 6))
    plt.plot(monthly_avg['date'], monthly_avg[index_column], marker='o', linestyle='-')
    plt.title(f'Average {index_column} by Year-Month')
    plt.xlabel('Date')
    plt.ylabel(f'Average {index_column}')
    plt.grid(True)
    plt.show()

# Assuming your DataFrame is named df


In [None]:
import geopandas as gpd

# Location of the 'pg' extent shapefile (machine-specific path).
file_path = '/Users/gbenz/Downloads/pg_extent/pgm_viewser_extent.shp'

# Read the shapefile into a GeoDataFrame.
gdf = gpd.read_file(file_path)

# Number of distinct 'gid' values, used below as the expected spatial unit count.
spatial_extent = len(gdf['gid'].unique())

In [None]:
# Load the compiled output table and add a '<year>-<month>' label, with the
# month zero-padded to two digits.
validate_etccdi = pd.read_csv(
    '/Users/gbenz/Documents/Climate Data/climate_extremes/etccdi_out_files/tx10pETCCDI_2015_01__2015_02.csv'
).assign(
    date=lambda table: table['year'].astype(str) + '-' + table['month'].astype(str).str.zfill(2)
)

# Distinct temporal and spatial units actually present in the table.
etccdi_time_length = len(validate_etccdi['date'].unique())
etccdi_spatial_length = len(validate_etccdi['gid'].unique())

# --- Null-value report -------------------------------------------------------
columns_to_check = ['gid', 'year', 'month', 'date', 'tx10pETCCDI']
print('This prints a summary of Null values:')
# Per-column count of missing entries.
null_counts = validate_etccdi[columns_to_check].isna().sum()
print()
display(null_counts)

# --- Expected versus found unit counts ---------------------------------------
print()
print('Summary of temporal and spatial units:')
print()
print(f'This dataset expects to see {time_length} and found {etccdi_time_length}')
print(f'This dataset expects to see {spatial_extent} and found {etccdi_spatial_length}')
print()
print('Average over time to expose any temporal gaps')
# Optional visual check, currently disabled:
#plot_monthly_average_tx10pETCCDI(validate_etccdi)

### Resample to Fine (Granular) Pixel 

Parameters to consider: 
1. What is an appropriate resolution
2. What is the most appropriate resampling method
