# Fancy Variables

In [1]:
# Adjust Notebook Display
# Inject a CSS rule so the notebook container uses 80% of the browser width.
from IPython.display import HTML, display

NOTEBOOK_WIDTH_CSS = "<style>.container { width:80% !important; }</style>"
display(HTML(NOTEBOOK_WIDTH_CSS))

In [2]:
# Import Libraries
import geopandas
import geopandas as gpd
from sklearn.neighbors import BallTree
import numpy as np
from shapely.geometry import Point
from scipy.spatial import cKDTree
import pandas as pd
from shapely.ops import nearest_points
from shapely.geometry import Point
import pandas as pd
import math
import ast
import zipfile
import os

## Neighbours

In [3]:
# Load Shape Files Data
# source: https://ec.europa.eu/eurostat/web/gisco/geodata/reference-data/administrative-units-statistical-units/nuts#nuts21
# Unpack the downloaded archive into the current working directory so the
# shapefile can be read from disk afterwards.
NUTS_ARCHIVE = 'NUTS_RG_20M_2021_3035.shp.zip'

with zipfile.ZipFile(NUTS_ARCHIVE, 'r') as nuts_archive:
    nuts_archive.extractall()

In [4]:
# Get Geometric Dataset
# Read the NUTS shapefile, compute one centroid per region, and keep the
# LEVL_CODE == 3 regions in `gdf` for the neighbour search below.

nuts_all = geopandas.read_file('NUTS_RG_20M_2021_3035.shp')

# Compute the centroids once and reuse them for both derived objects.
nuts_centroids = nuts_all['geometry'].centroid
nuts_all['centroid'] = nuts_centroids

# Copy of the full table whose active geometry is the centroid point.
# NOTE(review): gdf_centroids / centroids_sindex are not used in the visible
# part of this notebook; kept in case other code relies on them.
gdf_centroids = nuts_all.copy()
gdf_centroids['geometry'] = nuts_centroids

centroids_sindex = gdf_centroids.sindex

# .copy() makes `gdf` an independent frame, so adding columns to it later does
# not write into a filtered slice of the full table (SettingWithCopyWarning).
gdf = nuts_all[nuts_all['LEVL_CODE'] == 3].copy()

In [5]:
# Add NEIGHBORS column
# For every region, collect the NUTS_IDs of all other regions whose geometry is
# not disjoint from it (i.e. they touch or overlap), and store them as a list.

region_ids = gdf["NUTS_ID"]
neighbor_lists = []
for region_id, region_geometry in zip(region_ids, gdf.geometry):
    # boolean mask over all regions: True where the geometries share a point
    not_disjoint = ~gdf.geometry.disjoint(region_geometry)

    # a region is never disjoint from itself, so drop its own id
    neighbor_lists.append([other_id for other_id in region_ids[not_disjoint] if other_id != region_id])

# Store the lists directly instead of joining them into a string and splitting
# again: a region without neighbours now gets [] rather than [''].
# assign() builds a new frame, so nothing is written into a slice of another frame.
gdf = gdf.assign(NEIGHBORS=pd.Series(neighbor_lists, index=gdf.index, dtype=object))

In [6]:
# Load Aggregated Climate Data
def _read_five_char_nuts(csv_path):
    """Read a climate CSV and keep only rows whose NUTS_ID has exactly five characters.

    Five-character ids are presumably the NUTS level-3 codes -- TODO confirm.
    """
    frame = pd.read_csv(csv_path)
    has_five_chars = frame['NUTS_ID'].str.len() == 5
    return frame[has_five_chars]


climate_midas = _read_five_char_nuts('climate_midas_variables.csv')
climate_aggregated = _read_five_char_nuts('climate_aggregated_variables.csv')
climate_aggregated.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34608 entries, 0 to 34652
Data columns (total 36 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   NUTS_ID           34608 non-null  object 
 1   year              34608 non-null  int64  
 2   MaxTemp_std       30100 non-null  float64
 3   MaxTemp_mean      30100 non-null  float64
 4   MaxTemp_max       30100 non-null  float64
 5   MaxTemp_min       30100 non-null  float64
 6   MaxTemp_lta       30100 non-null  float64
 7   spi01_std         33242 non-null  float64
 8   spi01_mean        33242 non-null  float64
 9   spi01_max         33242 non-null  float64
 10  spi01_min         33242 non-null  float64
 11  spi12_std         31731 non-null  float64
 12  spi12_mean        31731 non-null  float64
 13  spi12_max         31731 non-null  float64
 14  spi12_min         31731 non-null  float64
 15  spa01_lta         33242 non-null  float64
 16  spa12_lta         33242 non-null  float6

In [7]:
# Set Functions for Variable Creation

def add_year(lst, year):
    """Return a new list in which every item of ``lst`` is suffixed with ``_<year>``.

    Items and ``year`` are formatted with their string representation; the
    input iterable is not modified.
    """
    suffixed = []
    for item in lst:
        suffixed.append(f'{item}_{year}')
    return suffixed

def NN(df, neighbors=None, variables=('spi01_mean', 'spi12_mean', 'lfi_mean', 'hcwi_mean')):
    """Add neighbouring-region averages of selected variables to ``df``.

    For every row (one region in one year) and every name in ``variables``, a
    column ``NN_<variable>`` is added holding the mean of that variable over
    the rows of ``df`` that belong to the region's neighbours in the same year.
    Missing values are skipped; a row whose neighbours have no data (or that
    has no neighbour list at all) gets NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``NUTS_ID``, ``year`` and every column in ``variables``.
    neighbors : pandas.DataFrame, optional
        Table with a ``NUTS_ID`` column and a ``NEIGHBORS`` column holding a
        list of neighbour ids per region. Defaults to the notebook-level ``gdf``.
    variables : sequence of str, optional
        Columns to average over the neighbours.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index, ``year`` converted to ``str``
        and the ``NN_*`` columns appended. ``df`` itself is not modified.
    """
    if neighbors is None:
        neighbors = gdf
    variables = list(variables)

    out = df.merge(neighbors[['NEIGHBORS', 'NUTS_ID']], on='NUTS_ID', how='left')
    out['year'] = out['year'].astype(str)

    # One line per (row, neighbour id). Regions without a neighbour list
    # (NaN after the left merge, or an empty list) simply produce no pairs.
    pairs = (
        out[['year', 'NEIGHBORS']]
        .explode('NEIGHBORS')
        .dropna(subset=['NEIGHBORS'])
        .rename_axis('_row')
        .reset_index()
        .drop_duplicates()  # a neighbour listed twice still counts once
    )

    # Look up the neighbours' values for the same year and average them per row.
    neighbor_values = out[['NUTS_ID', 'year'] + variables].rename(columns={'NUTS_ID': 'NEIGHBORS'})
    matched = pairs.merge(neighbor_values, on=['NEIGHBORS', 'year'], how='inner')
    neighbor_means = matched.groupby('_row')[variables].mean()

    for var in variables:
        # rows without any matched neighbour are absent from neighbor_means -> NaN
        out['NN_' + var] = neighbor_means[var].reindex(out.index)

    return out.drop(columns=['NEIGHBORS'])

In [8]:
# Apply Neighbours Function to Aggregated Data
# NN() returns a new frame with `year` converted to str and one NN_<variable>
# neighbour-average column per variable; the result replaces `climate_aggregated`.
climate_aggregated = NN(climate_aggregated)
climate_aggregated.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34608 entries, 0 to 34607
Data columns (total 40 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   NUTS_ID           34608 non-null  object 
 1   year              34608 non-null  object 
 2   MaxTemp_std       30100 non-null  float64
 3   MaxTemp_mean      30100 non-null  float64
 4   MaxTemp_max       30100 non-null  float64
 5   MaxTemp_min       30100 non-null  float64
 6   MaxTemp_lta       30100 non-null  float64
 7   spi01_std         33242 non-null  float64
 8   spi01_mean        33242 non-null  float64
 9   spi01_max         33242 non-null  float64
 10  spi01_min         33242 non-null  float64
 11  spi12_std         31731 non-null  float64
 12  spi12_mean        31731 non-null  float64
 13  spi12_max         31731 non-null  float64
 14  spi12_min         31731 non-null  float64
 15  spa01_lta         33242 non-null  float64
 16  spa12_lta         33242 non-null  float6

In [9]:
# Add Aggregated Neighbours Data to MIDAS set
# `year` is converted to str on the MIDAS side so that the merge key is
# compared as str on both sides.
nn_columns = ['NN_spi01_mean', 'NN_spi12_mean', 'NN_lfi_mean', 'NN_hcwi_mean']

climate_midas = climate_midas.assign(year=climate_midas['year'].astype(str))

nn_aggregated = climate_aggregated[['NUTS_ID', 'year'] + nn_columns]
climate_midas = climate_midas.merge(nn_aggregated, on=['year', 'NUTS_ID'], how='outer')
climate_midas.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34608 entries, 0 to 34607
Data columns (total 97 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   NUTS_ID        34608 non-null  object 
 1   year           34608 non-null  object 
 2   MaxTemp [1]    30087 non-null  float64
 3   MaxTemp [2]    30087 non-null  float64
 4   MaxTemp [3]    30090 non-null  float64
 5   MaxTemp [4]    30095 non-null  float64
 6   MaxTemp [5]    30093 non-null  float64
 7   MaxTemp [6]    30091 non-null  float64
 8   MaxTemp [7]    30094 non-null  float64
 9   MaxTemp [8]    30095 non-null  float64
 10  MaxTemp [9]    30079 non-null  float64
 11  MaxTemp [10]   30093 non-null  float64
 12  MaxTemp [11]   28778 non-null  float64
 13  MaxTemp [12]   28778 non-null  float64
 14  MaxTemp_lta    30100 non-null  float64
 15  SPI01 [1]      33242 non-null  float64
 16  SPI01 [2]      33242 non-null  float64
 17  SPI01 [3]      33242 non-null  float64
 18  SPI01 

## Compounding Effects

In [10]:
# Get HCWI Raw Data
# Reads the heat-intensity file and reduces it to one value per
# NUTS_ID / year / month: the median of the `median` column.

# Root folder of the raw climate data. Set the CLIMATE_DATA_DIR environment
# variable to run the notebook on another machine; the default is the folder
# the notebook was originally run from.
climate_data_dir = os.environ.get(
    'CLIMATE_DATA_DIR',
    'C:/Users/spiterisr/OneDrive - centralbankmalta.org/Working Papers/Beat the Heat Hackathon/Data/Climate Data',
)

hcwi_raw = pd.read_csv(f'{climate_data_dir}/Heat Intensity/intensity_2001-2022.csv')
hcwi_raw['date'] = pd.to_datetime(hcwi_raw['date'])
hcwi_raw['month'] = hcwi_raw['date'].dt.month

# Full grid: every region for every (year, month) combination seen in the data.
hcwi_region_ids = hcwi_raw['NUTS_ID'].unique()
hcwi_base = pd.concat([
    pd.DataFrame({'NUTS_ID': hcwi_region_ids, 'year': grid_year, 'month': grid_month})
    for grid_year in hcwi_raw['year'].unique()
    for grid_month in hcwi_raw['month'].unique()
])

hcwi_monthly = hcwi_raw.groupby(['NUTS_ID', 'year', 'month']).agg({'median': 'median'})

# NOTE(review): merge defaults to how='inner', so grid rows without an
# observation are dropped again and the grid adds no rows. Confirm whether
# how='left' was intended; behaviour is kept as it was.
hcwi = (
    pd.merge(hcwi_base, hcwi_monthly, on=['NUTS_ID', 'year', 'month'])
    .rename(columns={'median': 'hcwi'})
)
hcwi.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 50269 entries, 0 to 50268
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   NUTS_ID  50269 non-null  object 
 1   year     50269 non-null  int64  
 2   month    50269 non-null  int64  
 3   hcwi     50269 non-null  float64
dtypes: float64(1), int64(2), object(1)
memory usage: 1.9+ MB


In [11]:
# Get SPI Raw Data
# Reads every CSV in the SPI folder, tags its MEDIAN column with the prefix of
# the file name (the text before the first "_"), and reduces the stacked data
# to one median per NUTS_ID / year / month for spa01 and spa12.

# Root folder of the raw climate data. Set the CLIMATE_DATA_DIR environment
# variable to run the notebook on another machine; the default is the folder
# the notebook was originally run from.
climate_data_dir = os.environ.get(
    'CLIMATE_DATA_DIR',
    'C:/Users/spiterisr/OneDrive - centralbankmalta.org/Working Papers/Beat the Heat Hackathon/Data/Climate Data',
)
location = f"{climate_data_dir}/SPI/"

# os.listdir returns names in arbitrary order; sort so the row order of the
# stacked data is the same on every run and every machine.
all_files = sorted(os.listdir(location))

all_dfs = []

for file in all_files:
    if not file.endswith(".csv"):
        continue
    spa_type = file.split("_")[0]
    df_a1 = pd.read_csv(location + file)
    # column names may carry surrounding whitespace
    df_a1 = df_a1.rename(columns=lambda x: x.strip())
    # parse the timestamp once per file and reuse it for year and month
    df_a1['TIMESTAMP'] = pd.to_datetime(df_a1['TIMESTAMP'])
    df_a1['year'] = df_a1['TIMESTAMP'].dt.year
    df_a1 = df_a1.rename(columns={'MEDIAN': f'MEDIAN_{spa_type}'})
    all_dfs.append(df_a1)

spi_raw = pd.concat(all_dfs, ignore_index=True)
spi_raw['month'] = spi_raw['TIMESTAMP'].dt.month

# Full grid: every region for every (year, month) combination seen in the data.
spi_region_ids = spi_raw['NUTS_ID'].unique()
spi_base = pd.concat([
    pd.DataFrame({'NUTS_ID': spi_region_ids, 'year': grid_year, 'month': grid_month})
    for grid_year in spi_raw['year'].unique()
    for grid_month in spi_raw['month'].unique()
])

spi_monthly = spi_raw.groupby(['NUTS_ID', 'year', 'month']).agg({'MEDIAN_spa01': 'median', 'MEDIAN_spa12': 'median'})

# NOTE(review): merge defaults to how='inner', so grid rows without any
# observation are dropped again. Confirm whether how='left' was intended.
spi = (
    pd.merge(spi_base, spi_monthly, on=['NUTS_ID', 'year', 'month'])
    .rename(columns={'MEDIAN_spa01': 'spi01', 'MEDIAN_spa12': 'spi12'})
)
spi.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 398904 entries, 0 to 398903
Data columns (total 5 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   NUTS_ID  398904 non-null  object 
 1   year     398904 non-null  int64  
 2   month    398904 non-null  int64  
 3   spi01    398893 non-null  float64
 4   spi12    380772 non-null  float64
dtypes: float64(2), int64(2), object(1)
memory usage: 18.3+ MB


In [12]:
# Merge HCWI and SPI Data
# The outer merge keeps every region/year/month present in either table; any
# value that is missing afterwards is replaced by 0.
# NOTE(review): this treats a missing SPI or HCWI reading as a value of 0 -- confirm
# that this is the intended interpretation.
compounding_keys = ['NUTS_ID', 'year', 'month']
compounding = spi.merge(hcwi, on=compounding_keys, how='outer')
compounding = compounding.fillna(0)
compounding.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 399097 entries, 0 to 399096
Data columns (total 6 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   NUTS_ID  399097 non-null  object 
 1   year     399097 non-null  int64  
 2   month    399097 non-null  int64  
 3   spi01    399097 non-null  float64
 4   spi12    399097 non-null  float64
 5   hcwi     399097 non-null  float64
dtypes: float64(3), int64(2), object(1)
memory usage: 21.3+ MB


In [13]:
# Quick Data Check
# Summary statistics of the merged SPI/HCWI table. Missing values were already
# replaced by 0 when `compounding` was built, so they enter these statistics as zeros.
compounding.describe()

Unnamed: 0,year,month,spi01,spi12,hcwi
count,399097.0,399097.0,399097.0,399097.0,399097.0
mean,2011.499678,6.500074,0.010287,0.05314,0.247964
std,6.344207,3.451982,0.978922,0.940484,0.755864
min,2001.0,1.0,-4.8915,-3.31,0.0
25%,2006.0,4.0,-0.6175,-0.552,0.0
50%,2011.0,7.0,0.055,0.006,0.0
75%,2017.0,10.0,0.693,0.676,0.0
max,2022.0,12.0,4.254,3.342,8.706688


In [14]:
# Create Compounding Effects Variables
# A compound value is hcwi * |spi| for months in which hcwi is positive and
# the SPI is at or below -1; every other month gets 0.
heatwave_present = compounding['hcwi'] > 0

for compound_column, spi_column in (
    ('HeatWave_ShortDrought', 'spi01'),
    ('HeatWave_LongDrought', 'spi12'),
):
    drought_present = compounding[spi_column] <= -1
    compounding[compound_column] = np.where(
        heatwave_present & drought_present,
        compounding['hcwi'] * compounding[spi_column].abs(),
        0,
    )

compounding.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 399097 entries, 0 to 399096
Data columns (total 8 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   NUTS_ID                399097 non-null  object 
 1   year                   399097 non-null  int64  
 2   month                  399097 non-null  int64  
 3   spi01                  399097 non-null  float64
 4   spi12                  399097 non-null  float64
 5   hcwi                   399097 non-null  float64
 6   HeatWave_ShortDrought  399097 non-null  float64
 7   HeatWave_LongDrought   399097 non-null  float64
dtypes: float64(5), int64(2), object(1)
memory usage: 27.4+ MB


In [15]:
# Check Correlation Between Variables
# Correlation (Series.corr default method) between the short- and long-drought compound variables.
short_vs_long_corr = compounding['HeatWave_ShortDrought'].corr(compounding['HeatWave_LongDrought'])
print('Compounding Effects Short VS Long Term Correlation Check :', short_vs_long_corr)

Compounding Effects Short VS Long Term Correlation Check : 0.22628030649799374


In [16]:
# Add Variables to Aggregated Dataset
# Each compound variable is reduced to its yearly median per region and then
# merged onto the aggregated table.

# `year` becomes str so the merge key is compared as str on both sides.
compounding['year'] = compounding['year'].astype(str)

# NOTE(review): merge defaults to how='inner', so region-years without
# compounding data are dropped from climate_aggregated (the info() outputs go
# from 34608 to 33298 rows). Confirm this is intended.
for compound_column in ('HeatWave_ShortDrought', 'HeatWave_LongDrought'):
    additional_variable = compounding.groupby(['NUTS_ID', 'year']).agg({compound_column: 'median'})
    climate_aggregated = climate_aggregated.merge(additional_variable, on=['NUTS_ID', 'year'])

climate_aggregated.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 33298 entries, 0 to 33297
Data columns (total 42 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   NUTS_ID                33298 non-null  object 
 1   year                   33298 non-null  object 
 2   MaxTemp_std            28791 non-null  float64
 3   MaxTemp_mean           28791 non-null  float64
 4   MaxTemp_max            28791 non-null  float64
 5   MaxTemp_min            28791 non-null  float64
 6   MaxTemp_lta            28791 non-null  float64
 7   spi01_std              33242 non-null  float64
 8   spi01_mean             33242 non-null  float64
 9   spi01_max              33242 non-null  float64
 10  spi01_min              33242 non-null  float64
 11  spi12_std              31731 non-null  float64
 12  spi12_mean             31731 non-null  float64
 13  spi12_max              31731 non-null  float64
 14  spi12_min              31731 non-null  float64
 15  sp

In [17]:
# Add Variables to MIDAS Dataset
# Reshape the monthly compound variables to wide form: one row per region/year
# and one column per variable and month, named "<variable> [<month>]".
# All short-drought months are merged first, then all long-drought months.

m_df = None
for compound_column in ('HeatWave_ShortDrought', 'HeatWave_LongDrought'):
    for month_value in compounding['month'].unique():
        in_month = compounding['month'] == month_value
        month_slice = compounding.loc[in_month, ['NUTS_ID', 'year', compound_column]]
        month_slice = month_slice.rename(
            columns={compound_column: compound_column + ' [' + str(month_value) + ']'}
        )
        if m_df is None:
            # the very first month/variable starts the wide table
            m_df = month_slice
        else:
            m_df = pd.merge(m_df, month_slice, on=['NUTS_ID', 'year'], how='outer')

compounding_midas = m_df.copy()
climate_midas = climate_midas.merge(compounding_midas, on=['NUTS_ID', 'year'], how='outer')
climate_midas.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34608 entries, 0 to 34607
Columns: 121 entries, NUTS_ID to HeatWave_LongDrought [12]
dtypes: float64(119), object(2)
memory usage: 32.2+ MB


## Heatwave and Drought Duration

In [18]:
"""

Help! I need somebody
Help! Not just anybody
Help! You know I need someone
Help!

When I was younger, so much younger than today
I never needed anybody's help in any way
But now these days are gone and I'm not so self assured
Now I find I've changed my mind, I've opened up the doors

Help me if you can, I'm feeling down
And I do appreciate you being 'round
Help me get my feet back on the ground
Won't you please, please help me?

And now my life has changed in oh so many ways
My independence seems to vanish in the haze
But every now and then I feel so insecure
I know that I just need you like I've never done before

Help me if you can, I'm feeling down
And I do appreciate you being 'round
Help me get my feet back on the ground
Won't you please, please help me?

When I was younger, so much younger than today
I never needed anybody's help in any way
But now these days are gone, I'm not so self assured
Now I find I've changed my mind, I've opened up the doors

Help me if you can, I'm feeling down
And I do appreciate you being 'round
Help me get my feet back on the ground
Won't you please, please help me?

Help me, help me, oh

"""


"\n\nHelp! I need somebody\nHelp! Not just anybody\nHelp! You know I need someone\nHelp!\n\nWhen I was younger, so much younger than today\nI never needed anybody's help in any way\nBut now these days are gone and I'm not so self assured\nNow I find I've changed my mind, I've opened up the doors\n\nHelp me if you can, I'm feeling down\nAnd I do appreciate you being 'round\nHelp me get my feet back on the ground\nWon't you please, please help me?\n\nAnd now my life has changed in oh so many ways\nMy independence seems to vanish in the haze\nBut every now and then I feel so insecure\nI know that I just need you like I've never done before\n\nHelp me if you can, I'm feeling down\nAnd I do appreciate you being 'round\nHelp me get my feet back on the ground\nWon't you please, please help me?\n\nWhen I was younger, so much younger than today\nI never needed anybody's help in any way\nBut now these days are gone, I'm not so self assured\nNow I find I've changed my mind, I've opened up the doo

## Save Fancy Climate Data

In [19]:
# Write both enriched tables to CSV without the index column.
# NOTE(review): these are the same file names the notebook reads its inputs
# from, so the inputs are overwritten and a second full run starts from
# already-enriched files. Consider writing to new file names.
output_tables = {
    'climate_aggregated_variables.csv': climate_aggregated,
    'climate_midas_variables.csv': climate_midas,
}
for output_path, output_table in output_tables.items():
    output_table.to_csv(output_path, index=False)