In [ ]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import seaborn as sns
import numpy as np

In [None]:
def get_climate_data(year, climate_data_directory='./historical_climate_data'):
    """Load the ``canada_{year}.csv`` climate file into a DataFrame.

    Parameters
    ----------
    year : int or str
        Value interpolated into the file name ``canada_{year}.csv``.
    climate_data_directory : str or path-like, optional
        Directory that holds the yearly CSV files. Defaults to
        ``'./historical_climate_data'``, the location this notebook has
        always read from, so existing one-argument calls are unaffected.

    Returns
    -------
    pandas.DataFrame
        The file contents as parsed by ``pd.read_csv`` with default options.

    Raises
    ------
    FileNotFoundError
        If no CSV exists for ``year`` in ``climate_data_directory``.
    """
    return pd.read_csv(f'{climate_data_directory}/canada_{year}.csv')

def get_fire_data(year, fire_data_directory='./historical_fire_data/grouped',
                  confidence_threshold=90):
    """Load the ``grouped_canada_{year}.csv`` fire file and label each row.

    A binary ``fire_occurrence`` column is added: 1 where the row's
    ``confidence`` value is at least ``confidence_threshold``, otherwise 0
    (rows with a missing ``confidence`` compare as False and get 0).

    Parameters
    ----------
    year : int or str
        Value interpolated into the file name ``grouped_canada_{year}.csv``.
    fire_data_directory : str or path-like, optional
        Directory that holds the yearly grouped CSV files. Defaults to
        ``'./historical_fire_data/grouped'``.
    confidence_threshold : int or float, optional
        Inclusive lower bound on ``confidence`` for a row to be labelled as
        a fire occurrence. Defaults to 90, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        The parsed file with the extra integer ``fire_occurrence`` column.

    Raises
    ------
    FileNotFoundError
        If no CSV exists for ``year`` in ``fire_data_directory``.
    KeyError
        If the file has no ``confidence`` column.
    """
    data = pd.read_csv(f'{fire_data_directory}/grouped_canada_{year}.csv')
    is_confident_detection = data['confidence'] >= confidence_threshold
    data['fire_occurrence'] = is_confident_detection.astype(int)
    return data


In [ ]:
# Read the climate tables for the two years analysed in the cells below.
climate_data_2010 = get_climate_data(year=2010)
climate_data_2020 = get_climate_data(year=2020)

In [ ]:
# Columns kept for the 2010 analysis: the grouping keys, then the climate
# variables that are averaged per group.
climate_key_columns = ['daynight', 'month']
climate_value_columns = [
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    '2m_temperature',
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3',
    'soil_temperature_level_4',
    'soil_type',
    'total_precipitation',
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    'volumetric_soil_water_layer_3',
    'volumetric_soil_water_layer_4',
]
climate_data_2010 = climate_data_2010[climate_key_columns + climate_value_columns]

# Mean of every climate variable for each (daynight, month) pair.
# NOTE(review): soil_type looks like a categorical code; its mean is kept so
# the aggregate has the same columns as before — confirm it is meaningful.
group_by_aggregate2010 = (
    climate_data_2010
    .groupby(climate_key_columns)[climate_value_columns]
    .mean()
)

# Per-daynight temperature statistics, all derived from one groupby.
temperature_by_daynight = climate_data_2010.groupby('daynight')['2m_temperature']
means = temperature_by_daynight.mean()
mins = temperature_by_daynight.min()
maxs = temperature_by_daynight.max()

# groupby returns its keys sorted, while seaborn chooses box positions on its
# own (order of appearance for string categories). Pinning the box order to
# the statistics index guarantees each label is drawn on the matching box.
box_order = list(means.index)

fig, ax = plt.subplots()
sns.boxplot(x='daynight', y='2m_temperature', data=climate_data_2010,
            order=box_order, ax=ax)
ax.set_title('2m_temperature by daynight (2010)')

# Overlay mean, minimum, and maximum values on each box.
for position, daynight_value in enumerate(box_order):
    ax.text(position, means.loc[daynight_value],
            f'Mean: {means.loc[daynight_value]:.2f}', ha='center', va='bottom')
    ax.text(position, mins.loc[daynight_value],
            f'Min: {mins.loc[daynight_value]:.2f}', ha='center', va='bottom')
    ax.text(position, maxs.loc[daynight_value],
            f'Max: {maxs.loc[daynight_value]:.2f}', ha='center', va='top')

plt.show()

In [ ]:
# Columns kept for the 2020 analysis: the grouping keys, then the climate
# variables that are averaged per group.
climate_key_columns = ['daynight', 'month']
climate_value_columns = [
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    '2m_temperature',
    'soil_temperature_level_1',
    'soil_temperature_level_2',
    'soil_temperature_level_3',
    'soil_temperature_level_4',
    'soil_type',
    'total_precipitation',
    'volumetric_soil_water_layer_1',
    'volumetric_soil_water_layer_2',
    'volumetric_soil_water_layer_3',
    'volumetric_soil_water_layer_4',
]
climate_data_2020 = climate_data_2020[climate_key_columns + climate_value_columns]

# Mean of every climate variable for each (daynight, month) pair.
# NOTE(review): soil_type looks like a categorical code; its mean is kept so
# the aggregate has the same columns as before — confirm it is meaningful.
group_by_aggregate2020 = (
    climate_data_2020
    .groupby(climate_key_columns)[climate_value_columns]
    .mean()
)

# Per-daynight temperature statistics, all derived from one groupby.
temperature_by_daynight = climate_data_2020.groupby('daynight')['2m_temperature']
means = temperature_by_daynight.mean()
mins = temperature_by_daynight.min()
maxs = temperature_by_daynight.max()

# groupby returns its keys sorted, while seaborn chooses box positions on its
# own (order of appearance for string categories). Pinning the box order to
# the statistics index guarantees each label is drawn on the matching box.
box_order = list(means.index)

fig, ax = plt.subplots()
sns.boxplot(x='daynight', y='2m_temperature', data=climate_data_2020,
            order=box_order, ax=ax)
ax.set_title('2m_temperature by daynight (2020)')

# Overlay mean, minimum, and maximum values on each box.
for position, daynight_value in enumerate(box_order):
    ax.text(position, means.loc[daynight_value],
            f'Mean: {means.loc[daynight_value]:.2f}', ha='center', va='bottom')
    ax.text(position, mins.loc[daynight_value],
            f'Min: {mins.loc[daynight_value]:.2f}', ha='center', va='bottom')
    ax.text(position, maxs.loc[daynight_value],
            f'Max: {maxs.loc[daynight_value]:.2f}', ha='center', va='top')

plt.show()