# Evapotranspiration Using Penman's Formula on Gridded Data (EMDNA)
## Ensemble Meteorological Dataset for North America
### Using probabilistic methods to estimate the uncertainty in spatial fields
https://essd.copernicus.org/articles/13/3337/2021/

*Each scenario pairs a projected socio-economic pathway with the radiative forcing (in W/m²) that the globe will reach by the end of the century, giving four socio-economic/forcing scenarios. **SSP1-2.6** = assumes reduced emissions, **SSP2-4.5** = assumes the same trend of emissions as historical, **SSP3-7.0** = medium to high emissions scenario, **SSP5-8.5** = optimal for economic development but with high emissions.*

![Penman Formula](penman.jpg)


# Import Libraries

In [171]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from scipy.stats import linregress
from itertools import combinations
import math
from matplotlib.dates import MonthLocator, DateFormatter
import requests
from io import StringIO
import os

# Hard Coded Variables

## Observed

In [172]:
# Observed inputs: the base folder is read for the tmean/precip CSVs and its
# 'trange' sub-folder for the matching trange CSVs.
observed_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/Observed'
observed_folder_trange = f"{observed_folder_t_p}/trange"

## S1

In [173]:
# Scenario S1 inputs: the base folder is read for the tmean/precip CSVs and its
# 'trange' sub-folder for the matching trange CSVs.
s1_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S1'
s1_folder_trange = f"{s1_folder_t_p}/trange"

## S2

In [174]:
# Scenario S2 inputs: the base folder is read for the tmean/precip CSVs and its
# 'trange' sub-folder for the matching trange CSVs.
s2_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S2'
s2_folder_trange = f"{s2_folder_t_p}/trange"

## S3

In [175]:
# Scenario S3 inputs: the base folder is read for the tmean/precip CSVs and its
# 'trange' sub-folder for the matching trange CSVs.
s3_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S3'
s3_folder_trange = f"{s3_folder_t_p}/trange"

## S4

In [176]:
# Scenario S4 inputs: the base folder is read for the tmean/precip CSVs and its
# 'trange' sub-folder for the matching trange CSVs.
s4_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S4'
s4_folder_trange = f"{s4_folder_t_p}/trange"

# Utility Functions

In [177]:
def read_csv_files_in_folder(folder_path):
    """
    Load every CSV file found directly inside a folder.

    Parameters:
    - folder_path: Path of the directory to scan (entries are listed, not recursed into)

    Returns:
    - List of DataFrames, one per entry whose name ends with ".csv",
      in sorted file-name order
    """
    # Sorting the names makes the order of the returned list deterministic.
    csv_names = [name for name in sorted(os.listdir(folder_path)) if name.endswith(".csv")]
    return [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]


In [178]:
def merge_dataframes(df_t_p_list, df_f_list):
    """
    Merge two lists of DataFrames pairwise (by list position) on the 'Date' column.

    The input DataFrames are left untouched: when a 'Date' column has to be
    converted to datetime, the conversion is done on a copy.

    Parameters:
    - df_t_p_list: List of DataFrames with 'Date', 'tmean', and 'precip' columns, sorted by 'Date'
    - df_f_list: List of DataFrames with 'Date' and 'trange' columns, sorted by 'Date'

    Returns:
    - List of DataFrames, each the outer merge on 'Date' of the two frames
      found at the same position in the input lists

    Raises:
    - AssertionError: if the two lists do not have the same length
    """
    if len(df_t_p_list) != len(df_f_list):
        # Raised explicitly (not via `assert`) so the check still runs under
        # `python -O`; otherwise zip() below would silently truncate.
        # The exception type is kept for backward compatibility.
        raise AssertionError("The lengths of input lists must be the same.")

    def _with_datetime_date(df):
        # Return df with a datetime 'Date' column, copying only when a
        # conversion is actually required.
        if pd.api.types.is_datetime64_any_dtype(df['Date']):
            return df
        return df.assign(Date=pd.to_datetime(df['Date']))

    return [
        pd.merge(_with_datetime_date(df_t_p), _with_datetime_date(df_f), on='Date', how='outer')
        for df_t_p, df_f in zip(df_t_p_list, df_f_list)
    ]

In [179]:
def calculate_and_add_column(df_list):
    """
    Append per-year July/December mean temperatures and an evaporation estimate to each DataFrame.

    Every DataFrame needs a datetime 'Date' column plus 'tmean' and 'trange'.
    The columns 'tmean_jul', 'tmean_dec' and 'evaporation' are written, and any
    column named 'Year', 'Month' or 'Day' is removed.

    Parameters:
    - df_list: List of DataFrames

    Returns:
    - None (modifies the DataFrames in place)
    """
    helper_columns = ['Year', 'Month', 'Day']

    for frame in df_list:
        year = frame['Date'].dt.year
        month = frame['Date'].dt.month

        # Mean 'tmean' of July / December per calendar year, broadcast to every
        # row of that year. Years without data for the month end up as NaN.
        for column, month_number in (('tmean_jul', 7), ('tmean_dec', 12)):
            in_month = month == month_number
            monthly_mean = frame.loc[in_month, 'tmean'].groupby(year[in_month]).mean()
            frame[column] = year.map(monthly_mean)

        tmean = frame['tmean']
        jul_minus_dec = frame['tmean_jul'] - frame['tmean_dec']

        # NOTE(review): 10.2, 50.449 and 3.91 look like site-specific terms of
        # the formula pictured in penman.jpg — confirm before reusing this for
        # another location.
        first_term = 700 * (tmean + 10.2) / 50.449
        second_term = 15 * (3.91 + 0.37 * tmean + 0.53 * frame['trange'] + 0.35 * jul_minus_dec - 10.9)
        frame['evaporation'] = (first_term + second_term) / (80 - tmean)

        # Leave no date helper columns behind in the output.
        frame.drop(columns=helper_columns, inplace=True, errors='ignore')

# Data Engineering

## Observed

In [180]:
# Load the observed inputs: one DataFrame per CSV file, in sorted file-name order.
observed_t_p = read_csv_files_in_folder(observed_folder_t_p)
observed_trange = read_csv_files_in_folder(observed_folder_trange)

In [181]:
# Outer-join the two lists frame by frame on 'Date'.
# NOTE(review): pairing is by list position — assumes both folders hold
# matching files in the same sorted order; confirm.
observed_dfs = merge_dataframes(observed_t_p, observed_trange)

In [182]:
observed_dfs[0].head(2)

Unnamed: 0,Date,tmean,precip,trange
0,1979-01-01,-24.9175,2.7711,12.577
1,1979-01-02,-21.8485,0.70797,9.769


## S1

In [183]:
# Load the S1 inputs: one DataFrame per CSV file, in sorted file-name order.
s1_t_p = read_csv_files_in_folder(s1_folder_t_p)
s1_trange = read_csv_files_in_folder(s1_folder_trange)

In [184]:
# Outer-join the two lists frame by frame on 'Date'.
# NOTE(review): pairing is by list position — assumes both folders hold
# matching files in the same sorted order; confirm.
s1_dfs = merge_dataframes(s1_t_p, s1_trange)

## S2

In [185]:
# Load the S2 inputs: one DataFrame per CSV file, in sorted file-name order.
s2_t_p = read_csv_files_in_folder(s2_folder_t_p)
s2_trange = read_csv_files_in_folder(s2_folder_trange)

In [186]:
# Outer-join the two lists frame by frame on 'Date'.
# NOTE(review): pairing is by list position — assumes both folders hold
# matching files in the same sorted order; confirm.
s2_dfs = merge_dataframes(s2_t_p, s2_trange)

## S3

In [187]:
# Load the S3 inputs: one DataFrame per CSV file, in sorted file-name order.
s3_t_p = read_csv_files_in_folder(s3_folder_t_p)
s3_trange = read_csv_files_in_folder(s3_folder_trange)

In [188]:
# Outer-join the two lists frame by frame on 'Date'.
# NOTE(review): pairing is by list position — assumes both folders hold
# matching files in the same sorted order; confirm.
s3_dfs = merge_dataframes(s3_t_p, s3_trange)

## S4

In [189]:
# Load the S4 inputs: one DataFrame per CSV file, in sorted file-name order.
s4_t_p = read_csv_files_in_folder(s4_folder_t_p)
s4_trange = read_csv_files_in_folder(s4_folder_trange)

In [190]:
# Outer-join the two lists frame by frame on 'Date'.
# NOTE(review): pairing is by list position — assumes both folders hold
# matching files in the same sorted order; confirm.
s4_dfs = merge_dataframes(s4_t_p, s4_trange)

In [191]:
s4_dfs[39].head()

Unnamed: 0,Date,precip,tmean,trange
0,2015-01-01,6.514761,1.966477,4.000172
1,2015-01-02,8.052436,2.692204,4.202973
2,2015-01-03,1.416353,0.828085,3.932778
3,2015-01-04,23.150974,2.364892,5.937545
4,2015-01-05,5.724569,-1.39087,6.359421


# Calculations

## Observed

In [192]:
# Adds 'tmean_jul', 'tmean_dec' and 'evaporation' to every observed frame in place (returns None).
calculate_and_add_column(observed_dfs)

In [193]:
observed_dfs[0].head()

Unnamed: 0,Date,tmean,precip,trange,tmean_jul,tmean_dec,evaporation
0,1979-01-01,-24.9175,2.7711,12.577,14.36335,-4.527383,-2.365571
1,1979-01-02,-21.8485,0.70797,9.769,14.36335,-4.527383,-2.070692
2,1979-01-03,-22.255,0.11758,9.818,14.36335,-4.527383,-2.135873
3,1979-01-04,-23.5575,0.0,12.729,14.36335,-4.527383,-2.129859
4,1979-01-05,-21.4305,0.000486,13.063,14.36335,-4.527383,-1.740993


In [194]:
# Save every observed frame (now carrying the evaporation column) to CSV.
observed_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/observed/penman"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(observed_penman_dir, exist_ok=True)
for i, df in enumerate(observed_dfs):
    # A single observed frame keeps the established file name. With several
    # frames each one gets a 1-based suffix; previously they all shared one
    # name, so every frame overwrote the one before it.
    if len(observed_dfs) == 1:
        csv_filename = "observed_t_p_e.csv"
    else:
        csv_filename = f"observed_{i+1}_t_p_e.csv"
    csv_filepath = os.path.join(observed_penman_dir, csv_filename)
    df.to_csv(csv_filepath, index=False)


## **Scenario 1:** SSP1-2.6 = *assumes reduced emissions*

In [195]:
# Adds 'tmean_jul', 'tmean_dec' and 'evaporation' to every S1 frame in place (returns None).
calculate_and_add_column(s1_dfs)

In [196]:
s1_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,tmean_jul,tmean_dec,evaporation
0,2015-01-01,12.284951,-4.077306,2.030853,14.100862,-8.641855,1.106367
1,2015-01-02,2.865495,-3.112623,0.101487,14.100862,-8.641855,1.160127
2,2015-01-03,6.831872,-0.506162,5.111435,14.100862,-8.641855,2.321336
3,2015-01-04,1.807658,-5.040199,10.397952,14.100862,-8.641855,1.656089
4,2015-01-05,1.872311,-3.14241,4.184681,14.100862,-8.641855,1.543183


In [197]:
# Save every S1 frame (now carrying the evaporation column) to its own CSV.
s1_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S1/penman"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(s1_penman_dir, exist_ok=True)
for i, df in enumerate(s1_dfs):
    # File name carries the 1-based position of the frame in the list
    csv_filename = f"s1_{i+1}_t_p_e.csv"
    csv_filepath = os.path.join(s1_penman_dir, csv_filename)
    df.to_csv(csv_filepath, index=False)


## **Scenario 2:** SSP2-4.5 = *assumes the same trend of emissions as historical*

In [198]:
# Adds 'tmean_jul', 'tmean_dec' and 'evaporation' to every S2 frame in place (returns None).
calculate_and_add_column(s2_dfs)

In [199]:
s2_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,tmean_jul,tmean_dec,evaporation
0,2015-01-01,5.913979,1.064339,3.946321,15.245039,-6.250266,2.553699
1,2015-01-02,7.351886,1.542983,4.307355,15.245039,-6.250266,2.72437
2,2015-01-03,1.266364,-0.072935,3.493105,15.245039,-6.250266,2.196533
3,2015-01-04,19.761959,1.357572,6.058133,15.245039,-6.250266,2.849136
4,2015-01-05,5.841811,-3.309855,7.554837,15.245039,-6.250266,1.744033


In [200]:
# Save every S2 frame (now carrying the evaporation column) to its own CSV.
s2_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S2/penman"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(s2_penman_dir, exist_ok=True)
for i, df in enumerate(s2_dfs):
    # File name carries the 1-based position of the frame in the list
    csv_filename = f"s2_{i+1}_t_p_e.csv"
    csv_filepath = os.path.join(s2_penman_dir, csv_filename)
    df.to_csv(csv_filepath, index=False)


## **Scenario 3:** SSP3-7.0 = *medium to high emissions scenario*

In [201]:
# Adds 'tmean_jul', 'tmean_dec' and 'evaporation' to every S3 frame in place (returns None).
calculate_and_add_column(s3_dfs)

In [202]:
s3_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,tmean_jul,tmean_dec,evaporation
0,2015-01-01,7.079445,1.407242,3.878722,14.947745,-7.789648,2.725728
1,2015-01-02,8.912531,2.092265,3.747983,14.947745,-7.789648,2.907156
2,2015-01-03,1.376515,-0.056117,3.577113,14.947745,-7.789648,2.290872
3,2015-01-04,23.199167,1.49451,6.20435,14.947745,-7.789648,2.98586
4,2015-01-05,7.392652,-1.238661,4.011037,14.947745,-7.789648,2.017225


In [203]:
# Save every S3 frame (now carrying the evaporation column) to its own CSV.
s3_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S3/penman"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(s3_penman_dir, exist_ok=True)
for i, df in enumerate(s3_dfs):
    # File name carries the 1-based position of the frame in the list
    csv_filename = f"s3_{i+1}_t_p_e.csv"
    csv_filepath = os.path.join(s3_penman_dir, csv_filename)
    df.to_csv(csv_filepath, index=False)


## **Scenario 4:** SSP5-8.5 = *optimal for economic development but with high emissions.*

In [204]:
# Adds 'tmean_jul', 'tmean_dec' and 'evaporation' to every S4 frame in place (returns None).
calculate_and_add_column(s4_dfs)

In [205]:
s4_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,tmean_jul,tmean_dec,evaporation
0,2015-01-01,6.129145,1.677566,3.966279,13.715161,-2.269608,2.358435
1,2015-01-02,4.294818,-0.198543,5.185653,13.715161,-2.269608,1.969715
2,2015-01-03,0.0,-1.132881,4.745145,13.715161,-2.269608,1.680162
3,2015-01-04,1.04762,-2.067218,4.304637,13.715161,-2.269608,1.397201
4,2015-01-05,1.139335,-2.429765,5.00771,13.715161,-2.269608,1.373427


In [206]:
s4_dfs[39].head()

Unnamed: 0,Date,precip,tmean,trange,tmean_jul,tmean_dec,evaporation
0,2015-01-01,6.514761,1.966477,4.000172,14.007622,-13.559427,3.221783
1,2015-01-02,8.052436,2.692204,4.202973,14.007622,-13.559427,3.455238
2,2015-01-03,1.416353,0.828085,3.932778,14.007622,-13.559427,2.889378
3,2015-01-04,23.150974,2.364892,5.937545,14.007622,-13.559427,3.536397
4,2015-01-05,5.724569,-1.39087,6.359421,14.007622,-13.559427,2.518038


In [207]:
# Save every S4 frame (now carrying the evaporation column) to its own CSV.
s4_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S4/penman"
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(s4_penman_dir, exist_ok=True)
for i, df in enumerate(s4_dfs):
    # File name carries the 1-based position of the frame in the list
    csv_filename = f"s4_{i+1}_t_p_e.csv"
    csv_filepath = os.path.join(s4_penman_dir, csv_filename)
    df.to_csv(csv_filepath, index=False)
