# Evapotranspiration Using Penman's Formula on Gridded Data (EMDNA)
## Ensemble Meteorological Dataset for North America
### Using probabilistic methods to estimate the uncertainty in spatial fields
https://essd.copernicus.org/articles/13/3337/2021/

*Each scenario pairs a projected socio-economic pathway with the radiative forcing (in W/m²) that the globe is expected to reach by the end of the century (four socio-economic scenarios, each paired with a forcing level).  **SSP1-2.6** = assumes reduced emissions, **SSP2-4.5** = assumes the same trend of emissions as historical, **SSP3-7.0** = medium to high emissions scenario, **SSP5-8.5** = optimal for economic development but with high emissions.*

![Penman Formula](penman.jpg)


# Import Libraries

In [78]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from scipy.stats import linregress
from itertools import combinations
import math
from matplotlib.dates import MonthLocator, DateFormatter
import requests
from io import StringIO
import os

# Hard Coded Variables

## Observed

In [79]:
# Observed inputs: the base folder is read for the tmean/precip tables and its
# 'trange' subfolder for the temperature-range tables.
observed_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/Observed'
observed_folder_trange = f'{observed_folder_t_p}/trange'

## S1

In [80]:
# Scenario 1 inputs: the base folder is read for the tmean/precip tables and its
# 'trange' subfolder for the temperature-range tables.
s1_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S1'
s1_folder_trange = f'{s1_folder_t_p}/trange'

## S2

In [81]:
# Scenario 2 inputs: the base folder is read for the tmean/precip tables and its
# 'trange' subfolder for the temperature-range tables.
s2_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S2'
s2_folder_trange = f'{s2_folder_t_p}/trange'

## S3

In [82]:
# Scenario 3 inputs: the base folder is read for the tmean/precip tables and its
# 'trange' subfolder for the temperature-range tables.
s3_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S3'
s3_folder_trange = f'{s3_folder_t_p}/trange'

## S4

In [83]:
# Scenario 4 inputs: the base folder is read for the tmean/precip tables and its
# 'trange' subfolder for the temperature-range tables.
s4_folder_t_p = 'C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S4'
s4_folder_trange = f'{s4_folder_t_p}/trange'

# Utility Functions

In [84]:
def read_csv_files_in_folder(folder_path):
    """
    Load every '.csv' file located directly inside a folder.

    Parameters:
    - folder_path: Path of the folder to scan (sub-folders are not descended into)

    Returns:
    - List of DataFrames, one per file whose name ends with '.csv', in sorted
      file-name order
    """
    # sorted() on strings is lexicographic, so e.g. 'x10.csv' comes before
    # 'x2.csv'. Callers that pair two folders by position rely on both folders
    # producing the same ordering.
    csv_names = [name for name in sorted(os.listdir(folder_path)) if name.endswith(".csv")]
    return [pd.read_csv(os.path.join(folder_path, name)) for name in csv_names]


In [85]:
def merge_dataframes(df_t_p_list, df_f_list):
    """
    Outer-join two equally long lists of DataFrames, pair by pair, on 'Date'.

    The i-th frame of the first list is merged with the i-th frame of the
    second list. Any 'Date' column that is not already datetime-typed is
    converted with pd.to_datetime, and that conversion is written back into
    the input frame itself.

    Parameters:
    - df_t_p_list: List of DataFrames with 'Date', 'tmean', and 'precip' columns
    - df_f_list: List of DataFrames with 'Date' and 'trange' columns

    Returns:
    - List of merged DataFrames, in the same order as the inputs

    Raises:
    - AssertionError if the two lists differ in length
    """
    assert len(df_t_p_list) == len(df_f_list), "The lengths of input lists must be the same."

    def _ensure_datetime(frame):
        # Convert in place only when needed so both sides share a key dtype.
        if not pd.api.types.is_datetime64_any_dtype(frame['Date']):
            frame['Date'] = pd.to_datetime(frame['Date'])

    combined = []
    for left, right in zip(df_t_p_list, df_f_list):
        _ensure_datetime(left)
        _ensure_datetime(right)
        # how='outer' keeps dates that appear in only one of the two frames.
        combined.append(pd.merge(left, right, on='Date', how='outer'))

    return combined

In [120]:
def calculate_and_add_column(df_list, elev=1426, lat=49.575):
    """
    Add an 'evaporation' column to each DataFrame in the list.

    For every row the function computes

        rann        = mean of 'trange' over all rows in the same calendar month
        tmean_tdew  = 0.0023*elev + 0.37*tmean + 0.53*trange + 0.35*rann - 10.9
        evaporation = (524*(tmean + 0.006*elev)/(100 - lat) + 15*tmean_tdew)
                      / (80 - tmean)

    and stores 0 instead wherever 'tmean' >= 0 does not hold (negative or
    missing tmean). Intermediate quantities are kept as local Series, so no
    scratch columns are written to the frame and existing columns other than
    'evaporation' are left untouched.

    NOTE(review): the expression has the shape of Linacre's (1977)
    temperature-only simplification of the Penman equation. That paper is
    usually quoted with a coefficient of 700 (open water) or 500 (vegetated
    surface) and with Rann as the annual range of monthly mean temperature.
    Confirm that 524 and the monthly-mean 'trange' used here are intentional.

    Parameters:
    - df_list: List of DataFrames with a datetime 'Date' column and numeric
      'tmean' and 'trange' columns
    - elev: Site elevation in metres (default 1426)
    - lat: Site latitude in degrees (default 49.575)

    Returns:
    - None (modifies the DataFrames in place)
    """
    for df in df_list:
        month = df['Date'].dt.month

        # Mean temperature range of the calendar month each row belongs to
        # (pooled over all years present in the frame).
        rann = df['trange'].groupby(month).transform('mean')

        # Estimated difference between mean temperature and dew point.
        tmean_tdew = (
            0.0023 * elev + 0.37 * df['tmean'] + 0.53 * df['trange'] + 0.35 * rann - 10.9
        )

        evaporation = (
            (524 * (df['tmean'] + 0.006 * elev) / (100 - lat) + 15 * tmean_tdew) / (80 - df['tmean'])
        )

        # Evaporation is set to zero whenever tmean is below zero (or missing).
        df['evaporation'] = evaporation.where(df['tmean'] >= 0, other=0)


# Data Engineering

## Observed

In [88]:
# Load the observed inputs: one DataFrame per CSV file found in each folder,
# in the sorted file-name order that read_csv_files_in_folder returns.
observed_t_p = read_csv_files_in_folder(observed_folder_t_p)
observed_trange = read_csv_files_in_folder(observed_folder_trange)

In [89]:
# Pair the two lists position by position and outer-join each pair on 'Date'.
observed_dfs = merge_dataframes(observed_t_p, observed_trange)

In [90]:
# Preview the first two rows of the first merged observed DataFrame.
observed_dfs[0].head(2)

Unnamed: 0,Date,tmean,precip,trange
0,1979-01-01,-24.9175,2.7711,12.577
1,1979-01-02,-21.8485,0.70797,9.769


## S1

In [91]:
# Load the scenario 1 inputs: one DataFrame per CSV file found in each folder,
# in the sorted file-name order that read_csv_files_in_folder returns.
s1_t_p = read_csv_files_in_folder(s1_folder_t_p)
s1_trange = read_csv_files_in_folder(s1_folder_trange)

In [92]:
# Pair the two lists position by position and outer-join each pair on 'Date'.
s1_dfs = merge_dataframes(s1_t_p, s1_trange)

## S2

In [93]:
# Load the scenario 2 inputs: one DataFrame per CSV file found in each folder,
# in the sorted file-name order that read_csv_files_in_folder returns.
s2_t_p = read_csv_files_in_folder(s2_folder_t_p)
s2_trange = read_csv_files_in_folder(s2_folder_trange)

In [94]:
# Pair the two lists position by position and outer-join each pair on 'Date'.
s2_dfs = merge_dataframes(s2_t_p, s2_trange)

## S3

In [95]:
# Load the scenario 3 inputs: one DataFrame per CSV file found in each folder,
# in the sorted file-name order that read_csv_files_in_folder returns.
s3_t_p = read_csv_files_in_folder(s3_folder_t_p)
s3_trange = read_csv_files_in_folder(s3_folder_trange)

In [96]:
# Pair the two lists position by position and outer-join each pair on 'Date'.
s3_dfs = merge_dataframes(s3_t_p, s3_trange)

## S4

In [97]:
# Load the scenario 4 inputs: one DataFrame per CSV file found in each folder,
# in the sorted file-name order that read_csv_files_in_folder returns.
s4_t_p = read_csv_files_in_folder(s4_folder_t_p)
s4_trange = read_csv_files_in_folder(s4_folder_trange)

In [98]:
# Pair the two lists position by position and outer-join each pair on 'Date'.
s4_dfs = merge_dataframes(s4_t_p, s4_trange)

In [99]:
# Preview the first rows of the merged scenario 4 DataFrame at list index 39.
s4_dfs[39].head()

Unnamed: 0,Date,precip,tmean,trange
0,2015-01-01,6.514761,1.966477,4.000172
1,2015-01-02,8.052436,2.692204,4.202973
2,2015-01-03,1.416353,0.828085,3.932778
3,2015-01-04,23.150974,2.364892,5.937545
4,2015-01-05,5.724569,-1.39087,6.359421


# Calculations

## Observed

In [100]:
# Add the 'evaporation' column to every observed DataFrame (modified in place).
calculate_and_add_column(observed_dfs)

In [101]:
# Preview the first rows of the first observed DataFrame after the calculation.
observed_dfs[0].head()

Unnamed: 0,Date,tmean,precip,trange,evaporation
0,1979-01-01,-24.9175,2.7711,12.577,0.0
1,1979-01-02,-21.8485,0.70797,9.769,0.0
2,1979-01-03,-22.255,0.11758,9.818,0.0
3,1979-01-04,-23.5575,0.0,12.729,0.0
4,1979-01-05,-21.4305,0.000486,13.063,0.0


In [102]:
# distinct_values = observed_dfs[0]['rann'].unique()
# print(distinct_values)

In [103]:
# mean_evaporation_by_month = observed_dfs[0].groupby('Month')['evaporation'].mean()
# print(mean_evaporation_by_month)

In [104]:
# days_in_month = observed_dfs[0].groupby('Month')['Day'].nunique()

# mean_evaporation_by_month = observed_dfs[0].groupby('Month')['evaporation'].mean()

# total_evaporation_by_month = mean_evaporation_by_month * days_in_month

# print(total_evaporation_by_month)

In [105]:
# total_evaporation_sum = total_evaporation_by_month.sum()

# print("Total evaporation:", total_evaporation_sum)

In [106]:
# Save every observed DataFrame to the observed 'penman' output folder.
for i, df in enumerate(observed_dfs):
    # A single constant file name would make each frame overwrite the one
    # written before it, leaving only the last on disk. Keep the established
    # name when there is exactly one frame, and number the files (as the
    # scenario exports do) when there are several.
    if len(observed_dfs) == 1:
        csv_filename = "observed_t_p_e.csv"
    else:
        csv_filename = f"observed_{i+1}_t_p_e.csv"  # Naming based on position in the list
    csv_filepath = os.path.join("C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/observed/penman", csv_filename)
    # index=False: write only the data columns, not the row index.
    df.to_csv(csv_filepath, index=False)


## **Scenario 1:** SSP1-2.6 = *assumes reduced emissions*

In [107]:
# Add the 'evaporation' column to every scenario 1 DataFrame (modified in place).
calculate_and_add_column(s1_dfs)

In [108]:
# Preview the first rows of the first scenario 1 DataFrame after the calculation.
s1_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,evaporation
0,2015-01-01,12.284951,-4.077306,2.030853,0.0
1,2015-01-02,2.865495,-3.112623,0.101487,0.0
2,2015-01-03,6.831872,-0.506162,5.111435,0.0
3,2015-01-04,1.807658,-5.040199,10.397952,0.0
4,2015-01-05,1.872311,-3.14241,4.184681,0.0


In [109]:
# Write each DataFrame in s1_dfs to its own CSV in the S1 'penman' folder.
# Files are numbered from 1 in list order: s1_1_t_p_e.csv, s1_2_t_p_e.csv, ...
s1_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S1/penman"
for member_no, member_df in enumerate(s1_dfs, start=1):
    member_path = os.path.join(s1_penman_dir, f"s1_{member_no}_t_p_e.csv")
    # index=False: write only the data columns, not the row index.
    member_df.to_csv(member_path, index=False)


## **Scenario 2:** SSP2-4.5 = *assumes the same trend of emissions as historical*

In [110]:
# Add the 'evaporation' column to every scenario 2 DataFrame (modified in place).
calculate_and_add_column(s2_dfs)

In [111]:
# Preview the first rows of the first scenario 2 DataFrame after the calculation.
s2_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,evaporation
0,2015-01-01,5.913979,1.064339,3.946321,0.838265
1,2015-01-02,7.351886,1.542983,4.307355,0.977218
2,2015-01-03,1.266364,-0.072935,3.493105,0.0
3,2015-01-04,19.761959,1.357572,6.058133,1.114316
4,2015-01-05,5.841811,-3.309855,7.554837,0.0


In [112]:
# Write each DataFrame in s2_dfs to its own CSV in the S2 'penman' folder.
# Files are numbered from 1 in list order: s2_1_t_p_e.csv, s2_2_t_p_e.csv, ...
s2_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S2/penman"
for member_no, member_df in enumerate(s2_dfs, start=1):
    member_path = os.path.join(s2_penman_dir, f"s2_{member_no}_t_p_e.csv")
    # index=False: write only the data columns, not the row index.
    member_df.to_csv(member_path, index=False)


## **Scenario 3:** SSP3-7.0 = *medium to high emissions scenario*

In [113]:
# Add the 'evaporation' column to every scenario 3 DataFrame (modified in place).
calculate_and_add_column(s3_dfs)

In [114]:
# Preview the first rows of the first scenario 3 DataFrame after the calculation.
s3_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,evaporation
0,2015-01-01,7.079445,1.407242,3.878722,0.888017
1,2015-01-02,8.912531,2.092265,3.747983,1.022655
2,2015-01-03,1.376515,-0.056117,3.577113,0.0
3,2015-01-04,23.199167,1.49451,6.20435,1.142234
4,2015-01-05,7.392652,-1.238661,4.011037,0.0


In [115]:
# Write each DataFrame in s3_dfs to its own CSV in the S3 'penman' folder.
# Files are numbered from 1 in list order: s3_1_t_p_e.csv, s3_2_t_p_e.csv, ...
s3_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S3/penman"
for member_no, member_df in enumerate(s3_dfs, start=1):
    member_path = os.path.join(s3_penman_dir, f"s3_{member_no}_t_p_e.csv")
    # index=False: write only the data columns, not the row index.
    member_df.to_csv(member_path, index=False)


## **Scenario 4:** SSP5-8.5 = *optimal for economic development but with high emissions.*

In [121]:
# Add the 'evaporation' column to every scenario 4 DataFrame (modified in place).
calculate_and_add_column(s4_dfs)

In [122]:
# Preview the first rows of the first scenario 4 DataFrame after the calculation.
s4_dfs[0].head()

Unnamed: 0,Date,precip,tmean,trange,evaporation,Year,Month,Day,rann,tmean_tdew
0,2015-01-01,6.129145,1.677566,3.966279,0.925677,2015,1,1,7.546294,-2.25617
1,2015-01-02,4.294818,-0.198543,5.185653,0.0,2015,1,2,7.546294,-2.304062
2,2015-01-03,0.0,-1.132881,4.745145,0.0,2015,1,3,7.546294,-2.883236
3,2015-01-04,1.04762,-2.067218,4.304637,0.0,2015,1,4,7.546294,-3.46241
4,2015-01-05,1.139335,-2.429765,5.00771,0.0,2015,1,5,7.546294,-3.223924


In [123]:
# Preview the first rows of the scenario 4 DataFrame at list index 39 after the calculation.
s4_dfs[39].head()

Unnamed: 0,Date,precip,tmean,trange,evaporation,Year,Month,Day,rann,tmean_tdew
0,2015-01-01,6.514761,1.966477,4.000172,1.002543,2015,1,1,7.709246,-2.074276
1,2015-01-02,8.052436,2.692204,4.202973,1.182462,2015,1,2,7.709246,-1.698272
2,2015-01-03,1.416353,0.828085,3.932778,0.75214,2015,1,3,7.709246,-2.5312
3,2015-01-04,23.150974,2.364892,5.937545,1.28789,2015,1,4,7.709246,-0.900055
4,2015-01-05,5.724569,-1.39087,6.359421,0.0,2015,1,5,7.709246,-2.066093


In [124]:
# distinct_values = s4_dfs[39]['rann'].unique()
# print(distinct_values)

[ 7.70924584  8.87317551  9.59972532 11.24756708 12.39365254 12.54254388
 14.85505696 14.88137769 13.71757729 11.11042094  7.58071195  7.17676915]


In [125]:
# days_in_month = s4_dfs[0].groupby('Month')['Day'].nunique()

# mean_evaporation_by_month = s4_dfs[0].groupby('Month')['evaporation'].mean()

# total_evaporation_by_month = mean_evaporation_by_month * days_in_month

# print(total_evaporation_by_month)

Month
1      15.074350
2      27.338075
3      43.279912
4      72.807362
5     121.550389
6     165.094323
7     239.371087
8     243.721795
9     161.230956
10     91.125795
11     25.134550
12      9.201502
dtype: float64


In [126]:
# total_evaporation_sum = total_evaporation_by_month.sum()

# print("Total evaporation:", total_evaporation_sum)

Total evaporation: 1214.9300965710481


In [119]:
# Write each DataFrame in s4_dfs to its own CSV in the S4 'penman' folder.
# Files are numbered from 1 in list order: s4_1_t_p_e.csv, s4_2_t_p_e.csv, ...
s4_penman_dir = "C:/Users/14037/OneDrive - University of Calgary/Documents/ENCI_570/TM_PHES_code/S4/penman"
for member_no, member_df in enumerate(s4_dfs, start=1):
    member_path = os.path.join(s4_penman_dir, f"s4_{member_no}_t_p_e.csv")
    # index=False: write only the data columns, not the row index.
    member_df.to_csv(member_path, index=False)
