 # ML for Environmental Engineering 
 By: Suheyla Tozan and Steven Gutterman

In [None]:
import pandas as pd
from datetime import datetime
import requests
import numpy as np
import time

In [None]:
# Step 1: tag every row of the met data with its "week of the year".
# (The same step has to be repeated for the dataset that starts in 1993.)
file_path = 'metdata_2018-2023_ELABHWC.xlsx'
df = pd.read_excel(io=file_path)

# Function to calculate week of the year
def get_week_of_year(date_str):
    """Return the ISO week number of a calendar date.

    Args:
        date_str: Either a string in ``'YYYY-MM-DD'`` format, or an object
            that already provides ``isocalendar()`` (``datetime.date``,
            ``datetime.datetime``, ``pandas.Timestamp``). The second form
            matters because spreadsheet readers may hand back date cells
            already parsed, in which case ``strptime`` would raise
            ``TypeError``.

    Returns:
        The week component of ``isocalendar()`` (an int, 1-53). Dates close
        to New Year can belong to week 52/53 of the previous ISO year or to
        week 1 of the next one.

    Raises:
        ValueError: If a string is given that does not match ``'%Y-%m-%d'``.
    """
    if hasattr(date_str, 'isocalendar'):
        # Already a date-like object: no parsing needed.
        date_obj = date_str
    else:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
    return date_obj.isocalendar()[1]

# Map every 'date_gmt' entry to its week number and store it as a new column
df['week_of_year'] = df['date_gmt'].map(get_week_of_year)

# Persist the augmented table as an Excel workbook (row index is not written)
df.to_excel(excel_writer='metdata_2018-2023_ELABHWC.xlsx', index=False)

print("New spreadsheet with weeks of the year has been created.")

In [None]:
#function to replace weather conditions with randomly selected conditions over the past 30 years

# Explicit week windows for the weeks whose neighbourhood is not expressed as
# a plain numeric range in this notebook. Values are kept exactly as authored.
# NOTE(review): week 2 maps to weeks 1-5 and week 52 to [50, 51, 52, 1, 2],
# which are not symmetric +/-2 windows -- confirm this is intended.
_WRAPPED_WEEK_WINDOWS = {
    1: [52, 53, 1, 2, 3],
    2: [1, 2, 3, 4, 5],
    52: [50, 51, 52, 1, 2],
    53: [51, 52, 53, 1, 2],
}


def _week_window_mask(weeks, week):
    """Return a boolean mask selecting entries of ``weeks`` in the window of ``week``."""
    wrapped = _WRAPPED_WEEK_WINDOWS.get(week)
    if wrapped is not None:
        return weeks.isin(wrapped)
    # Every other week: inclusive range of two weeks either side.
    return weeks.between(week - 2, week + 2)


def random_weather(n, met_2018_2023, met_1993_2023):
    """Build ``n`` copies of a met table with every row resampled from a pool.

    For each row of ``met_2018_2023`` a replacement row is drawn at random
    from the rows of ``met_1993_2023`` that have the same ``'time_gmt'`` and
    whose ``'week_of_year'`` lies in the window around the row's week.

    Args:
        n: Number of resampled DataFrames to produce.
        met_2018_2023: Table to resample; must contain ``'time_gmt'`` and
            ``'week_of_year'`` columns. It is not modified.
        met_1993_2023: Pool of candidate rows; must contain the same two
            columns. It is not modified.

    Returns:
        A list of ``n`` DataFrames shaped like ``met_2018_2023``. In a
        replaced row, columns shared with the pool take the drawn row's
        values and columns missing from the pool become NaN. Rows without
        any candidate are left unchanged and a message is printed.
    """
    total_iterations = n * len(met_2018_2023)
    iteration_count = 0
    dataframes = []

    # The inputs are never mutated, so the selection keys and the pool columns
    # can be read once instead of being rebuilt for every row.
    hours = list(met_2018_2023['time_gmt'])
    weeks = list(met_2018_2023['week_of_year'])
    pool_hours = met_1993_2023['time_gmt']
    pool_weeks = met_1993_2023['week_of_year']
    pool_columns = set(met_1993_2023.columns)

    for _ in range(n):
        MET_copy = met_2018_2023.copy()  # working copy that receives the resampled rows

        for i, (hour_1, week_1) in enumerate(zip(hours, weeks)):
            iteration_count += 1

            if iteration_count % 1000 == 0:
                print(f"Progress: {iteration_count} out of {total_iterations} iterations completed")

            # Candidates: same hour, week inside the window around week_1.
            mask = (pool_hours == hour_1) & _week_window_mask(pool_weeks, week_1)
            candidates = met_1993_2023[mask]

            if candidates.empty:
                print(f"No data available for hour {hour_1} and week {week_1}") #prints if there is no data available
                continue

            # Draw up to 10 candidates (sampling more rows than exist would
            # raise ValueError), then pick one of them at random.
            MET_sample = candidates.sample(n=min(10, len(candidates)))
            pos = np.random.choice(len(MET_sample))

            # Write by position so the result is correct for any row index,
            # not only the default 0..N-1 one.
            for col_pos, col in enumerate(MET_copy.columns):
                if col in pool_columns:
                    value = MET_sample[col].iloc[pos]
                else:
                    value = np.nan
                MET_copy.iat[i, col_pos] = value

        dataframes.append(MET_copy)

    return dataframes

In [None]:
# Input workbooks: the table to resample and the pool to draw from.
# NOTE(review): random_weather needs a 'week_of_year' column in both files,
# and the preprocessing cell writes 'metdata_2018-2023_ELABHWC.xlsx' --
# confirm these are the intended, already-preprocessed inputs.
file_path_2018_2023 = 'metdata_2018-2023_ELA.xlsx'
file_path_1993_2023 = 'metdata_1993-2023_ELA.xlsx'

met_2018_2023 = pd.read_excel(file_path_2018_2023)
met_1993_2023 = pd.read_excel(file_path_1993_2023)

n = 25 #we want 25 final datasets (already available in the GitHub)

# Pass the frames loaded above; the previously used names MET_2013_2017 and
# MET_1988_2017 are not defined in this notebook and raised NameError.
modified_dataframes = random_weather(n, met_2018_2023, met_1993_2023)

#saves each DataFrame to a separate Excel file
for i, df in enumerate(modified_dataframes):
    df.to_excel(f"output_{i+1}.xlsx", index=False)