In [8]:
#######################################################################################################################
# PRIOR TO THIS SCRIPT:
# 1) Verify that the price is given in $/MMBtu.
# 2) Verify that the price for 1/1/2021 is given. If not, add it manually (take the most recent price available).
#    A warning is emitted when the first hour of the range has no price to carry forward.
# DO NOT FORGET TO ACCOUNT FOR THE TIME CHANGE (DST) IN MARCH AND NOVEMBER: handled by the `skipped_hour` and
# `repeated_hour` arguments of expand_daily_prices_to_hourly().
# For 2021, the 03:00 row on 3/14/2021 is deleted and the 02:00 row on 11/7/2021 is duplicated.
#######################################################################################################################

# Take the daily gas prices and add the missing dates (weekends) + hours
import warnings

import pandas as pd


def expand_daily_prices_to_hourly(
    daily,
    start="01/01/2021",
    end="01/01/2022",
    skipped_hour="2021-03-14 03:00:00",
    repeated_hour="2021-11-07 02:00:00",
):
    """Expand a table of daily prices into one row per hour, with DST adjustments.

    Parameters
    ----------
    daily : pandas.DataFrame
        Price table with a datetime "Date" column. Rows are matched on exact
        timestamp equality against the hourly grid; every other hour inherits
        the most recent matched row (forward fill).
    start, end : str or datetime-like
        Bounds of the hourly grid. `end` is exclusive: it is generated, filled
        and then removed, exactly like the former "delete the last row" step.
    skipped_hour : str, datetime-like or None
        Timestamp removed from the grid (spring time change). None disables it.
    repeated_hour : str, datetime-like or None
        Timestamp whose row is duplicated (autumn time change). None disables it.

    Returns
    -------
    pandas.DataFrame
        Hourly rows sorted by "Date" (ascending), carrying every column of `daily`.
    """
    # A Timedelta frequency avoids the "H" alias, which recent pandas deprecates.
    hourly_grid = pd.DataFrame(
        {"Date": pd.date_range(start=start, end=end, freq=pd.Timedelta(hours=1))}
    )

    # .ffill() replaces the deprecated fillna(method="ffill") spelling.
    filled = hourly_grid.merge(daily, on="Date", how="left").ffill()

    # Forward fill cannot invent a value for the very first hour: flag it
    # instead of silently writing empty prices at the top of the file.
    if not filled.empty and filled.iloc[0].isna().any():
        warnings.warn(
            f"No price available at {filled['Date'].iloc[0]}: the leading rows stay empty. "
            "Add the most recent known price for the first day of the range.",
            stacklevel=2,
        )

    # Remove the exclusive end bound (01/01/2022 with the defaults).
    filled = filled.iloc[:-1]

    if skipped_hour is not None:
        filled = filled[filled["Date"] != pd.Timestamp(skipped_hour)]

    if repeated_hour is not None:
        duplicated_row = filled[filled["Date"] == pd.Timestamp(repeated_hour)]
        filled = pd.concat([filled, duplicated_row])

    # The duplicated row was appended at the end; sorting puts it back in place.
    return filled.sort_values(by="Date", ascending=True)


# In a notebook __name__ is "__main__", so the cell runs as before; the guard
# only keeps the file I/O from running when the helper is imported or tested.
if __name__ == "__main__":
    # Read in the original CSV file
    df = pd.read_csv("SPGlobal_CommodityCharting(Chart)_23-May-2023.csv")

    # Convert the "Date" column to a datetime object
    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")

    # Hourly rows for 2021 (DST-adjusted), filled with the latest available price
    merged_df = expand_daily_prices_to_hourly(df)

    # Write the merged DataFrame to a new CSV file
    merged_df.to_csv("NG(HH)2021.csv", index=False)

In [24]:
#######################################################################################################################
# PRIOR TO THIS SCRIPT:
# 1) Verify that the price is given in $/MMBtu. Coal is usually $/tonne
# 2) Verify that the price for 1/1/2021 is given. If not, add it manually (take the most recent price available).
#    A warning is emitted when the first hour of the range has no price to carry forward.
# DO NOT FORGET TO ACCOUNT FOR THE TIME CHANGE (DST) IN MARCH AND NOVEMBER: handled by the `skipped_hour` and
# `repeated_hour` arguments of expand_daily_prices_to_hourly().
# For 2021, the 03:00 row on 3/14/2021 is deleted and the 02:00 row on 11/7/2021 is duplicated.
#######################################################################################################################

# Take the daily coal prices and add the missing dates (weekends) + hours
import warnings

import pandas as pd


def expand_daily_prices_to_hourly(
    daily,
    start="01/01/2021",
    end="01/01/2022",
    skipped_hour="2021-03-14 03:00:00",
    repeated_hour="2021-11-07 02:00:00",
):
    """Expand a table of daily prices into one row per hour, with DST adjustments.

    Parameters
    ----------
    daily : pandas.DataFrame
        Price table with a datetime "Date" column. Rows are matched on exact
        timestamp equality against the hourly grid; every other hour inherits
        the most recent matched row (forward fill).
    start, end : str or datetime-like
        Bounds of the hourly grid. `end` is exclusive: it is generated, filled
        and then removed, exactly like the former "delete the last row" step.
    skipped_hour : str, datetime-like or None
        Timestamp removed from the grid (spring time change). None disables it.
    repeated_hour : str, datetime-like or None
        Timestamp whose row is duplicated (autumn time change). None disables it.

    Returns
    -------
    pandas.DataFrame
        Hourly rows sorted by "Date" (ascending), carrying every column of `daily`.
    """
    # A Timedelta frequency avoids the "H" alias, which recent pandas deprecates.
    hourly_grid = pd.DataFrame(
        {"Date": pd.date_range(start=start, end=end, freq=pd.Timedelta(hours=1))}
    )

    # .ffill() replaces the deprecated fillna(method="ffill") spelling.
    filled = hourly_grid.merge(daily, on="Date", how="left").ffill()

    # Forward fill cannot invent a value for the very first hour: flag it
    # instead of silently writing empty prices at the top of the file.
    if not filled.empty and filled.iloc[0].isna().any():
        warnings.warn(
            f"No price available at {filled['Date'].iloc[0]}: the leading rows stay empty. "
            "Add the most recent known price for the first day of the range.",
            stacklevel=2,
        )

    # Remove the exclusive end bound (01/01/2022 with the defaults).
    filled = filled.iloc[:-1]

    if skipped_hour is not None:
        filled = filled[filled["Date"] != pd.Timestamp(skipped_hour)]

    if repeated_hour is not None:
        duplicated_row = filled[filled["Date"] == pd.Timestamp(repeated_hour)]
        filled = pd.concat([filled, duplicated_row])

    # The duplicated row was appended at the end; sorting puts it back in place.
    return filled.sort_values(by="Date", ascending=True)


# In a notebook __name__ is "__main__", so the cell runs as before; the guard
# only keeps the file I/O from running when the helper is imported or tested.
if __name__ == "__main__":
    # Read in the original CSV file
    df = pd.read_csv("Coal_12_31_21-12_31_20.csv")

    # Convert the "Date" column to a datetime object
    df["Date"] = pd.to_datetime(df["Date"], format="%m/%d/%Y")

    # Hourly rows for 2021 (DST-adjusted), filled with the latest available price
    merged_df = expand_daily_prices_to_hourly(df)

    # Write the merged DataFrame to a new CSV file
    merged_df.to_csv("Coal2021.csv", index=False)

In [25]:
# Reduce the price workbook to the average bus price ("HB_BUSAVG") only.

import pandas as pd

# sheet_name=None loads every sheet of the workbook into a dict of DataFrames
excel_file = pd.read_excel("2021prices.xlsx", sheet_name=None)

# Keep the "HB_BUSAVG" rows of each sheet, then stack the sheets in workbook order
bus_average_frames = []
for sheet_frame in excel_file.values():
    is_bus_average = sheet_frame["Settlement Point"] == "HB_BUSAVG"
    bus_average_frames.append(sheet_frame.loc[is_bus_average])
concatenated_df = pd.concat(bus_average_frames)

# Save the filtered prices as CSV (row index left out)
concatenated_df.to_csv("2021prices.csv", index=False)

In [1]:
#Convert the IntGenByFuelType file to a CSV file
import pandas as pd

# Name of the extra hourly column built from the four "(DST)" quarter-hour columns.
# Presumably only populated on the autumn time-change day -- TODO confirm against the workbook.
DST_HOUR_LABEL = '2 (DST)'


def custom_sort_key(x):
    """Return the within-day rank of timestamp `x`: its hour, with hour 0 ranked as 24.

    The '0' column is the sum of the 23:15-0:00 quarter hours, so it has to
    sort after hour 23 of the same date rather than before hour 1.
    """
    hour = x.hour
    return 24 if hour == 0 else hour


def _sum_quarter_hours(raw):
    """Return 'Fuel', 'Date' (as text) and one column per hour, each the sum of its four quarter hours."""
    hourly = raw[['Fuel', 'Date']].copy()
    hourly['Date'] = hourly['Date'].astype(str)
    for hour_ending in range(1, 25):
        start = hour_ending - 1
        # Hour 24 wraps around: its last quarter is labelled '0:00' and its column is named '0'.
        label = str(hour_ending % 24)
        hourly[label] = (raw[f'{start}:15'] + raw[f'{start}:30']
                         + raw[f'{start}:45'] + raw[f'{label}:00'])
    hourly[DST_HOUR_LABEL] = (raw['01:15 (DST)'] + raw['01:30 (DST)']
                              + raw['01:45 (DST)'] + raw['02:00 (DST)'])
    return hourly


def _pivot_by_fuel(long_data):
    """Turn long (DateHour, Date, Fuel, Value) rows into one row per hour and one column per fuel.

    pivot_table averages rows sharing the same (DateHour, Date, Fuel) -- its default aggregation.
    """
    return long_data.pivot_table(index=['DateHour', 'Date'], columns='Fuel', values='Value').reset_index()


def quarter_hours_to_hourly(raw, timestamp_column=''):
    """Convert quarter-hour generation by fuel (wide, one row per fuel and date) to an hourly table.

    Parameters
    ----------
    raw : pandas.DataFrame
        Needs 'Fuel', 'Date', the 96 quarter-hour columns '0:15' ... '23:45', '0:00'
        and the four '01:15 (DST)' ... '02:00 (DST)' columns. Other columns
        (e.g. 'Settlement Type', 'Total') are ignored. 'Date' must render as
        'YYYY-MM-DD' once converted to text.
    timestamp_column : str
        Name given to the timestamp column of the result. The default (an empty
        string) reproduces the header this notebook has always written.
        NOTE(review): a real name such as 'DateHour' would be clearer, but
        downstream readers of the CSV may rely on the unnamed first column.

    Returns
    -------
    pandas.DataFrame
        One row per hour, ordered by date then hour 1..23 and finally hour 0,
        with one column per fuel. Hours whose quarter-hour sum is NaN are dropped.
        The DST hour is kept as a second row stamped 02:00 on its date.

        NOTE(review): the last hour of each day (quarters 23:15-0:00) is stamped
        00:00 of the *same* date, so timestamps are not strictly increasing.
        Kept as is because downstream code may match on these labels.
    """
    # One row per (fuel, date, hour); hours with no data are dropped.
    long_data = (
        _sum_quarter_hours(raw)
        .melt(id_vars=['Fuel', 'Date'], var_name='Hour', value_name='Value')
        .dropna(subset=['Value'])
    )

    # The DST hour is parsed as a plain hour 2. It is tracked with a mask rather
    # than by editing the label inside a filtered slice (which triggers pandas'
    # SettingWithCopy behaviour and depended on str.replace's regex default).
    is_dst = long_data['Hour'].eq(DST_HOUR_LABEL)
    hour_text = long_data['Hour'].mask(is_dst, '2').str.zfill(2)
    long_data = long_data.assign(
        DateHour=pd.to_datetime(long_data['Date'] + ' ' + hour_text, format='%Y-%m-%d %H')
    )

    # Pivot the DST rows separately: pivoted together they would share a
    # timestamp with the regular 02:00 rows and be averaged with them.
    tables = [_pivot_by_fuel(long_data[~is_dst])]
    if is_dst.any():
        tables.append(_pivot_by_fuel(long_data[is_dst]))

    # Columns are a flat index here, so dropping the helper columns no longer
    # raises the MultiIndex PerformanceWarning.
    return (
        pd.concat(tables)
        .assign(SortKey=lambda frame: frame['DateHour'].apply(custom_sort_key))
        .sort_values(by=['Date', 'SortKey'])
        .drop(columns=['SortKey', 'Date'])
        .rename(columns={'DateHour': timestamp_column})
        .reset_index(drop=True)
    )


# In a notebook __name__ is "__main__", so the cell runs as before; the guard
# only keeps the file I/O from running when the helpers are imported or tested.
if __name__ == "__main__":
    # Specify the path to your Excel file
    excel_file = 'IntGenbyFuel2021.xlsx'

    # List of sheet names in the Excel file
    sheet_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

    # Read each monthly sheet and stack them into a single frame
    dfs = [pd.read_excel(excel_file, sheet_name=sheet_name) for sheet_name in sheet_names]
    combined_data = quarter_hours_to_hourly(pd.concat(dfs, ignore_index=True))

    print(combined_data.head(10))

    # Save the combined data to a CSV file
    combined_data.to_csv('IntGenbyFuel2021_hourly.csv', index=False)

Fuel                        Biomass         Coal          Gas        Gas-CC   
0    2021-01-01 01:00:00  29.641907  6226.403543  1264.612197  16947.276551  \
1    2021-01-01 02:00:00  29.653193  6166.239724  1336.967578  16334.880788   
2    2021-01-01 03:00:00  29.666134  6103.444333  1369.308736  16105.976972   
3    2021-01-01 04:00:00  29.670460  6236.221736  1370.516529  16917.627213   
4    2021-01-01 05:00:00  29.663568  6819.574839  1343.463393  17740.373602   
5    2021-01-01 06:00:00  28.529615  7703.488911  1499.707446  18377.752842   
6    2021-01-01 07:00:00  29.660938  8404.042171  1661.747781  19054.751362   
7    2021-01-01 08:00:00  29.664135  8864.096263  1673.959457  19600.166075   
8    2021-01-01 09:00:00  29.667813  8946.533163  1675.453231  19434.595076   
9    2021-01-01 10:00:00  29.627579  7976.463748  1650.879532  18961.349828   

Fuel      Hydro      Nuclear     Other        Solar          Wind  
0     35.600353  5103.196339  2.871013     0.000512  13984.762

  combined_data = combined_data.drop(columns=['SortKey', 'Date'])
