In [1]:
import csv
import math
import sys
from datetime import datetime as dt
from datetime import timezone as tz

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz as pytz
import tensorflow as tf

In [60]:
# Column position of the real-time carbon intensity. calculateCarbonIntensity
# inserts "carbon_intensity" here and treats every column from this position
# onward as a per-source production column.
CARBON_INTENSITY_COLUMN = 1

# Operational carbon emission factors, as used by electricityMap. See:
# https://github.com/electricitymaps/electricitymaps-contrib/blob/master/config/zones/US-MIDW-MISO.yaml

# Median direct emission factors in g/kWh, keyed by forecast column name.
carbonRateDirect = {
    "avg_coal_production_forecast": 1003.7,
    "avg_biomass_production_forecast": 0,
    "avg_nat_gas_production_forecast": 409.43,
    "avg_geothermal_production_forecast": 0,
    "avg_hydro_production_forecast": 0,
    "avg_nuclear_production_forecast": 0,
    "avg_oil_production_forecast": 406,
    "avg_solar_production_forecast": 0,
    "avg_unknown_production_forecast": 575,
    "avg_others_production_forecast": 575,
    "avg_wind_production_forecast": 0,
}

In [61]:
def initialize(inFileName):
    """Load a generation CSV and sanitise its values.

    The file is read with its first row as the header and the "UTC time"
    column parsed as datetimes. Missing values are then replaced with 0 and
    negative numeric values are raised to 0. The first/last two rows and the
    column index are printed as a quick sanity check.

    Args:
        inFileName: Path (or any other source accepted by ``pd.read_csv``) of
            the CSV file. It is expected to contain a "UTC time" column.

    Returns:
        pandas.DataFrame: The cleaned table.
    """
    print("FILE: ", inFileName)
    # `infer_datetime_format` is deprecated in read_csv since pandas 2.0;
    # omitting it does not change the parsed values.
    dataset = pd.read_csv(inFileName, header=0, parse_dates=["UTC time"])
    print(dataset.head(2))
    print(dataset.tail(2))

    dataset = dataset.replace(np.nan, 0)  # replace NaN with 0.0

    # Clip negatives through the public API. The previous
    # `_get_numeric_data()` + boolean assignment only worked while that private
    # call returned a view; under Copy-on-Write it returns a copy and the
    # clipping never reached `dataset`.
    numeric_cols = dataset.select_dtypes(include="number").columns
    if len(numeric_cols) > 0:
        dataset[numeric_cols] = dataset[numeric_cols].clip(lower=0)

    print(dataset.columns)
    return dataset

def calculateCarbonIntensity(dataset, carbonRate):
    """Insert a production-weighted ``carbon_intensity`` column into ``dataset``.

    Every column from position ``CARBON_INTENSITY_COLUMN`` onward is treated as
    the production of one source. For each row the carbon intensity is the sum
    over sources of ``(source production / total production) * carbonRate[source]``,
    rounded to 2 decimals.

    Rows whose total production is 0 are considered missing and are filled with
    the previous row's values (same approach as electricityMap); the filled
    values are written back into ``dataset``. A missing *first* row has no
    previous hour, so it is left untouched and gets a carbon intensity of 0.0
    (previously the lookup wrapped around to the last row of the table).
    Production values are assumed to be non-negative.

    Rows whose carbon intensity is exactly 0 are printed as a diagnostic.

    Args:
        dataset: pandas.DataFrame with the source production columns starting
            at position ``CARBON_INTENSITY_COLUMN``. Modified in place.
        carbonRate: Mapping from source column name to its emission factor.

    Returns:
        pandas.DataFrame: The same ``dataset`` object, with the new
        ``carbon_intensity`` column at position ``CARBON_INTENSITY_COLUMN``.

    Raises:
        KeyError: If a source column has no entry in ``carbonRate``.
        ValueError: If ``dataset`` already has a ``carbon_intensity`` column
            (raised by ``DataFrame.insert``).
    """
    sourceCols = dataset.columns.values[CARBON_INTENSITY_COLUMN:]
    print("**", sourceCols)

    # Resolve the rates first so an unknown source fails before any mutation.
    rates = np.array([carbonRate[source] for source in sourceCols], dtype=float)

    # Work on a private float copy instead of writing through a slice of
    # `dataset` (chained assignment behaves differently across pandas versions).
    production = dataset.iloc[:, CARBON_INTENSITY_COLUMN:].to_numpy(dtype=float, copy=True)
    rowSum = np.nansum(production, axis=1)

    # Basic algorithm to fill rows where all sources are missing: reuse the
    # previous hour. Going in ascending order lets consecutive gaps chain.
    filledRows = []
    for i in np.flatnonzero(rowSum == 0):
        if i == 0:
            continue  # no previous hour to copy from
        production[i] = production[i - 1]
        rowSum[i] = rowSum[i - 1]
        filledRows.append(int(i))
    if filledRows:
        dataset.iloc[filledRows, CARBON_INTENSITY_COLUMN:] = production[filledRows]

    # Rows that still sum to 0 get a divisor of 1 so they yield 0.0, not NaN.
    safeSum = np.where(rowSum != 0, rowSum, 1.0)
    carbonIntensity = ((production / safeSum[:, None]) * rates).sum(axis=1)

    for i in np.flatnonzero(carbonIntensity == 0):
        print(dataset.iloc[i, CARBON_INTENSITY_COLUMN:])

    # rounding to 2 values after decimal place
    dataset.insert(loc=CARBON_INTENSITY_COLUMN, column="carbon_intensity",
                   value=np.round(carbonIntensity, 2))
    return dataset

In [62]:
def combine_forecast_files(fuel_sources, data_dir="data"):
    """Merge the per-source production forecast CSVs into a single table.

    For each source, ``<data_dir>/src_prod_forecast_<source>.csv`` is read and
    its ``datetime`` and ``avg_<source>_production_forecast`` columns are
    outer-merged on ``datetime`` with the sources read so far, so timestamps
    missing from a file show up as NaN in that source's column. The result is
    sorted by ``datetime``; the values are compared as read from the CSV (no
    datetime parsing is requested here).

    Args:
        fuel_sources: Iterable of source names (e.g. ``'coal'``, ``'wind'``).
        data_dir: Directory containing the forecast CSVs. Defaults to
            ``"data"``, the location that used to be hard-coded.

    Returns:
        pandas.DataFrame: ``datetime`` plus one forecast column per source, in
        the order given. With no sources, an empty frame that only has a
        ``datetime`` column.

    Raises:
        KeyError: If a file lacks one of the two expected columns.
    """
    # None (rather than DataFrame.empty) marks "nothing merged yet", so a first
    # file that has a header but no rows still contributes its column.
    combined_data = None

    for source in fuel_sources:
        file_path = f"{data_dir}/src_prod_forecast_{source}.csv"
        columns_to_use = ['datetime', f'avg_{source}_production_forecast']
        df = pd.read_csv(file_path)[columns_to_use]

        if combined_data is None:
            combined_data = df
        else:
            # Outer merge on the 'datetime' column keeps timestamps that appear
            # in only some of the files.
            combined_data = pd.merge(combined_data, df, on='datetime', how='outer')

    if combined_data is None:
        # No sources requested: return an empty table instead of failing in sort_values.
        return pd.DataFrame(columns=['datetime'])

    # Sort the DataFrame by the 'datetime' column
    return combined_data.sort_values(by='datetime')

In [63]:
# Sources whose forecast CSVs are merged into one table.
fuel_sources = [
    'coal',
    'nat_gas',
    'nuclear',
    'hydro',
    'solar',
    'wind',
    'others',
]

aa = combine_forecast_files(fuel_sources)

In [64]:
# Display the merged forecast table returned by combine_forecast_files.
aa

Unnamed: 0,datetime,avg_coal_production_forecast,avg_nat_gas_production_forecast,avg_nuclear_production_forecast,avg_hydro_production_forecast,avg_solar_production_forecast,avg_wind_production_forecast,avg_others_production_forecast
0,2023-07-01T00:00:00.000000000,28998.91845,51799.34496,11539.50744,1941.49222,903.02414,2634.15585,661.55012
1,2023-07-01T01:00:00.000000000,27412.26680,49278.65653,11539.59903,1552.80889,468.73724,2861.52390,668.72884
2,2023-07-01T02:00:00.000000000,27522.03594,44988.06731,11545.26355,1145.53004,262.23795,3288.53575,662.66752
3,2023-07-01T03:00:00.000000000,25687.60796,41219.64474,11543.73057,861.40539,16.75210,3559.34119,639.92240
4,2023-07-01T04:00:00.000000000,24797.67271,37302.33491,11586.19300,661.70988,152.07776,3774.28023,633.14901
...,...,...,...,...,...,...,...,...
4411,2023-12-31T19:00:00.000000000,20240.65647,24287.08009,8280.67904,709.82338,1504.50665,13380.55904,348.97226
4412,2023-12-31T20:00:00.000000000,20517.38060,24945.51722,8376.93372,811.70988,1406.22895,13127.50315,345.64814
4413,2023-12-31T21:00:00.000000000,21120.29523,24321.98556,8254.97258,884.77925,1337.32449,13409.38900,338.95578
4414,2023-12-31T22:00:00.000000000,21434.17208,23514.21843,8330.30476,1002.07171,1139.37294,12593.11283,345.97149


In [65]:
def initialize_and_combine(fuel_sources):
    """Build the combined forecast table and sanitise its values.

    The per-source forecast files are merged by ``combine_forecast_files``
    (one table sorted by ``datetime``) instead of repeating that logic here.
    Missing values are then replaced with 0 and negative numeric values are
    raised to 0. The first/last two rows and the column index are printed as a
    quick sanity check.

    Args:
        fuel_sources: Iterable of source names passed on to
            ``combine_forecast_files``.

    Returns:
        pandas.DataFrame: The cleaned, combined table.
    """
    combined_data = combine_forecast_files(fuel_sources)
    print(combined_data.head(2))
    print(combined_data.tail(2))

    combined_data = combined_data.replace(np.nan, 0)  # replace NaN with 0.0

    # Clip negatives through the public API. The previous
    # `_get_numeric_data()` + boolean assignment only worked while that private
    # call returned a view; under Copy-on-Write it returns a copy and the
    # clipping never reached `combined_data`.
    numeric_cols = combined_data.select_dtypes(include="number").columns
    if len(numeric_cols) > 0:
        combined_data[numeric_cols] = combined_data[numeric_cols].clip(lower=0)

    print(combined_data.columns)
    return combined_data

In [66]:
# Sources whose forecast CSVs are merged and cleaned into `dataset`.
fuel_sources = [
    'coal',
    'nat_gas',
    'nuclear',
    'hydro',
    'solar',
    'wind',
    'others',
]

dataset = initialize_and_combine(fuel_sources)

                        datetime  avg_coal_production_forecast  \
0  2023-07-01T00:00:00.000000000                   28998.91845   
1  2023-07-01T01:00:00.000000000                   27412.26680   

   avg_nat_gas_production_forecast  avg_nuclear_production_forecast  \
0                      51799.34496                      11539.50744   
1                      49278.65653                      11539.59903   

   avg_hydro_production_forecast  avg_solar_production_forecast  \
0                     1941.49222                      903.02414   
1                     1552.80889                      468.73724   

   avg_wind_production_forecast  avg_others_production_forecast  
0                    2634.15585                       661.55012  
1                    2861.52390                       668.72884  
                           datetime  avg_coal_production_forecast  \
4414  2023-12-31T22:00:00.000000000                   21434.17208   
4415  2023-12-31T23:00:00.000000000              

In [67]:
# Add the "carbon_intensity" column to `dataset` using the direct emission factors.
# NOTE(review): this cell is not idempotent. calculateCarbonIntensity inserts the
# column into the frame it is given, so re-running the cell on the augmented frame
# fails: "carbon_intensity" is then treated as a source column, and carbonRateDirect
# has no emission factor for it. Re-run the cell that builds `dataset` first.
dataset = calculateCarbonIntensity(dataset, carbonRateDirect)

** ['avg_coal_production_forecast' 'avg_nat_gas_production_forecast'
 'avg_nuclear_production_forecast' 'avg_hydro_production_forecast'
 'avg_solar_production_forecast' 'avg_wind_production_forecast'
 'avg_others_production_forecast']


In [68]:
# Display the table, which now includes the computed carbon_intensity column.
dataset

Unnamed: 0,datetime,carbon_intensity,avg_coal_production_forecast,avg_nat_gas_production_forecast,avg_nuclear_production_forecast,avg_hydro_production_forecast,avg_solar_production_forecast,avg_wind_production_forecast,avg_others_production_forecast
0,2023-07-01T00:00:00.000000000,514.78,28998.91845,51799.34496,11539.50744,1941.49222,903.02414,2634.15585,661.55012
1,2023-07-01T01:00:00.000000000,512.62,27412.26680,49278.65653,11539.59903,1552.80889,468.73724,2861.52390,668.72884
2,2023-07-01T02:00:00.000000000,519.20,27522.03594,44988.06731,11545.26355,1145.53004,262.23795,3288.53575,662.66752
3,2023-07-01T03:00:00.000000000,515.12,25687.60796,41219.64474,11543.73057,861.40539,16.75210,3559.34119,639.92240
4,2023-07-01T04:00:00.000000000,513.59,24797.67271,37302.33491,11586.19300,661.70988,152.07776,3774.28023,633.14901
...,...,...,...,...,...,...,...,...,...
4411,2023-12-31T19:00:00.000000000,443.04,20240.65647,24287.08009,8280.67904,709.82338,1504.50665,13380.55904,348.97226
4412,2023-12-31T20:00:00.000000000,445.92,20517.38060,24945.51722,8376.93372,811.70988,1406.22895,13127.50315,345.64814
4413,2023-12-31T21:00:00.000000000,450.01,21120.29523,24321.98556,8254.97258,884.77925,1337.32449,13409.38900,338.95578
4414,2023-12-31T22:00:00.000000000,458.46,21434.17208,23514.21843,8330.30476,1002.07171,1139.37294,12593.11283,345.97149
