# Reshape Fuel Prices - Duke Energy Carolinas

3/18/2021 \
by [Mauricio Hernandez](mmh54@duke.edu)

In [1]:
import csv
import datetime as dt
import numpy as np
import pandas as pd

In [2]:
df_lookup = pd.read_csv('./inputs/UnitLookupAndDetailTable_(DEC-DEP).csv')
df_fuel_DEC = pd.read_csv('./inputs/UNIT_FUEL_PRICE(DEC 2019).csv')
list(df_fuel_DEC.columns)

['REGION',
 'UNIT_NAME',
 'CC_KEY',
 'FUEL_CONTRACT',
 'FUEL_TYPE',
 'EDITION',
 'PRICE $/MBTU',
 'FROM_DATE',
 'TO_DATE',
 'MODIFIED']

In [3]:
#Slicing data and filter all the values where end date is before Jan 1st
df_fuel_DEC['UNIT_ID'] = df_fuel_DEC.UNIT_NAME + '_'+ df_fuel_DEC.CC_KEY.apply(str)
df_fuel_DEC = df_fuel_DEC.loc[:, ['UNIT_ID', 'FUEL_TYPE','PRICE $/MBTU', 'FROM_DATE', 'TO_DATE']]

df_fuel_DEC.sort_values(by=['UNIT_ID', 'FUEL_TYPE'], inplace=True)

df_fuel_DEC.to_csv('./outputs/UNIT_FUEL_PRICE(DEC 2019)_sorted.csv', sep=',', encoding='utf-8', index= False)

df_fuel_DEC.head()

Unnamed: 0,UNIT_ID,FUEL_TYPE,PRICE $/MBTU,FROM_DATE,TO_DATE
0,ALLE_UN01_0,COAL,4.2517,12/22/2018,1/5/2019
1,ALLE_UN01_0,COAL,4.1813,1/5/2019,1/12/2019
2,ALLE_UN01_0,COAL,4.1954,1/12/2019,1/19/2019
3,ALLE_UN01_0,COAL,4.1994,1/19/2019,1/26/2019
4,ALLE_UN01_0,COAL,4.2554,1/26/2019,2/2/2019


## Descriptive statistics

Data from Duke Energy Carolinas and Duke Energy Progress

In [4]:
df_fuel_DEC.describe(include='all')

Unnamed: 0,UNIT_ID,FUEL_TYPE,PRICE $/MBTU,FROM_DATE,TO_DATE
count,28676,28676,28676.0,28676,28676
unique,61,4,,742,745
top,CLIF_UN05_0,NGAS,,3/16/2019,6/8/2019
freq,665,13775,,67,67
mean,,,7.482226,,
std,,,5.786913,,
min,,,0.233,,
25%,,,2.563,,
50%,,,3.108179,,
75%,,,14.34,,


### Calculating range of days between initial and end dates

In [5]:
def convertStringToDate(date_string): 
    date_obj = dt.datetime.strptime(date_string.split(" ")[0], '%m/%d/%Y')
    #if date_obj - dt.date(2018, 7, 11)
    return date_obj    

#convertStringToDate('5/10/2018')
df_fuel_DEC['FROM_DATE'] = df_fuel_DEC['FROM_DATE'].apply(convertStringToDate)
df_fuel_DEC['TO_DATE'] = df_fuel_DEC['TO_DATE'].apply(convertStringToDate)

df_fuel_DEC.describe(include='all')

Unnamed: 0,UNIT_ID,FUEL_TYPE,PRICE $/MBTU,FROM_DATE,TO_DATE
count,28676,28676,28676.0,28676,28676
unique,61,4,,376,379
top,CLIF_UN05_0,NGAS,,2019-10-12 00:00:00,2019-07-06 00:00:00
freq,665,13775,,108,108
first,,,,2018-12-22 00:00:00,2018-12-23 00:00:00
last,,,,2020-01-01 00:00:00,2020-08-22 00:00:00
mean,,,7.482226,,
std,,,5.786913,,
min,,,0.233,,
25%,,,2.563,,


In [6]:
First_day = convertStringToDate('1/1/2019')
Last_day = convertStringToDate('12/31/2019')

#remove all the values where the end dates are in 2018
df_fuel_DEC['END_YEAR'] = df_fuel_DEC['TO_DATE'].map(lambda TO_DATE: TO_DATE.year)
df_fuel_DEC['START_YEAR'] = df_fuel_DEC['FROM_DATE'].map(lambda FROM_DATE: FROM_DATE.year)

df_fuel_DEC = df_fuel_DEC[df_fuel_DEC['START_YEAR'] < 2020]
df_fuel_DEC = df_fuel_DEC[df_fuel_DEC['END_YEAR'] >= 2019]

df_fuel_DEC['FROM_DATE'] = df_fuel_DEC['FROM_DATE'].map(lambda FROM_DATE: First_day if (First_day - FROM_DATE).days > 0 else FROM_DATE )
df_fuel_DEC['TO_DATE'] = df_fuel_DEC['TO_DATE'].map(lambda TO_DATE: Last_day if (TO_DATE - Last_day).days > 0 else TO_DATE)

df_fuel_DEC = df_fuel_DEC[df_fuel_DEC['TO_DATE'] != First_day]

df_fuel_DEC.describe(include='all')

Unnamed: 0,UNIT_ID,FUEL_TYPE,PRICE $/MBTU,FROM_DATE,TO_DATE,END_YEAR,START_YEAR
count,28157,28157,28157.0,28157,28157,28157.0,28157.0
unique,61,4,,365,364,,
top,CLIF_UN05_0,NGAS,,2019-03-08 00:00:00,2019-12-31 00:00:00,,
freq,652,13482,,108,160,,
first,,,,2019-01-01 00:00:00,2019-01-02 00:00:00,,
last,,,,2019-12-31 00:00:00,2019-12-31 00:00:00,,
mean,,,7.511518,,,2019.003907,2018.999538
std,,,5.8079,,,0.062382,0.021483
min,,,0.233,,,2019.0,2018.0
25%,,,2.559,,,2019.0,2019.0


In [7]:
# Adding columns to compute number of days from FROM_DATE to TO_DATE 
df_fuel_DEC['DAYS'] = df_fuel_DEC['TO_DATE'] - df_fuel_DEC['FROM_DATE']
df_fuel_DEC['DAYS'] = df_fuel_DEC['DAYS'].map(lambda DAYS: DAYS.days )

df_fuel_DEC['REF_FROM_DATE'] = df_fuel_DEC['FROM_DATE'] - First_day
df_fuel_DEC['REF_FROM_DATE'] = df_fuel_DEC['REF_FROM_DATE'].map(lambda DAYS: DAYS.days )

# Replace last value when the number of days is zero
df_fuel_DEC['DAYS'] = np.where((df_fuel_DEC['DAYS'] == 0) & (df_fuel_DEC['TO_DATE'] == Last_day), 1, df_fuel_DEC['DAYS'])

df_fuel_DEC = df_fuel_DEC.loc[:, ['UNIT_ID', 'FUEL_TYPE', 'PRICE $/MBTU', 'FROM_DATE', 'TO_DATE', 'DAYS', 'REF_FROM_DATE']]
df_fuel_DEC.head()

Unnamed: 0,UNIT_ID,FUEL_TYPE,PRICE $/MBTU,FROM_DATE,TO_DATE,DAYS,REF_FROM_DATE
0,ALLE_UN01_0,COAL,4.2517,2019-01-01,2019-01-05,4,0
1,ALLE_UN01_0,COAL,4.1813,2019-01-05,2019-01-12,7,4
2,ALLE_UN01_0,COAL,4.1954,2019-01-12,2019-01-19,7,11
3,ALLE_UN01_0,COAL,4.1994,2019-01-19,2019-01-26,7,18
4,ALLE_UN01_0,COAL,4.2554,2019-01-26,2019-02-02,7,25


In [8]:
# Creating pivot tableto summarize unit units and fuel type
df_fuel_DEC_pivot = df_fuel_DEC.groupby(['UNIT_ID', 'FUEL_TYPE']).sum()
df_fuel_DEC_pivot.to_csv('./outputs/fuel_summary.csv', sep=',', encoding='utf-8')
#print(list(df_fuel_DEC_pivot.index))
df_fuel_DEC_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,PRICE $/MBTU,DAYS,REF_FROM_DATE
UNIT_ID,FUEL_TYPE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ALLE_UN01_0,COAL,135.5794,364,7193
ALLE_UN01_0,LOIL,3590.3080,364,40786
ALLE_UN02_0,COAL,135.3128,364,7193
ALLE_UN02_0,LOIL,3590.3080,364,40786
ALLE_UN03_0,COAL,137.1085,364,7193
...,...,...,...,...
RCKN_CT03_0,NGAS,874.4140,365,60075
RCKN_CT04_0,LOIL,3595.8000,364,40786
RCKN_CT04_0,NGAS,874.4140,365,60075
RCKN_CT05_0,LOIL,3595.8000,364,40786


## Manipulating dataframe to organize data

In [9]:
First_day = convertStringToDate('1/1/2019')
Last_day = convertStringToDate('12/31/2019')

#Create list with dates from First_day to last_day
date_list = [First_day + dt.timedelta(days=x) for x in range(0, (Last_day-First_day).days + 1)]

date_str_list = []
for date in date_list:
    date_str_list.append(date.strftime("%m/%d/%Y"))

In [10]:
#create results dataframe to store prices every day
df_fuel_result = pd.DataFrame(index=df_fuel_DEC_pivot.index, columns=date_list)
#df_fuel_DEC_pivot = df_fuel_DEC_pivot.reindex(columns = df_fuel_DEC_pivot.columns.tolist() + date_str_list)
                                
df_fuel_result.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,2019-01-01,2019-01-02,2019-01-03,2019-01-04,2019-01-05,2019-01-06,2019-01-07,2019-01-08,2019-01-09,2019-01-10,...,2019-12-22,2019-12-23,2019-12-24,2019-12-25,2019-12-26,2019-12-27,2019-12-28,2019-12-29,2019-12-30,2019-12-31
UNIT_ID,FUEL_TYPE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ALLE_UN01_0,COAL,,,,,,,,,,,...,,,,,,,,,,
ALLE_UN01_0,LOIL,,,,,,,,,,,...,,,,,,,,,,
ALLE_UN02_0,COAL,,,,,,,,,,,...,,,,,,,,,,
ALLE_UN02_0,LOIL,,,,,,,,,,,...,,,,,,,,,,
ALLE_UN03_0,COAL,,,,,,,,,,,...,,,,,,,,,,


In [11]:
current_index = ()
old_index = ()
aux_index = 0
fuel_price_list = [None] * 365

for index, row in df_fuel_DEC.iterrows():
    aux_index = index
    index_current = (row['UNIT_ID'], row['FUEL_TYPE'])
    
    # access data using column names
    fuel_price = row['PRICE $/MBTU']
    days = row['DAYS']
    ref_day = row['REF_FROM_DATE']
    
    current_index = (row['UNIT_ID'], row['FUEL_TYPE'])
    #print(index, row['UNIT_ID'], row['FUEL_TYPE'], row['PRICE $/MBTU'], row['REF_FROM_DATE'], row['DAYS'])
        
    if index == 0:
        old_index = current_index
    
    if (old_index != current_index):
        df_fuel_result.loc[old_index] = fuel_price_list

        old_index = current_index
        fuel_price_list = [None] * 365
    
    fuel_price_list[ref_day:(ref_day + days)] = [fuel_price]*(days)

        #print(index, row['PRICE $/MBTU'], row['REF_FROM_DATE'], row['DAYS'])
#Save last value
if aux_index != 0 :
    df_fuel_result.loc[current_index] = fuel_price_list
    
df_fuel_result.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,2019-01-01,2019-01-02,2019-01-03,2019-01-04,2019-01-05,2019-01-06,2019-01-07,2019-01-08,2019-01-09,2019-01-10,...,2019-12-22,2019-12-23,2019-12-24,2019-12-25,2019-12-26,2019-12-27,2019-12-28,2019-12-29,2019-12-30,2019-12-31
UNIT_ID,FUEL_TYPE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ALLE_UN01_0,COAL,4.2517,4.2517,4.2517,4.2517,4.1813,4.1813,4.1813,4.1813,4.1813,4.1813,...,2.5024,2.5024,2.5024,2.5024,2.5024,2.4824,2.4824,2.4824,2.4824,
ALLE_UN01_0,LOIL,12.629,12.629,12.668,12.668,12.979,13.259,13.259,13.497,13.497,13.497,...,14.913,14.913,14.913,14.913,14.913,14.913,14.913,14.913,14.913,
ALLE_UN02_0,COAL,4.2446,4.2446,4.2446,4.2446,4.1741,4.1741,4.1741,4.1741,4.1741,4.1741,...,2.4972,2.4972,2.4972,2.4972,2.4972,2.4772,2.4772,2.4772,2.4772,
ALLE_UN02_0,LOIL,12.629,12.629,12.668,12.668,12.979,13.259,13.259,13.497,13.497,13.497,...,14.913,14.913,14.913,14.913,14.913,14.913,14.913,14.913,14.913,
ALLE_UN03_0,COAL,4.3076,4.3076,4.3076,4.3076,4.2371,4.2371,4.2371,4.2371,4.2371,4.2371,...,2.5427,2.5427,2.5427,2.5427,2.5427,2.5227,2.5227,2.5227,2.5227,


In [12]:
df_fuel_result.to_csv('./outputs/UNIT_FUEL_PRICES_DEC_Results.csv', sep=',', encoding='utf-8')
df_fuel_DEC.to_csv('./outputs/UNIT_FUEL_PRICES_DEC_Short.csv', sep=',', encoding='utf-8')

In [13]:
#dfSummary['UNIT_ID'] dfSummary.UNIT_ID == 'ALLE_UN01_0')
#dfSummary[dfSummary.DAYS == 364]