In [331]:
import pandas as pd
import datetime
import statsmodels.formula.api as sm
import statsmodels.api as smap
import matplotlib.pyplot as plt


In [193]:
# Code cleans up data for prices (monthly, state)

data = pd.ExcelFile('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/monthly_prices_sales_revenue.xlsx')

df = data.parse(sheetname=data.sheet_names[0], skiprows=0)

# make a list of the header row and strip up to the 4th letter. This is the location and year information
cols1 = list(df.columns)
cols1 = [str(x).lower() for x in cols1]

# make another list of the first row,this is the age group information
# we need to preserve this information in the column name when we reshape the data 
cols2 = list(df.iloc[0,:])
cols2 = [str(x).lower() for x in cols2]

cols3 = list(df.iloc[1,:])
cols3 = [str(x).lower().replace(" ","_") for x in cols3]

# now join the two lists to make a combined column name which preserves our location, year and age-group information
cols = [x+"_"+y for x,y in zip(cols1,cols2)]
name = ""
for i,x in enumerate(cols):
    if "unnamed" in x:
        x = x.split('_')[-1]
        cols[i] = x
    if "_" in x:
        name = x.split("_")[0]
        continue
    elif ("_" not in x):
        cols[i] = name+"_" + x
cols[0:4] = cols3[0:4]
# Assign new column names to the dataframe
df.columns = cols
df.drop([0,1], inplace=True)
df = df[:-1]
df.to_csv('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/avg_price_monthly.csv')

In [420]:
# Import electricity price data (annual)

price_data = pd.read_csv('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/avg_price_monthly.csv')
price_data['date'] = pd.to_datetime([f'{y}-{m}-01' for y, m in zip(price_data.year, price_data.month)])

# Import generator data (survey of all generators in 2018)

gen_data = pd.read_csv('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/eia_data/eia8602018/merged_new.csv')
gen_data = gen_data.drop(gen_data.columns[gen_data.columns.str.contains("Unnamed")], axis=1)
gen_data['operating_year'] = gen_data['operating_year'].astype(int)
gen_data['operating_month'] = gen_data['operating_month'].astype(int)
gen_data['operating_date'] = pd.to_datetime([f'{y}-{m}-01' for y, m in zip(gen_data.operating_year, gen_data.operating_month)])
gen_data['summer_cap']=pd.to_numeric(gen_data['summer_capacity_mw'].str.replace(",","").replace(" ",""))
gen_data['winter_cap']=pd.to_numeric(gen_data['winter_capacity_mw'].str.replace(",","").replace(" ",""))

In [428]:
solar = gen_data[gen_data['technology'] == "Solar Photovoltaic"]
solar = solar[solar['grant_program']=='none']

In [429]:
solar.groupby(['plant_state', 'period']).nameplate_cap.sum().reset_index()

Unnamed: 0,plant_state,period,nameplate_cap
0,AL,2012-2016: ITC Round 1,75.0
1,AL,2017-2023: ITC Round 2,121.9
2,AR,2012-2016: ITC Round 1,13.0
3,AR,2017-2023: ITC Round 2,87.0
4,AZ,2009-2011: Loan grant + ITC,92.7
...,...,...,...
106,WA,2017-2023: ITC Round 2,19.2
107,WI,2009-2011: Loan grant + ITC,1.8
108,WI,2012-2016: ITC Round 1,3.1
109,WI,2017-2023: ITC Round 2,19.0


In [411]:
solar_cap_monthly = solar.groupby(['plant_state','operating_date'])[['summer_cap','nameplate_cap', 'winter_cap']].sum().reset_index()
res_price_monthly = price_data[['date', 'state', 'residential_price']]

In [412]:
solar_cap_monthly

Unnamed: 0,plant_state,operating_date,summer_cap,nameplate_cap,winter_cap
0,AL,2016-09-01,75.0,75.0,75.0
1,AL,2017-04-01,10.6,10.6,10.6
2,AL,2017-07-01,7.4,7.4,7.4
3,AL,2017-12-01,86.4,89.2,83.4
4,AL,2018-12-01,14.7,14.7,14.7
...,...,...,...,...,...
1068,WI,2017-06-01,3.5,3.5,3.5
1069,WI,2017-11-01,1.0,1.0,1.0
1070,WI,2018-01-01,2.5,2.5,2.5
1071,WI,2018-06-01,1.2,1.2,1.2


In [413]:
m = solar_cap_monthly.merge(res_price_monthly, how='right', left_on = ['operating_date', 'plant_state'], right_on=['date','state'])
m = m.drop(columns=['plant_state','operating_date'], axis = 1)
m['nameplate_cap'] = m['nameplate_cap'].fillna(0)

In [414]:
m.to_csv('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/playing.csv')

In [415]:
m

Unnamed: 0,summer_cap,nameplate_cap,winter_cap,date,state,residential_price
0,75.0,75.0,75.0,2016-09-01,AL,12.22
1,10.6,10.6,10.6,2017-04-01,AL,12.64
2,7.4,7.4,7.4,2017-07-01,AL,12.46
3,86.4,89.2,83.4,2017-12-01,AL,11.77
4,14.7,14.7,14.7,2018-12-01,AL,11.55
...,...,...,...,...,...,...
18406,,0.0,,2020-01-01,VT,19.27
18407,,0.0,,2020-01-01,WA,9.43
18408,,0.0,,2020-01-01,WI,14.31
18409,,0.0,,2020-01-01,WV,10.84
