In [1]:
### This notebook builds a dataset with:
# y = (2016 solar additions in MW) / (solar MW capacity built before 2016)
# x = (net metering, RPS, RPS w/Solar, avg. retail electricity price, solar index, regulated/not, population, other 2016 cap additions, community solar, region)

In [3]:
import pandas as pd
import datetime
import numpy as np
import statsmodels.formula.api as sm
import statsmodels.api as smap
import matplotlib.pyplot as plt
# pd.set_option('display.max_rows', None)

In [7]:
# Folder path with a trailing slash, so a file name can be appended directly.
# NOTE(review): no other cell visible here references this name; the
# cap_gen.csv export hard-codes the same folder instead. Presumably this was
# meant to be the shared output directory -- confirm.
StataFiles = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/StataDatasets/'

### Load in all raw data files (Excel and CSV) and merge...

### Clean generation data

In [5]:
# Import generating data (annual)
# The workbook's first sheet is parsed with its first row skipped, so the
# following row supplies the column headers.
data = pd.ExcelFile('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/eia_summary_data/annual_generation_state.xls')

# `sheet_name` is the supported keyword: the old `sheetname` spelling was
# renamed in pandas 0.21 and later removed, so it raises TypeError on
# current pandas.
ann_generation = data.parse(sheet_name=data.sheet_names[0], skiprows=1)

# Normalise headers to lower snake_case so later cells can use plain names.
ann_generation.columns = [col.lower().replace(" ", "_") for col in ann_generation.columns]

# Keep only the rows whose producer type contains 'Total Electric Power'.
# na=False treats a missing producer type as "no match"; without it a single
# NaN makes the boolean mask invalid and the row selection raises.
tot_gen = ann_generation[ann_generation['type_of_producer'].str.contains('Total Electric Power', na=False)]

### Clean capacity data

In [6]:
# Import annual capacity data

# Read the capacity table (column names come from the second line of the
# file) and normalise the headers to lower snake_case.
cap_data = pd.read_csv(
    '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/eia_capacity_data/existcapacity_annual.csv',
    header=1,
).rename(columns=lambda name: name.lower().replace(" ", "_"))

# Add numeric versions of the two capacity columns: thousands separators are
# removed first, and anything that still fails to parse becomes NaN.
for new_col, raw_col in (
    ('summer_cap', 'summer_capacity_(megawatts)'),
    ('nameplate_cap', 'nameplate_capacity_(megawatts)'),
):
    cap_data[new_col] = pd.to_numeric(cap_data[raw_col].str.replace(",", ""), errors='coerce')

In [7]:
# Solar capacity rows for the industry-wide total: the producer type must
# match exactly, and the fuel source must contain "Solar".
is_industry_total = cap_data['producer_type'] == "Total Electric Power Industry"
is_solar_fuel = cap_data['fuel_source'].str.contains("Solar")
solar_cap = cap_data[is_industry_total & is_solar_fuel]

In [8]:
# Generation rows whose energy source is exactly the combined solar category.
solar_gen = tot_gen.loc[tot_gen['energy_source'].eq("Solar Thermal and Photovoltaic")]

In [9]:
# Pair each solar capacity row with the solar generation row for the same
# year and state; the inner join keeps only year/state pairs present in both.
reg1_data = pd.merge(
    solar_cap,
    solar_gen,
    how='inner',
    left_on=['year', 'state_code'],
    right_on=['year', 'state'],
)

# Save the combined capacity/generation table. to_csv writes the row index
# as an extra leading column by default.
reg1_data.to_csv(
    '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/StataDatasets/cap_gen.csv'
)

### Clean generator dataset

In [10]:
# Import generator data (survey of all generators in 2018)


def _parse_mw(raw):
    """Convert a raw capacity column to numeric values.

    Every cell is first cast to text, so a column holding a mix of strings
    and numbers is handled uniformly (the `.str` accessor would otherwise
    turn the non-string cells into NaN). Thousands separators and all spaces
    are then removed with *string* replacement. The previous code chained
    `Series.replace(" ", "")`, which only matches cells that are exactly one
    space and leaves padded values such as " 75.0 " untouched.

    Blank cells and missing values come out as NaN; any other unparseable
    text still raises, exactly as before.
    """
    cleaned = raw.astype(str).str.replace(",", "").str.replace(" ", "")
    return pd.to_numeric(cleaned)


gen_data = pd.read_csv('/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/eia_data/eia8602018/merged_new.csv')

# Discard every column whose name contains "Unnamed" (presumably index
# columns left over from an earlier CSV export -- confirm).
gen_data = gen_data.drop(gen_data.columns[gen_data.columns.str.contains("Unnamed")], axis=1)

# Integer year/month are needed to build the date strings below; astype(int)
# raises if either column contains missing values.
gen_data['operating_year'] = gen_data['operating_year'].astype(int)
gen_data['operating_month'] = gen_data['operating_month'].astype(int)

# First day of the month in which each generator began operating.
gen_data['operating_date'] = pd.to_datetime([f'{y}-{m}-01' for y, m in zip(gen_data.operating_year, gen_data.operating_month)])

gen_data['summer_cap'] = _parse_mw(gen_data['summer_capacity_mw'])
gen_data['winter_cap'] = _parse_mw(gen_data['winter_capacity_mw'])

  interactivity=interactivity, compiler=compiler, result=result)


In [11]:
# Solar PV generators whose grant_program value is the literal string 'none'.
solar = gen_data[
    (gen_data['technology'] == "Solar Photovoltaic")
    & (gen_data['grant_program'] == 'none')
]

In [12]:
# Total capacity brought online per state and operating month. Month and
# year are included as grouping keys alongside the date so that they remain
# as columns after the index is reset.
solar_cap_monthly = (
    solar
    .groupby(['plant_state', 'operating_date', 'operating_month', 'operating_year'])
    [['summer_cap', 'nameplate_cap', 'winter_cap']]
    .sum()
    .reset_index()
)

# NOTE(review): `price_data` is not defined in any cell visible above this
# one; confirm it is loaded earlier, otherwise this line raises NameError on
# a fresh kernel.
res_price_monthly = price_data.loc[:, ['date', 'state', 'residential_price']]

In [13]:
# Right join: every (date, state) row of the price table is kept, whether or
# not any solar capacity came online in that state and month. The join keys
# from the capacity side are then dropped as duplicates of date/state.
# NOTE(review): fillna(0) is applied to every column, so unmatched rows also
# get operating_month = 0 and operating_year = 0, and any missing
# residential_price becomes 0 -- confirm that is intended.
monthly_vars = (
    solar_cap_monthly
    .merge(
        res_price_monthly,
        how='right',
        left_on=['operating_date', 'plant_state'],
        right_on=['date', 'state'],
    )
    .drop(columns=['plant_state', 'operating_date'])
    .fillna(0)
)

In [14]:
# Save the monthly state-level table (the row index is written as well).
monthly_vars.to_csv(
    path_or_buf='/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/monthly_data.csv'
)

### Merge all of the variables together

In [15]:
# Grant awards workbook; column names are taken from its second row.
# Unlike the absolute paths used by the other loaders, this path is relative
# to the directory the notebook is run from.
grant_data = pd.read_excel(
    io='../../../Data for Tax Equity Project/treasury_data/grant_awards.xlsx',
    header=1,
)

In [16]:
# Sum of the 'Funded' column per state, returned as a two-column frame.
loan_funds = (
    grant_data
    .groupby('State')['Funded']
    .sum()
    .reset_index()
)

In [17]:
# Total capacity brought online per state and operating year.
solar_cap_ann = (
    solar
    .groupby(['plant_state', 'operating_year'])
    [['summer_cap', 'nameplate_cap', 'winter_cap']]
    .sum()
    .reset_index()
)

In [25]:
# Attach each state's funded total to its annual capacity rows. The funding
# table has one row per state, so the same amount repeats on every year of
# that state; the outer join also keeps states present in only one table.
master_df = pd.merge(
    solar_cap_ann,
    loan_funds,
    how='outer',
    left_on='plant_state',
    right_on='State',
)

In [78]:
# Show a bounded preview of the merged table. Calling to_csv() with no path
# writes nothing to disk; it only returns the entire frame as one CSV string,
# which floods the cell output.
# TODO(review): master_df is not saved by any cell visible here. If it is the
# final dataset, write it out with an explicit path, e.g.
# master_df.to_csv(StataFiles + '<file name>.csv', index=False).
master_df.head(10)

Unnamed: 0,plant_state,operating_year,summer_cap,nameplate_cap,winter_cap,State,Funded
0,AL,2016.0,75.0,75.0,75.0,AL,2495648.0
1,AL,2017.0,104.4,107.2,101.4,AL,2495648.0
2,AL,2018.0,14.7,14.7,14.7,AL,2495648.0
3,AR,2015.0,12.0,12.0,12.0,AR,257529.0
4,AR,2016.0,1.0,1.0,1.0,AR,257529.0
5,AR,2017.0,6.0,6.0,6.0,AR,257529.0
6,AR,2018.0,81.0,81.0,81.0,AR,257529.0
7,AZ,2001.0,3.9,3.9,3.9,AZ,1444632000.0
8,AZ,2002.0,2.1,2.1,2.1,AZ,1444632000.0
9,AZ,2003.0,0.1,0.1,0.1,AZ,1444632000.0
