In [5]:
from IPython.display import HTML
import random

def hide_toggle(text='Toggle Code', for_next=False):
    """Build an HTML snippet with a link that toggles a notebook cell's input.

    The snippet contains a <script> defining a JavaScript function with a
    randomly generated name, plus an <a> link that calls that function.
    The function toggles the ``div.input`` element of the target cell.

    Parameters
    ----------
    text : str
        Label shown on the toggle link.
    for_next : bool
        If False, the target is the cell matched by the jQuery selector
        ``div.cell.code_cell.rendered.selected``. If True, the target is that
        cell's ``.next()`` sibling, ' next cell' is appended to the label, and
        the snippet also hides the input of the selected cell itself.

    Returns
    -------
    IPython.display.HTML
        The rendered-on-display HTML/JavaScript snippet.
    """
    selected_cell = """$('div.cell.code_cell.rendered.selected')"""

    if for_next:
        # Control the following cell and hide this cell's own input.
        selector = selected_cell + '.next()'
        label = text + ' next cell'
        hide_own_input = selected_cell + '.find("div.input").hide();'
    else:
        selector = selected_cell
        label = text
        hide_own_input = ''

    # Random suffix on the JS function name (presumably so that several
    # toggles in one notebook do not overwrite each other).
    func_name = 'code_toggle_' + str(random.randint(1,2**64))

    template = """
        <script>
            function {f_name}() {{
                {cell_selector}.find('div.input').toggle();
            }}

            {js_hide_current}
        </script>

        <a href="javascript:{f_name}()">{toggle_text}</a>
    """
    markup = template.format(
        f_name=func_name,
        cell_selector=selector,
        js_hide_current=hide_own_input,
        toggle_text=label,
    )

    return HTML(markup)

# Emit a 'Toggle IPython Code' link; as the cell's last expression, the returned HTML is displayed.
hide_toggle(text='Toggle IPython Code')

## The dataset will look like this

The unit of observation is a state-year, 2010-2019 (i.e., as many years as I have).

Outcomes are compiled using EIA 860 data on individual generating units [here](http://localhost:8888/notebooks/code/cleaning/clean_state_solar_plant_panel.ipynb)

- Log capacity additions at grid-scale facilities >= 5 MW AC (MW, DC and AC)
- Number of new plants installed

Policies are 

- Whether a large-scale state tax credit program was available
- State PURPA Rules as of 2020
- Solar, DG, Total RPS targets in 2016 and 2025
- Retail choice / rules around utility contracting

Controls are 

- Log population
- State GDP (in millions of dollars from BEA)
- Log sales (MWh) 
- State capacity factor
- State technical potential (MWh / Capacity Factor) = MW available 
- Year Dummy 



In [53]:
#####################
# set up workspace
#####################

import pandas as pd
import numpy as np
import os

# Set homeDir to where the project directory is located.
# The location can be overridden without editing the notebook by setting the
# DESCRIPTIVE_SOLAR_HOME environment variable; otherwise the original
# machine-specific default is used.
homeDir = os.environ.get(
    'DESCRIPTIVE_SOLAR_HOME',
    '/Users/rachelanderson/Dropbox (Princeton)/Research_V2/descriptive_solar/',
)
# Guarantee a trailing separator, since the paths below are built by concatenation.
homeDir = os.path.join(homeDir, '')

# raw and clean data directories are located in the homeDir
rawDataDir = homeDir + 'data/raw/'
cleanDataDir = homeDir + 'data/clean/'

##########################################
# Import helper functions
##########################################

# helper function to clean column names

def clean_cols(cols):
    """Normalise column labels to lowercase snake-case-like names.

    Each label is lowercased, every space and hyphen becomes an underscore,
    and every '?', '(' and ')' is removed. Characters are replaced one by
    one, so consecutive separators yield consecutive underscores.

    Parameters
    ----------
    cols : pandas.Index or pandas.Series of str
        Labels to clean (anything exposing the ``.str`` accessor).

    Returns
    -------
    Same type as ``cols``, holding the cleaned labels.
    """
    # Patterns are explicit regex character classes with regex=True so the
    # result does not depend on pandas' version-specific default for `regex`
    # ('?', '(' and ')' are regex metacharacters when given as bare patterns).
    lowered = cols.str.lower()
    underscored = lowered.str.replace(r'[ \-]', '_', regex=True)
    return underscored.str.replace(r'[?()]', '', regex=True)

# Emit a 'Show generic header' toggle link as this cell's displayed output.
hide_toggle('Show generic header')

In [72]:
#############################################
###
### Set up state panel frame with controls
###
#############################################

# Base state-year panel frame (2005-2019)
state_panel_frame = pd.read_csv(cleanDataDir+'state_inputs/state_panel_frame.csv')

# State GDP merge (currently disabled)
# gdp = pd.read_csv(cleanDataDir + 'state_inputs/state_gdp_panel.csv')
# state_df = state_panel_frame.merge(gdp)

# Attach total sales (tot_sales) to the panel and add its natural log
sales = pd.read_csv(cleanDataDir + 'state_sales/state_sales_panel.csv')
state_df = (
    state_panel_frame
    .merge(sales[['year','state','tot_sales']])
    .assign(log_sales=lambda panel: np.log(panel['tot_sales']))
)

# Add dummy for RTO/ISO Region
# Maps each state abbreviation to the region label used in this analysis.
region_map = {
    "WA": "Northwest", "OR": "Northwest", "ID": "Northwest", "MT": "Northwest",
    "WY": "Northwest", "NV": "Northwest", "UT": "Northwest",
    "CA": "CAISO",
    "AZ": "Southwest", "NM": "Southwest", "CO": "Southwest",
    "TX": "ERCOT",
    "OK": "SPP", "NE": "SPP", "SD": "SPP", "KS": "SPP",
    "ND": "MISO", "MN": "MISO", "IA": "MISO", "IL": "MISO", "MS": "MISO",
    "WI": "MISO", "LA": "MISO", "IN": "MISO", "MI": "MISO", "MO": "MISO",
    "AR": "MISO",
    "FL": "Southeast", "GA": "Southeast", "AL": "Southeast", "SC": "Southeast",
    "NC": "Southeast", "TN": "Southeast",
    "OH": "PJM", "KY": "PJM", "VA": "PJM", "WV": "PJM", "DE": "PJM",
    "PA": "PJM", "MD": "PJM", "NJ": "PJM",
    "NY": "NYISO",
    "VT": "ISO-NE", "NH": "ISO-NE", "MA": "ISO-NE", "CT": "ISO-NE",
    "RI": "ISO-NE", "ME": "ISO-NE",
    "HI": "Hawaii", "AK": "Alaska", "PR": "Puerto Rico",
    "DC": "District of Columbia",
}

# Region labels that count as an ISO/RTO for the dummy below.
# NOTE(review): 'NYISO' is not in this list, so NY is coded iso_rto = False /
# iso_rto_dummy = 0 -- confirm this is intended.
iso_rto = ['CAISO', 'SPP', 'MISO', 'ISO-NE', 'PJM', 'ERCOT']

state_df['region'] = state_df.state.map(region_map)
# States missing from region_map get a NaN region and therefore iso_rto = False.
state_df['iso_rto'] = state_df.region.isin(iso_rto)
# isin() returns a plain boolean column, so a direct cast gives the 0/1 dummy.
state_df['iso_rto_dummy'] = state_df['iso_rto'].astype(int)

In [73]:
# Outcome variables: read the state solar file and join it onto the panel
# (inner join on the columns the two frames have in common)
state_solar = pd.read_csv(cleanDataDir + 'state_solar/state_micro_solar_2005-2019.csv')
state_df = pd.merge(state_df, state_solar)

In [74]:
# Merge in RPS Targets
# Left join on the shared columns, so every existing panel row is kept even
# when the RPS file has no matching record.
rps = pd.read_csv(cleanDataDir+'state_policies/state_rps_panel_clean.csv')

state_df = pd.merge(state_df, rps, how='left')

In [75]:
### NC and OR had large ITC pre-2016 and pre-July 2014, respectively

# Row masks reused by the policy windows below.
is_nc = state_df['state'] == 'NC'
is_or = state_df['state'] == 'OR'
is_az = state_df['state'] == 'AZ'
is_nm = state_df['state'] == 'NM'
yr = state_df['year']

# NC: every panel year before 2016
state_df['itc_dummy'] = 0
state_df.loc[is_nc & (yr < 2016), 'itc_dummy'] = 1

# OR: 2009 through 2014 inclusive (the credit ended in July 2014, so 2014 is a partial year)
state_df.loc[is_or & (yr >= 2009) & (yr <= 2014), 'itc_dummy'] = 1

# NC extended its tax credit in 2009, so I try also coding it as if tax credit added in 2009
state_df['alt_itc_dummy'] = state_df['itc_dummy']
# The copy above already carries NC = 1 for every year before 2016, so the NC
# rows must be cleared first; otherwise alt_itc_dummy is identical to itc_dummy.
state_df.loc[is_nc, 'alt_itc_dummy'] = 0
state_df.loc[is_nc & (yr >= 2009) & (yr <= 2015), 'alt_itc_dummy'] = 1


### AZ and NM had large PTC. To best of my knowledge AZ hit capacity starting FY 2015

state_df['ptc_dummy'] = 0
state_df.loc[is_az & (yr >= 2011) & (yr <= 2014), 'ptc_dummy'] = 1

# NM Info: http://www.emnrd.state.nm.us/ECMD/CleanEnergyTaxIncentives/documents/REPTCFinalReportFeb2015.pdf
# Facilities on waiting list after applications submitted in 2011
# Could get more disaggregated -- some facilities came online in 2014
# http://www.emnrd.state.nm.us/ECMD/CleanEnergyTaxIncentives/documents/UpdatedQueueREPTC9.18.2020.pdf
state_df.loc[is_nm & (yr >= 2007) & (yr <= 2011), 'ptc_dummy'] = 1

# Alternative PTC coding: same as ptc_dummy, but the NM window runs through 2014
state_df['alt_ptc_dummy'] = state_df['ptc_dummy']
state_df.loc[is_nm & (yr >= 2007) & (yr <= 2014), 'alt_ptc_dummy'] = 1

In [77]:
# Show the assembled panel as the cell output (the display is truncated with '...').
state_df

Unnamed: 0,year,state,tot_sales,log_sales,region,iso_rto,iso_rto_dummy,net_meter_cap_dc,net_meter_cap_ac,comm__cap_ac,...,first_plant_year,tot_rps_lbl,solar_rps_manual,dg_rps_manual,solar_rps_lbl,lbl_sales,itc_dummy,alt_itc_dummy,ptc_dummy,alt_ptc_dummy
0,2005,AK,5912571.0,15.592591,Alaska,False,0,0.0,0.0,0.0,...,,0.00,0.0,0.0,0.0,0.000000,0,0,0,0
1,2005,AL,89201620.0,18.306410,Southeast,False,0,0.0,0.0,0.0,...,2016.0,0.00,0.0,0.0,0.0,0.000000,0,0,0,0
2,2005,AR,46054897.0,17.645345,MISO,True,1,0.0,0.0,0.0,...,2015.0,0.00,0.0,0.0,0.0,0.000000,0,0,0,0
3,2005,AZ,69390686.0,18.055263,Southwest,False,0,0.0,0.0,0.0,...,2001.0,1.00,0.0,0.0,0.0,0.000000,0,0,0,0
4,2005,CA,275394219.0,19.433714,CAISO,True,1,1.7,1.6,1.6,...,2009.0,13.85,0.0,0.0,0.0,0.000000,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,2019,VT,5427664.0,15.507019,ISO-NE,True,1,0.0,0.0,0.0,...,,55.00,2.2,0.0,2.2,5564.792004,0,0,0,0
746,2019,WA,93374817.0,18.352132,Northwest,False,0,0.0,0.0,0.0,...,,9.00,0.0,0.0,0.0,88918.425299,0,0,0,0
747,2019,WI,69157540.0,18.051898,MISO,True,1,0.0,0.0,0.0,...,2019.0,10.00,0.0,0.0,0.0,71242.594861,0,0,0,0
748,2019,WV,33247013.0,17.319475,PJM,True,1,0.0,0.0,0.0,...,,0.00,0.0,0.0,0.0,34385.170173,0,0,0,0
