In [1]:
from IPython.display import HTML
import random

def hide_toggle(text='Toggle Code', for_next=False):
    this_cell = """$('div.cell.code_cell.rendered.selected')"""
    next_cell = this_cell + '.next()'

    toggle_text = text  # text shown on toggle link
    target_cell = this_cell  # target cell to control with toggle
    js_hide_current = ''  # bit of JS to permanently hide code in current cell (only when toggling next cell)

    if for_next:
        target_cell = next_cell
        toggle_text += ' next cell'
        js_hide_current = this_cell + '.find("div.input").hide();'

    js_f_name = 'code_toggle_{}'.format(str(random.randint(1,2**64)))

    html = """
        <script>
            function {f_name}() {{
                {cell_selector}.find('div.input').toggle();
            }}

            {js_hide_current}
        </script>

        <a href="javascript:{f_name}()">{toggle_text}</a>
    """.format(
        f_name=js_f_name,
        cell_selector=target_cell,
        js_hide_current=js_hide_current, 
        toggle_text=toggle_text
    )

    return HTML(html)

hide_toggle(text='Toggle IPython Code')

## The dataset will look like this

Unit of observation is state-year from 2010-2019.  e.g. as many years as I have

Outcomes are compiled using EIA 860 data on individual generating units [here](http://localhost:8888/notebooks/code/cleaning/clean_state_solar_plant_panel.ipynb)

- Log capacity additions at grid-scale facilities > 1 MW (MW, DC and AC)
- Number of new plants installed

Policies are 

- Whether large-scale state tax credit program was available
- State PURPA Rules as of 2020
- Solar, DG, Total RPS targets in 2016 and 2025
- Retail choice / rules around utility contracting

Controls are 

- Log population
- State GDP (in millions of dollars from BEA)
- Log sales (MWh) 
- State capacity factor
- State technical potential (MWh / Capacity Factor) = MW available 
- Year Dummy 


In [2]:
#####################
# set up workspace
#####################

import pandas as pd
import numpy as np
import os

# set the homeDir to where the project directory is located
homeDir = '/Users/rachelanderson/Dropbox (Princeton)/Research_V2/descriptive_solar/'

# raw and clean data directories are located in the homeDir 
rawDataDir = homeDir + 'data/raw/'
cleanDataDir =  homeDir + 'data/clean/'

##########################################
# Import helper functions
##########################################

# helper function to clean column names

def clean_cols(cols):
    clean_cols = cols.str.lower().str.replace(' ', '_').str.replace('?','')
    clean_cols = clean_cols.str.replace('(','').str.replace(')','').str.replace('-','_')
    return(clean_cols)

hide_toggle('Show generic header')

## State-Year Frame and Controls

In [3]:
# State-year panel frame for 2005-2019
state_panel_frame = pd.read_csv(cleanDataDir+'state_inputs/state_panel_frame.csv')

# Merge in population data
state_df = state_panel_frame.merge(pd.read_csv(cleanDataDir+'state_inputs/state_population.csv'), how = 'inner')
state_df['log_pop'] = np.log(state_df['pop'])

# Merge in acreage
state_df = state_df.merge(pd.read_csv(cleanDataDir + 'state_inputs/state_acreage.csv'))

# Merge in capacity factor 
state_df = state_df.merge(pd.read_csv(cleanDataDir + 'state_inputs/state_cf_nrel.csv'))

# Merge in state sales
sales = pd.read_csv(cleanDataDir + 'state_sales/state_sales_panel.csv')
state_df = state_df.merge(sales[['year','state','tot_sales']])
state_df['log_sales'] = np.log(state_df['tot_sales'])

# Merge in state_gdp
gdp = pd.read_csv(cleanDataDir + 'state_inputs/state_gdp_panel.csv')
state_df = state_df.merge(gdp)

In [4]:
# Dummy for retail choice
# Dummy for RTO/ISO Region
# Technical Potential from NREL 

## Outcome variables

In [11]:
# TO DO: Check how this one was cleaned; add link to script that cleans it
state_solar=pd.read_csv(cleanDataDir + 'state_solar/state_micro_solar_2005-2019.csv')
state_df = state_df.merge(state_solar)

## Policy variables

### A. RPS Targets for 2016 and 2025 Solar + DG + General RPS in % and MWh Terms

In [12]:
rps = pd.read_csv(cleanDataDir+'state_policies/state_rps_panel_clean.csv')

yrs = [2016, 2025]
targets = []

percent_targets = ['tot_rps_lbl', 'solar_rps_manual', 'dg_rps_manual',
       'solar_rps_lbl']

for yr in yrs: 
    temp = rps[rps['year']==yr]
    for x in percent_targets:
        temp[x + '_mwh_' + str(yr)] = temp[x]*temp['lbl_sales']
        temp[x + '_target_' + str(yr)] = temp[x]

    temp.rename(columns= {'lbl_sales':'lbl_sales_' + str(yr)},inplace=True)
    temp = temp.drop(columns = percent_targets + ['year'])
    targets.append(temp)
    
rps_targets = targets[0].merge(targets[1])
state_df = state_df.merge(rps_targets);

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/us

### B. PURPA Policies


In [5]:
purpa = pd.read_csv(cleanDataDir+'state_policies/state_purpa_dsire_updated.csv')
state_df = state_df.merge(purpa,how='left').fillna(0)

In [6]:
# Manually fix some states

# NC had different rules pre-2017
state_df.loc[(state_df['state']=='NC') & (state_df['year'] > 2017), 'purpa_competitive_2020'] = 1
state_df.loc[(state_df['state']=='NC') & (state_df['year'] > 2017), 'purpa_max_term_2020'] = 10 

# AZ lacked formal PURPA policy until 2019-ish (assigned all 0's)

# Utility bidding programs in CO, FL, GA, MI, NV, NC, OK, OR established in 2015
# see:  https://brattlefiles.blob.core.windows.net/files/20706_fercs_recent_rulings_on_purpa_-_competitive_procurement_option.pdf

bid_states = ['CO', 'FL','GA','MI','NV','NC']

state_df.loc[(state_df['state']=='GA') & (state_df['year'] <= 2015), 'purpa_competitive_2020'] = 1


In [9]:
state_df[state_df['state']=='GA']

Unnamed: 0,year,state,pop,log_pop,acres,nrel_cap_factor,tot_sales,log_sales,gdp,purpa_wholesale_2020,purpa_competitive_2020,purpa_standard_contract_2020,purpa_mw_limit_2020,purpa_max_term_2020
90,2010,GA,9711881,16.088861,37745,0.203,140671580.0,18.761939,414664.0,0.0,1.0,1.0,30.0,30.0
91,2011,GA,9802431,16.098141,37745,0.203,136371149.0,18.730891,427826.9,0.0,1.0,1.0,30.0,30.0
92,2012,GA,9901430,16.10819,37745,0.203,130978872.0,18.690547,443566.1,0.0,1.0,1.0,30.0,30.0
93,2013,GA,9972479,16.11534,37745,0.203,130497470.0,18.686864,459578.7,0.0,1.0,1.0,30.0,30.0
94,2014,GA,10067278,16.124801,37745,0.203,135789932.0,18.72662,485282.5,0.0,1.0,1.0,30.0,30.0
95,2015,GA,10178447,16.135783,37745,0.203,135878215.0,18.72727,515753.0,0.0,1.0,1.0,30.0,30.0
96,2016,GA,10301890,16.147838,37745,0.203,138112239.0,18.743577,541292.2,0.0,1.0,1.0,30.0,30.0
97,2017,GA,10410330,16.158309,37745,0.203,133456620.0,18.709287,568398.9,0.0,1.0,1.0,30.0,30.0
98,2018,GA,10511131,16.167945,37745,0.203,139866074.0,18.756196,602023.9,0.0,1.0,1.0,30.0,30.0
99,2019,GA,10617423,16.178007,37745,0.203,139300990.0,18.752148,625713.6,0.0,1.0,1.0,30.0,30.0


### C.  Financial Incentives 

In [15]:
### NC and OR had large ITC pre-2016 and pre-July 2014, respectively

# NC Pre-2016
state_df['itc_dummy'] = 0
state_df.loc[(state_df['state'] == 'NC') & (state_df['year']<2016),'itc_dummy'] = 1

# OR Pre-2014 
state_df.loc[(state_df['state'] == 'OR') & (state_df['year']<=2014),'itc_dummy'] = 1

In [16]:
### AZ and NM had large PTC. To best of my knowledge AZ hit capacity starting FY 2015

state_df['ptc_dummy'] = 0
state_df.loc[(state_df['state'] == 'AZ') & (state_df['year']>=2011) & (state_df['year'] <= 2014),'ptc_dummy'] = 1

# NM Info: http://www.emnrd.state.nm.us/ECMD/CleanEnergyTaxIncentives/documents/REPTCFinalReportFeb2015.pdf
# Facilities on waiting list after applications submitte din 2011 
# Could get more disaggregated -- some facilities came online in 2014
# http://www.emnrd.state.nm.us/ECMD/CleanEnergyTaxIncentives/documents/UpdatedQueueREPTC9.18.2020.pdf
state_df.loc[(state_df['state'] == 'NM') & (state_df['year']>=2007) & (state_df['year'] <= 2011),'ptc_dummy'] = 1
state_df['alt_ptc_dummy'] = state_df['ptc_dummy']
state_df.loc[(state_df['state'] == 'NM') & (state_df['year']>=2007) & (state_df['year'] <= 2014),'alt_ptc_dummy'] = 1

## C.  Market structure variables 



In [17]:
retail_choice = ['CA','IL','OH','NY','PA','MD','DE','NJ','MA','CT','NH','ME','TX']
state_df['retail_choice'] = 0
state_df.loc[(state_df['state'].isin(retail_choice)),'retail_choice'] = 1

state_df = state_df.merge(pd.read_csv(cleanDataDir + 'state_sales/state_sales_by_owner.csv'))

## Save final dataset to current project folder

In [18]:
state_df.to_csv(homeDir+'code/analysis/stata-policy-analysis/state_policy_panel.csv',index=False)