In [16]:
from IPython.display import HTML
import random

def hide_toggle(text='Toggle Code', for_next=False):
    """Build an HTML snippet with a link that shows/hides a code cell's input.

    The snippet contains a <script> defining a uniquely named JavaScript
    function that toggles the `div.input` element of the target cell, plus an
    <a> link that calls that function.

    Parameters
    ----------
    text : str
        Label of the toggle link.
    for_next : bool
        If False, the link controls the currently selected cell. If True, it
        controls the cell after it, ' next cell' is appended to the label, and
        the current cell's own input is hidden by the emitted script.

    Returns
    -------
    IPython.display.HTML
        The rendered snippet.
    """
    # NOTE(review): the jQuery selectors presumably target the classic Notebook
    # DOM; confirm they still match in the front end being used.
    selected_cell = """$('div.cell.code_cell.rendered.selected')"""

    if for_next:
        cell_selector = selected_cell + '.next()'
        link_label = text + ' next cell'
        # Hide this cell's own code when it only exists to control the next one.
        hide_own_input = selected_cell + '.find("div.input").hide();'
    else:
        cell_selector = selected_cell
        link_label = text
        hide_own_input = ''

    # A random suffix keeps the JS function names of several toggles distinct.
    unique_f_name = 'code_toggle_{}'.format(str(random.randint(1,2**64)))

    template = """
        <script>
            function {f_name}() {{
                {cell_selector}.find('div.input').toggle();
            }}

            {js_hide_current}
        </script>

        <a href="javascript:{f_name}()">{toggle_text}</a>
    """

    snippet = template.format(
        f_name=unique_f_name,
        cell_selector=cell_selector,
        js_hide_current=hide_own_input,
        toggle_text=link_label,
    )
    return HTML(snippet)

# Last expression of the cell: the returned HTML object (toggle link) is displayed as the cell output.
hide_toggle(text='Toggle IPython Code')


## Code Description
Uses EIA Form 860 plant-level data to compute for each state/year:

A. Outcome Variables
- number of plants, 
- average plant size, 
- AC/DC capacity additions
- cumulative values of each variable

This code **defines** commercial/industrial facilities and facilities with net metering agreements as a separate variable

B. Market Preparation Policy Variables
- Market preparation
    - First year that any solar PV project > 1 MW is installed in a state
    - First year that an IPP-owned project is installed in a state
   

In [17]:
#####################
# set up workspace
#####################

import pandas as pd
import numpy as np
import os

# set the homeDir to where the project directory is located.
# The DESCRIPTIVE_SOLAR_HOME environment variable overrides the default below,
# so the notebook can run on another machine without editing this cell.
homeDir = os.path.join(
    os.environ.get(
        'DESCRIPTIVE_SOLAR_HOME',
        '/Users/rachelanderson/Dropbox (Princeton)/Research_V2/descriptive_solar/',
    ),
    '',  # joining with '' guarantees exactly one trailing path separator
)

# raw and clean data directories are located in the homeDir
rawDataDir = homeDir + 'data/raw/'
cleanDataDir = homeDir + 'data/clean/'

##########################################
# Import helper functions
##########################################

# helper function to clean column names

def clean_cols(cols):
    """Normalize column labels into lowercase, underscore-separated names.

    Lowercases every label, turns spaces and hyphens into underscores, and
    removes question marks and parentheses.

    Parameters
    ----------
    cols : pandas.Index or pandas.Series of str
        Labels to clean (anything exposing the `.str` accessor).

    Returns
    -------
    Same type as `cols`, with cleaned labels.
    """
    cleaned = cols.str.lower()
    # '?', '(' and ')' are regex metacharacters. regex=False forces literal
    # replacement regardless of the pandas version's default for `regex`.
    for old, new in ((' ', '_'), ('?', ''), ('(', ''), (')', ''), ('-', '_')):
        cleaned = cleaned.str.replace(old, new, regex=False)
    return cleaned

# Last expression of the cell: displays a link that toggles this cell's code.
hide_toggle('Show generic header')

## Define sample

In [18]:
#####################################################
# Import solar generator data from EIA Form 860
#####################################################

# NOTE(review): absolute path into a different project folder ('solar_determinants')
# than homeDir; confirm this is the intended input file.
eia_solar = pd.read_csv('/Users/rachelanderson/Dropbox (Princeton)/Research_V2/solar_determinants/data/clean/eia860_2019_operating_solar_generators.csv')

#####################################################
# Drop facilities with net metering agreements
#####################################################

# A row is kept only when BOTH agreement flags are exactly 'N'; any other
# value (including a missing one) counts as having an agreement.
has_net_metering = (
    (eia_solar['net_metering_agreement'] != 'N')
    | (eia_solar['virtual_net_metering_agreement'] != 'N')
)
no_net_metering = (
    (eia_solar['net_metering_agreement'] == 'N')
    & (eia_solar['virtual_net_metering_agreement'] == 'N')
)

net_meter_cap = eia_solar[has_net_metering].dc_net_capacity_mw.sum()
print('Dropping virtual and net metering customers removes ' + str(np.round(net_meter_cap,1)) + ' MW (DC)')

solar_sample = eia_solar[no_net_metering]


#####################################################
# Drop commercial and industrial facilities
#####################################################

keep_sectors = ['IPP Non-CHP', 'Electric Utility']

# NOTE(review): this total is taken over the full eia_solar frame, not over
# solar_sample, so it also counts facilities already removed by the net
# metering filter above. Confirm which figure the message should report.
outside_kept_sectors = ~(eia_solar['sector_name'].isin(keep_sectors))
c_i_cap = eia_solar[outside_kept_sectors].dc_net_capacity_mw.sum()
print('Dropping Commerical and Industrial sectors removes ' + str(np.round(c_i_cap,1)) + ' MW (DC)')

solar_sample = solar_sample[solar_sample['sector_name'].isin(keep_sectors)]

Dropping virtual and net metering customers removes 3215.7 MW (DC)
Dropping Commerical and Industrial sectors removes 532.4 MW (DC)


## Compute market preparation policy variables

In [19]:
# Total nameplate capacity per plant_code, broadcast back onto every row of that plant.
# The string 'sum' selects the groupby implementation explicitly; passing the
# Python builtin `sum` is deprecated in recent pandas releases.
eia_solar['plant_cap'] = eia_solar.groupby('plant_code')['nameplate_capacity_mw'].transform('sum')

In [20]:
# First operating year of an 'IPP Non-CHP' plant in each state.
# Result columns: state, first_ipp_plant_yr.
# The earliest year is taken with a direct groupby-min. The earlier
# groupby.apply(lambda x: x.iloc[0]) relied on the grouping column being passed
# into the applied frame, which is deprecated, and the later
# drop(columns=[..., 'state']) fails once that column is excluded.
first_ipp_year = (
    eia_solar.loc[eia_solar['sector_name'] == 'IPP Non-CHP']
    .dropna(subset=['state', 'operating_year'])  # same rows the grouped version kept
    .groupby('state', as_index=False)['operating_year']
    .min()
    .rename(columns={'operating_year': 'first_ipp_plant_yr'})
)

In [21]:
# Earliest operating year in each state and the sector observed in that year.
# Result columns: state, first_plant_yr, first_plant_sector.
# When several sectors share a state's first year, the alphabetically first
# sector_name is kept; sorting on sector_name makes that tie-break explicit.
# sort + drop_duplicates replaces groupby.apply(lambda x: x.iloc[0]), which
# depended on the deprecated behaviour of applying over the grouping column.
first_plant_sector = (
    eia_solar[['state', 'operating_year', 'sector_name']]
    .dropna()  # the grouped version ignored rows with a missing key
    .sort_values(by=['state', 'operating_year', 'sector_name'])
    .drop_duplicates(subset='state', keep='first')
    .rename(columns={'operating_year': 'first_plant_yr',
                     'sector_name': 'first_plant_sector'})
    .reset_index(drop=True)
)

In [22]:
# State/years in which at least one row has ferc_small_power_producer_status == 'Y',
# with the number of distinct plant codes, sorted by state then year.
is_small_power_producer = eia_solar['ferc_small_power_producer_status'] == 'Y'
qf_years = (
    eia_solar[is_small_power_producer]
    .groupby(['state', 'operating_year'])
    .plant_code.nunique()
    .reset_index()
    .sort_values(by=['state', 'operating_year'], ascending=True)
)

# The first row per state is the earliest such year because qf_years is sorted.
first_qf_year = (
    qf_years.groupby('state')
    .operating_year.apply(lambda years: years.iloc[0])
    .reset_index()
    .rename(columns={'operating_year': 'first_qf_year'})
)

In [25]:
# One row per state with first_plant_yr / first_plant_sector, plus
# first_ipp_plant_yr and first_qf_year where they exist.
# Both merges are left joins on 'state'. The default inner join used before
# silently dropped every state that has no 'IPP Non-CHP' plant, losing its
# first_plant_yr and first_plant_sector as well.
first_years = (
    first_plant_sector
    .merge(first_ipp_year, on='state', how='left')
    .merge(first_qf_year, on='state', how='left')
)

## Aggregate data to compute capacity and \# plant additions
### Includes commercial/net metering facilities as a separate variable

In [26]:
##################################################################
# Compute capacity of C + I and net meter per year
##################################################################

keep_sectors = ['IPP Non-CHP', 'Electric Utility']

# Rows where either agreement flag is anything other than 'N'.
net_meter_flag = eia_solar['net_metering_agreement'] != 'N'
virtual_net_meter_flag = eia_solar['virtual_net_metering_agreement'] != 'N'
net_meter = eia_solar[net_meter_flag | virtual_net_meter_flag]

# Rows whose sector is not one of keep_sectors.
# NOTE: from here on c_i_cap is a DataFrame of those rows, not a capacity total.
c_i_cap = eia_solar[~eia_solar['sector_name'].isin(keep_sectors)]


def fix_cols(df, prefix):
    """Return a copy of `df` with the year and capacity columns renamed.

    'operating_year' becomes 'year'; 'dc_net_capacity_mw' and
    'nameplate_capacity_mw' become '<prefix>_cap_dc' and '<prefix>_cap_ac'.
    Columns that are absent are ignored by the rename.
    """
    new_names = {
        'operating_year': 'year',
        'dc_net_capacity_mw': prefix + '_cap_dc',
        'nameplate_capacity_mw': prefix + '_cap_ac',
    }
    return df.rename(columns=new_names)

# Frames collected here are later merged onto the state/year panel.
dfList = []

state_year_keys = ['state', 'operating_year']

# DC and AC capacity per state/year for facilities with (virtual) net metering.
net_meter_by_year = (
    net_meter.groupby(state_year_keys)[['dc_net_capacity_mw', 'nameplate_capacity_mw']]
    .sum()
    .reset_index()
)
dfList.append(fix_cols(net_meter_by_year, 'net_meter'))

# AC capacity per state/year for the sectors outside keep_sectors.
# NOTE(review): the prefix 'comm_' already ends in an underscore, so the column
# is named 'comm__cap_ac' (double underscore). Left unchanged here because
# readers of the saved CSV may depend on that name; confirm before renaming.
c_i_by_year = (
    c_i_cap.groupby(state_year_keys)
    .nameplate_capacity_mw.sum()
    .reset_index()
)
dfList.append(fix_cols(c_i_by_year, 'comm_'))

In [27]:
##################################################################
# Compute capacity and # plant additions per year
##################################################################

def fix_cols(df, prefix=None):
    """Return a copy of `df` with 'operating_year' renamed to 'year'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to rename; it is not modified.
    prefix : str, optional
        When given, 'dc_net_capacity_mw' and 'nameplate_capacity_mw' are also
        renamed to '<prefix>_cap_dc' and '<prefix>_cap_ac'. This keeps the
        two-argument calls made earlier in the notebook working after this
        definition has replaced the earlier `fix_cols`, for example when cells
        are re-run out of order.

    Returns
    -------
    pandas.DataFrame
    """
    new_names = {'operating_year': 'year'}
    if prefix is not None:
        new_names['dc_net_capacity_mw'] = prefix + '_cap_dc'
        new_names['nameplate_capacity_mw'] = prefix + '_cap_ac'
    return df.rename(columns=new_names)

# Compute DC/AC Cap additions
# NOTE: this section appends to dfList; re-running it without re-creating
# dfList adds the same frames a second time.
panel_keys = ['state', 'operating_year']
sample_by_year = solar_sample.groupby(panel_keys)

# Compute DC/AC Cap additions
dfList.append(fix_cols(sample_by_year.dc_net_capacity_mw.sum().reset_index()))
dfList.append(fix_cols(sample_by_year.nameplate_capacity_mw.sum().reset_index()))

# Compute number of plants
dfList.append(fix_cols(sample_by_year.plant_code.nunique().reset_index()))

# Same three aggregates restricted to rows with ferc_small_power_producer_status == 'Y' (QF)
qf_by_year = solar_sample[solar_sample['ferc_small_power_producer_status']=='Y'].groupby(panel_keys)

# Add QF DC/AC Cap
qf_dc = fix_cols(qf_by_year.dc_net_capacity_mw.sum().reset_index())
dfList.append(qf_dc.rename(columns={'dc_net_capacity_mw': 'dc_cap_added_qf'}))

qf_ac = fix_cols(qf_by_year.nameplate_capacity_mw.sum().reset_index())
dfList.append(qf_ac.rename(columns={'nameplate_capacity_mw': 'ac_cap_added_qf'}))

# Add QF # plants
qf_plants = fix_cols(qf_by_year.plant_code.nunique().reset_index())
dfList.append(qf_plants.rename(columns={'plant_code': 'n_plants_qf'}))

In [28]:
#####################################################
# Code to make state frame with months and quarters
#####################################################

# helper function to assign quarter
def assign_quarter(x):
    """Map a month number to its calendar quarter.

    Months 1-3 give 1, 4-6 give 2, 7-9 give 3; every other value
    (including 10-12) gives 4.
    """
    first_three_quarters = ((1, 2, 3), (4, 5, 6), (7, 8, 9))
    for quarter, months in enumerate(first_three_quarters, start=1):
        if x in months:
            return quarter
    return 4

# State/year panel that every aggregate is merged onto.
state_frame = pd.read_csv(cleanDataDir+'state_inputs/state_panel_frame.csv')

nYear = state_frame.year.nunique()
nStates = state_frame.state.nunique()

# Disabled: expansion of the panel to monthly/quarterly rows.
# months = np.tile(range(1,13), nYear * nStates)

# newdf = pd.DataFrame(np.repeat(state_frame.values, (nYear), axis=0), columns=state_frame.columns)
# newdf['month'] = months
# newdf['quarter'] = newdf.month.apply(lambda x: assign_quarter(x))

# Left-merge each aggregate on the columns it shares with the panel.
# State/years without a match get 0. Note that fillna(0) applies to every
# column of the frame, not only to the newly merged ones.
for yearly_frame in dfList:
    state_frame = state_frame.merge(yearly_frame, how='left')
    state_frame = state_frame.fillna(0)

final_names = {
    'dc_net_capacity_mw': 'dc_cap_added',
    'nameplate_capacity_mw': 'ac_cap_added',
    'plant_code': 'n_plants',
}
newdf = state_frame.rename(columns=final_names)

In [29]:
#####################################################
# Compute cumulative values
#####################################################

var_names = ['dc_cap_added', 'ac_cap_added', 'n_plants',
             'dc_cap_added_qf', 'ac_cap_added_qf', 'n_plants_qf']

# Running totals within each state, in year order.
# GroupBy.cumsum returns a frame indexed like newdf, so the columns can be
# assigned back by index alignment. groupby.apply(np.cumsum) prepends the group
# key to the index in pandas >= 2.0, which no longer lines up with newdf.
# Sorting by state/year first makes the totals chronological even if the rows
# of newdf are not already ordered by year.
cum_vals = (
    newdf.sort_values(['state', 'year'])
    .groupby('state')[var_names]
    .cumsum()
    .add_prefix('cum_')
)

for var in cum_vals.columns:
    newdf[var] = cum_vals[var]

In [30]:
# Average AC capacity per plant added in each state/year (all plants, then QF only).
# State/years where both numerator and denominator are 0 come out as NaN.
newdf['avg_cap_ac'] = newdf['ac_cap_added'].div(newdf['n_plants'])
newdf['avg_cap_ac_qf'] = newdf['ac_cap_added_qf'].div(newdf['n_plants_qf'])

In [31]:
# Attach the per-state "first year" variables to every row of the panel
# (left join on the columns the two frames share).
newdf = pd.merge(newdf, first_years, how='left')

In [32]:
#####################################################
# Save to CSV
#####################################################

# Alternative output location, currently disabled:
# newdf.to_csv(homeDir + 'analysis/new_policy_analysis/data/solar_cap_series.csv', index=False)
output_csv = cleanDataDir + 'state_solar/event_study_state_micro_solar_2005-2019.csv'
newdf.to_csv(output_csv, index=False)

In [33]:
# #####################################################
# # Argument for dropping net metering
# #####################################################

# net_meter = eia_solar[eia_solar['net_metering_dc_capacity_mw'].str.strip() != '']
# net_meter['net_meter_cap'] = pd.to_numeric(net_meter['net_metering_dc_capacity_mw'])
# nm_cap = (net_meter['dc_net_capacity_mw'] - net_meter['net_meter_cap']).sum()
# print('Facilities with net metering only have ' + str(nm_cap) + ' MW DC not under contract')

# virtual_net_meter = eia_solar[eia_solar['virtual_net_metering_dc_capacity_mw'].str.strip() != '']
# virtual_net_meter['virtual_net_cap'] = pd.to_numeric(virtual_net_meter['virtual_net_metering_dc_capacity_mw'])
# vnm_cap = (virtual_net_meter['dc_net_capacity_mw'] - virtual_net_meter['virtual_net_cap']).sum()
# print('Facilities with virtual net metering only have ' + str(np.round(vnm_cap,1)) + ' MW DC not under (virtual) contract')