# Master file containing functions for recoding variables 

In [30]:
import pandas as pd
import json
import glob
from streetaddress import StreetAddressFormatter, StreetAddressParser

## Read in dictionaries for recoding variables

In [99]:
jsonDir = '../../Data for Tax Equity Project/JSONFiles/'

recode_dict = {}
for fileName in glob.glob(jsonDir + "*.txt"):
    dict_name = fileName.split(jsonDir)[-1].split('.txt')[0]
    with open(fileName) as outfile:
        recode_dict[dict_name] = json.load(outfile)


### Code that cleans proposed generator data

In [80]:
df = pd.read_csv('../Clean Data/proposed_gen_master_list_post08.csv')
df['region'] = df['plant_state'].map(recode_dict['region_dict'])
df['regulated'] = df['plant_state'].map(recode_dict['regulated_dict'])
df['status_clean'] = df['status'].map (recode_dict['status_dict'])
df['alt_status'] = df['status'].map(recode_dict['alt_status_dict'])
df['period'] = pd.cut(df['year'], bins=[min(df['year']-1), 2011, 2016, max(df['year'])],
      labels = ['2009-2011: Loan grant + ITC', 
                '2012-2016: ITC Round 1', 
                '2017-2023: ITC Round 2'])
df.drop([x for x in df.columns if "Unnamed" in x],axis=1,inplace=True)
df.to_csv('../Clean Data/proposed_gen_master_list_post08.csv')

### Generic function for recoding state/year variables 

In [59]:
def clean_df(fileName, state_var,year_var):
    df = pd.read_csv(fileName)
    df['region'] = df[state_var].map(region_dict)
    df['regulated'] = df[state_var].map(regulated_dict)
    df['period'] = pd.cut(df[year_var], bins=[min(df[year_var]-1), 2011, 2016, max(df[year_var])],
      labels = ['2009-2011: Loan grant + ITC', 
                '2012-2016: ITC Round 1', 
                '2017-2023: ITC Round 2'])
    df.to_csv(fileName[-3])
    out_name = fileName[:-4] + '_new.csv'
    df.to_csv(out_name)
    return(df)

In [60]:
fileName = '../../Tax Equity Code/STATA/capacity_clean.csv'
clean_df(fileName, 'State Code', 'Year');

In [62]:
fileName = '../../Data for Tax Equity Project/eia_data/eia8602018/merged.csv'
clean_df(fileName, 'plant_state','operating_year');

### Code for cleaning street addresses

In [95]:
def clean_address(df, address_var):
    s=""
    addr_formatter = StreetAddressFormatter()
    df['clean_address']= df[address_var].apply(lambda x: addr_formatter.abbrev_street_avenue_etc(x))
    df['clean_address'] = df['clean_address'].apply(lambda x: s.join(x.lower().split("flo")[:-1]) if "flo" in x.lower() else x.lower())
#     df['clean_address'] = df['clean_address'].apply(lambda x: s.join(x.lower().split("suite")[:-1]) if "flo" in x.lower() else x.lower())
    df['clean_address'] = df['clean_address'].apply(lambda x: x.replace(",", " ").replace(".","").replace("  ", " "))

    return(df)

In [96]:
fileName = '../../Data for Tax Equity Project/eia_data/eia8602018/merged_new.csv'
fileOut = "../../Data for Tax Equity Project/eia_data/eia8602018/merged_clean_address.csv"
gen = pd.read_csv(fileName)
gen.drop([x for x in gen.columns if "Unnamed" in x], axis=1, inplace=True)
clean_gen = clean_address(gen,'street_address')

  interactivity=interactivity, compiler=compiler, result=result)


In [94]:
# clean_gen.to_csv(fileOut)

## Code for fixing utilities (linking llcs to parent companies)

In [87]:
# dictionary of subsidiary companies for top solar utilities

utility_subsidiaries = {
    "Southern California Edison Co": 'Edison International',

# southern power co    
'Southern Power Co': 'Southern Company',
'Mississippi Power Co': 'Southern Company',
'Alabama Power Co': 'Southern Company',
'Georgia Power Co': 'Southern Company',

'Consolidated Edison Energy, Inc.': 'Consolidated Edison',
'Consolidated Edison Co-NY Inc': 'Consolidated Edison',
'Consolidated Edison Development Inc': 'Consolidated Edison',
'Consolidated Edison Solutions Inc': 'Consolidated Edison',
    
'Dominion Energy South Carolina, Inc': 'Dominion Energy',
'Dominion Renewable Energy': 'Dominion Energy',
'Virginia Electric & Power Co': 'Dominion Energy',
'Dominion Renewable Energy - Clipperton'  : 'Dominion Energy',
'Dominion Renewable Energy - Fremont': 'Dominion Energy',
    
'Florida Power & Light Co': 'NextEra',
'Bythe Solar II, LLC': 'NextEra', 
'Blythe Solar 110': 'NextEra',
'McCoy Solar, LLC': 'NextEra',
'Silver State Solar Power South, LLC': 'NextEra',
'Stuttgart Solar, LLC': 'NextEra',
'Marshall Solar Energy Project': 'NextEra',
    
'BHER Power Resources, Inc': 'Berkshire Hathaway',
'BHE Renewables, LLC': 'Berkshire Hathaway',
'Topaz Solar Farms LLC': 'Berkshire Hathaway',
    
'Imperial Valley Solar, LLC': '8me',
    
'D E Shaw & Co., LP': 'DE Shaw',
'North Star Solar PV LLC': 'DE Shaw',
'MS Solar 2, LLC': 'DE Shaw'
}
    

In [89]:
with open(jsonDir + 'parent_utility_dict.txt', 'w') as outFile:
    json.dump(utility_subsidiaries, outFile)

In [97]:
def id_parent_comp(df, utility_var, address_var, utility_dict):
    #for d in utility_dicts:
        
    df['parent_utility']=df[utility_var].map(utility_dict)
    
    df['parent_utility']=df['parent_utility'].fillna(df[utility_var])
    
    # assign same address to common parent 
    df[df[address_var].str.contains('1414 harbour way')]['parent_utility'] = "Solar Star"
    df[df[address_var].str.contains('700 universe')]['parent_utility'] = 'NextEra'
    
    
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "Duke Energy" if "Duke Energy" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "NextEra" if "NextEra" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "NRG" if "NRG" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "Solar Star" if "Solar Star" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "NRG" if "Agua Caliente Solar" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "NextEra" if "Blythe Solar" in x else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "8me" if "8me" in x.lower() else x)
    df['parent_utility'] = df['parent_utility'].apply(lambda x: "Exelon" if "AV Solar" in x else x)
    
    return df

In [102]:
clean_gen = id_parent_comp(clean_gen,'utility_name', 'clean_address', recode_dict['parent_utility_dict'])
# clean_gen.to_csv(fileOut)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.


In [114]:
t

{'AL': 0.0,
 'AK': 1.0,
 'AZ': 0.0,
 'AR': 1.0,
 'CA': 1.0,
 'CO': 1.0,
 'CT': 1.0,
 'DE': 1.0,
 'DC': 1.0,
 'FL': 1.0,
 'GA': 0.0,
 'HI': 1.0,
 'ID': 0.0,
 'IL': 0.0,
 'IN': 0.0,
 'IA': 1.0,
 'KS': 1.0,
 'KY': 0.0,
 'LA': 0.0,
 'ME': 1.0,
 'MD': 1.0,
 'MA': 1.0,
 'MI': 0.0,
 'MN': 1.0,
 'MS': 0.0,
 'MO': 1.0,
 'MT': 1.0,
 'NE': 1.0,
 'NV': 1.0,
 'NH': 1.0,
 'NJ': 1.0,
 'NM': 0.0,
 'NY': 0.0,
 'NC': 1.0,
 'ND': 1.0,
 'OH': 1.0,
 'OK': 1.0,
 'OR': 1.0,
 'PA': 1.0,
 'RI': 1.0,
 'SC': 1.0,
 'SD': 0.0,
 'TN': 0.0,
 'TX': 0.0,
 'UT': 0.0,
 'VT': 1.0,
 'VA': 1.0,
 'WA': 1.0,
 'WV': 1.0,
 'WI': 1.0,
 'WY': 1.0,
 nan: nan}

In [115]:
t=pd.read_csv('../../Data for Tax Equity Project/policies/rps.csv')
t.rename(columns={'Postal Code':'state', 'Net metering': 'net_meter'},inplace=True)
t.set_index('state',inplace=True)
t = t['net_meter'].to_dict()
with open(jsonDir + 'net_meter.txt', 'w') as outFile:
    json.dump(t, outFile)