## Setup

In [1]:
import pandas as pd
import numpy as np
import json
import glob
import matplotlib.pyplot as plt
%matplotlib inline

pd.options.display.max_rows = 999
pd.options.display.max_columns=100
plt.rcParams['figure.dpi'] = 200
plt.rcParams['figure.figsize'] = [15, 10]

dataDir = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/'
jsonDir = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/JSONFiles/'

## Read and fix data

In [2]:
utils = pd.read_csv(dataDir + 'eia_data/eia8602018/utility_2018.csv',header=1).fillna(0)
utils.columns = [x.lower().replace(" ","_").replace("?","") for x in utils.columns]

In [3]:
utils['owner_of_plants_reported_on_form'].value_counts()

Y    4400
0     639
Name: owner_of_plants_reported_on_form, dtype: int64

In [4]:
utils[utils['owner_of_plants_reported_on_form']==0];

In [5]:
gen_data = pd.read_csv(dataDir + 'eia_data/eia8602018/gen_2018.csv')
gen_data.drop(['Unnamed: 0'],axis=1,inplace=True)
gen_data = gen_data[~gen_data['Utility ID'].str.contains('NOTE', na=False)]
gen_data['Utility ID'] = pd.to_numeric(gen_data['Utility ID'])
gen_data.columns = gen_data.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')

  interactivity=interactivity, compiler=compiler, result=result)


Now I want to merge the utility data with the generator data so I can do some sick analyses bro

I'll start by merging the utility list with the list of generators (all in 2018), so I'll see if any columns will conflict

In [6]:
[print(x) for x in gen_data.columns if x in utils.columns]

utility_id
utility_name
state


[None, None, None]

I'm merging by utility_id so let's fix the state variable

In [7]:
gen_data = gen_data.rename(columns={'state': 'plant_state'})

In [8]:
merged = gen_data.merge(utils, how='left', on=['utility_id','utility_name'])

Various checks assures me that the merge was successful. So now I want to merge in ownership data in a clever way

## Now merge in the owner variable in a creative way
### solution is to add owner_1 owner_1_perc, owner_2 ... etc.  for each plant

In [9]:
# S Single ownership by respondent 
# J Jointly owned with another entity
# W Wholly owned by an entity other than respondent
df = merged.join(pd.get_dummies(merged['ownership']))
df.rename(columns={'S': 'single_owner', 'W': 'other_owner', 'J':'joint_owner'},inplace=True)

In [10]:
owners = pd.read_csv(dataDir + 'eia_data/eia8602018/owner_2018.csv',header=1)
owners.columns = owners.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')

owners['perc_owned'] = pd.to_numeric(owners['percent_owned'].str.replace("%",""))

num_owners = owners.groupby(['plant_name', 'generator_id']).apply(lambda grp: len(grp.owner_name.unique())).reset_index()
num_owners.rename(columns={0: 'num_owners'},inplace=True)
owners = owners.merge(num_owners, how='outer', on=['plant_name','generator_id'])

In [11]:
owners['idx'] = owners['plant_code'].astype(str) + '_' + owners['generator_id']

In [12]:
# owner_dict with key = generator; value = owners 
owner_dict = owners.groupby(['idx']).owner_name.apply(lambda grp: list(grp.value_counts().index)).to_dict()

perc_dict = owners.groupby(['idx']).perc_owned.apply(lambda grp: list(grp)).to_dict()

# dictionary may be useful for other stuff 
perc_owned_dict = {}
for x in owner_dict:
    temp_dict = {}
    for i,y in enumerate(owner_dict[x]):
        temp_dict.update({y : perc_dict[x][i]})
    perc_owned_dict[x] = temp_dict

In [13]:
owners = owners.join(owners['idx'].map(owner_dict).apply(pd.Series).iloc[:,0:4].rename(columns={0: 'owner_1', 1: 'owner_2', 2: 'owner_3', 3: 'owner_4'}))

owners = owners.join(owners['idx'].map(perc_dict).apply(pd.Series).iloc[:,0:4].rename(columns={0: 'perc_owner_1', 1: 'perc_owner_2', 2: 'perc_owner_3', 3: 'perc_owner_4'}))

# do also for the owner_address...

### Now do the parent utility stuff

In [14]:
recode_dict = {}
for fileName in glob.glob(jsonDir + "*.txt"):
    dict_name = fileName.split(jsonDir)[-1].split('.txt')[0]
    with open(fileName) as outfile:
        recode_dict[dict_name] = json.load(outfile)

In [15]:
keepList = ['plant_code', 'generator_id']
owners_to_merge = owners.drop([x for x in df.columns if x in owners.columns if x not in keepList], axis = 1)
df = df.merge(owners_to_merge, how='left', on=['plant_code','generator_id'])

In [16]:
df['clean_address'] = df['street_address'].str.lower()
df['parent_utility'] = df['utility_name'].map(recode_dict['parent_utility_dict'])
df['parent_utility']=df['parent_utility'].fillna(df['utility_name'])
    
# assign same address to common parent 
df[df['clean_address'].str.contains('1414 harbour way')]['parent_utility'] = "Solar Star"
df[df['clean_address'].str.contains('700 universe')]['parent_utility'] = 'NextEra'
df['parent_utility'] = df['parent_utility'].apply(lambda x: "Duke Energy" if "Duke Energy" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "NextEra" if "NextEra" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "NRG" if "NRG" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "Solar Star" if "Solar Star" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "NRG" if "Agua Caliente Solar" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "NextEra" if "Blythe Solar" in x else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "8me" if "8me" in x.lower() else x)
df['parent_utility'] = df['parent_utility'].apply(lambda x: "Exelon" if "AV Solar" in x else x)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [17]:
utils.utility_name.nunique()

5025

In [18]:
utils.groupby(['street_address']).utility_name.apply(lambda grp: list(grp.value_counts().index)).to_dict()

{'-16849 102nd Street SE': ['Dakota Magic Casino'],
 '01 N.E. 7th Street': ['Western Farmers Elec Coop, Inc'],
 '1 Adelaide Street East': ['Dorena Hydro, LLC', 'Clark Canyon Hydro, LLC'],
 '1 Armorcast Rd': ['Birdsboro Power LLC'],
 '1 Ashley Place': ['Ashley Energy LLC'],
 '1 Atwell Road': ['Bassett Healthcare'],
 '1 Blanchard RD PO BOX 13190': ['Calpine Corp - Metcalf Energy Center'],
 '1 Bonnybridge Road': ['International Paper Port Wentworth Mill'],
 '1 Energy Park Way': ['City of Holland'],
 '1 Federal Street': ['American Sugar Refining, Inc.'],
 '1 Gateway Plaza': ['Metropolitan Transportation Authority'],
 '1 Glen Street': ['Finch Paper LLC'],
 '1 Gustave L Levy Pl': ['Mount Sinai Hospital'],
 '1 Hansen Rd PO BOX 266': ['Alexander & Baldwin, Inc.'],
 '1 Healthy Way': ['South Nassau Communities Hospital'],
 '1 Hoag Drive': ['Hoag Memorial Presbyterian Hospital'],
 '1 Horwith Drive': ['EIF Northampton GP, LLC'],
 '1 Infinite Loop': ['Apple, Inc'],
 '1 Landmark Sq. Suite 320': ['Gr

In [19]:
# need to clean addresses -- 11 Speen StreetSuite 410', 111 Speen Street, Suite 410, 111 Speen Street Suite 410',  '111 Speen Street': all the same

In [26]:
df

Unnamed: 0,utility_id,utility_name,plant_code,plant_name,plant_state,county,generator_id,technology,prime_mover,unit_code,ownership,duct_burners,can_bypass_heat_recovery_steam_generator,rto/iso_lmp_node_designation,rto/iso_location_designation_for_reporting_wholesale_sales_data_to_ferc,nameplate_capacity_mw,nameplate_power_factor,summer_capacity_mw,winter_capacity_mw,minimum_load_mw,uprate_or_derate_completed_during_year,month_uprate_or_derate_completed,year_uprate_or_derate_completed,status,synchronized_to_transmission_grid,operating_month,operating_year,planned_retirement_month,planned_retirement_year,associated_with_combined_heat_and_power_system,sector_name,sector,topping_or_bottoming,energy_source_1,energy_source_2,energy_source_3,energy_source_4,energy_source_5,energy_source_6,startup_source_1,startup_source_2,startup_source_3,startup_source_4,solid_fuel_gasification_system,carbon_capture_technology,turbines_or_hydrokinetic_buoys,time_from_cold_shutdown_to_full_load,fluidized_bed_technology,pulverized_coal_technology,stoker_technology,...,planned_net_winter_capacity_uprate_mw,planned_uprate_month,planned_uprate_year,planned_net_summer_capacity_derate_mw,planned_net_winter_capacity_derate_mw,planned_derate_month,planned_derate_year,planned_new_prime_mover,planned_energy_source_1,planned_new_nameplate_capacity_mw,planned_repower_month,planned_repower_year,other_planned_modifications,other_modifications_month,other_modifications_year,multiple_fuels,cofire_fuels,switch_between_oil_and_natural_gas,street_address,city,state,zip,owner_of_plants_reported_on_form,operator_of_plants_reported_on_form,asset_manager_of_plants_reported_on_form,other_relationships_with_plants_reported_on_form,entity_type,joint_owner,single_owner,other_owner,owner_name,owner_street_address,owner_city,owner_state,owner_zip,ownership_id,percent_owned,perc_owned,num_owners,idx,owner_1,owner_2,owner_3,owner_4,perc_owner_1,perc_owner_2,perc_owner_3,perc_owner_4,clean_address,parent_utility
0,195,Alabama Power Co,2.0,Bankhead Dam,AL,Tuscaloosa,1,Conventional Hydroelectric,HY,,S,X,X,,,53.9,0.920,53.0,53.0,52.0,N,,,OP,X,7.0,1963.0,,,N,Electric Utility,1.0,X,WAT,,,,,,,,,,,,0,1H,,,,...,,,,,,,,,,,,,,,,N,,,P O Box 2641600 North 18th Str,Birmingham,AL,35291,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,p o box 2641600 north 18th str,Southern Company
1,195,Alabama Power Co,3.0,Barry,AL,Mobile,1,Natural Gas Steam Turbine,ST,,S,X,X,,,153.1,0.850,55.5,55.5,55.0,N,,,OP,X,2.0,1954.0,,,N,Electric Utility,1.0,X,NG,,,,,,NG,,,,,,,OVER,,,,...,,,,,,,,,,,,,,,,N,N,,P O Box 2641600 North 18th Str,Birmingham,AL,35291,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,p o box 2641600 north 18th str,Southern Company
2,195,Alabama Power Co,3.0,Barry,AL,Mobile,2,Natural Gas Steam Turbine,ST,,S,X,X,,,153.1,0.850,55.5,55.5,55.0,N,,,OP,X,7.0,1954.0,,,N,Electric Utility,1.0,X,NG,,,,,,NG,,,,,,,OVER,,,,...,,,,,,,,,,,,,,,,N,,,P O Box 2641600 North 18th Str,Birmingham,AL,35291,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,p o box 2641600 north 18th str,Southern Company
3,195,Alabama Power Co,3.0,Barry,AL,Mobile,4,Conventional Steam Coal,ST,,S,X,X,,,403.7,0.850,362.0,362.0,200.0,N,,,OP,X,12.0,1969.0,,,N,Electric Utility,1.0,X,BIT,,,,,,NG,,,,,,,OVER,,Y,,...,,,,,,,,,,,,,,,,N,,,P O Box 2641600 North 18th Str,Birmingham,AL,35291,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,p o box 2641600 north 18th str,Southern Company
4,195,Alabama Power Co,3.0,Barry,AL,Mobile,5,Conventional Steam Coal,ST,,S,X,X,,,788.8,0.850,756.5,756.5,465.0,N,,,OP,X,10.0,1971.0,,,N,Electric Utility,1.0,X,BIT,,,,,,NG,,,,,,,OVER,,Y,,...,,,,,,,,,,,,,,,,N,,,P O Box 2641600 North 18th Str,Birmingham,AL,35291,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,p o box 2641600 north 18th str,Southern Company
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23701,62777,Burrstone Energy Center,62920.0,Burrstone Energy Center,NY,Oneida,CG-2,Natural Gas Internal Combustion Engine,IC,,S,N,Y,,,1.1,0.800,1.1,1.1,0.7,N,,,OP,X,8.0,2009.0,,,Y,IPP CHP,3.0,T,NG,,,,,,,,,,N,,,10M,,,,...,,,,,,,,,,,,,,,,N,,,1656 Champlain Avenue,New Hartford,NY,13413,Y,0,0,0,COM,0,1,0,,,,,,,,,,,,,,,,,,,1656 champlain avenue,Burrstone Energy Center
23702,62777,Burrstone Energy Center,62920.0,Burrstone Energy Center,NY,Oneida,CG-3,Natural Gas Internal Combustion Engine,IC,,S,N,Y,,,1.1,0.800,1.1,1.1,0.7,N,,,OP,X,8.0,2009.0,,,Y,IPP CHP,3.0,T,NG,,,,,,,,,,N,,,10M,,,,...,,,,,,,,,,,,,,,,N,,,1656 Champlain Avenue,New Hartford,NY,13413,Y,0,0,0,COM,0,1,0,,,,,,,,,,,,,,,,,,,1656 champlain avenue,Burrstone Energy Center
23703,62777,Burrstone Energy Center,62920.0,Burrstone Energy Center,NY,Oneida,CG-4,Natural Gas Internal Combustion Engine,IC,,S,N,Y,,,0.3,0.800,0.3,0.3,0.2,N,,,OP,X,8.0,2009.0,,,Y,IPP CHP,3.0,T,NG,,,,,,,,,,N,,,10M,,,,...,,,,,,,,,,,,,,,,N,,,1656 Champlain Avenue,New Hartford,NY,13413,Y,0,0,0,COM,0,1,0,,,,,,,,,,,,,,,,,,,1656 champlain avenue,Burrstone Energy Center
23704,62731,"Gopher Energy Storage, LLC",62937.0,Athens BESS,MN,Isanti,BA,Batteries,BA,,S,X,X,,,6,,6.0,6.0,0.0,N,,,OP,X,12.0,2018.0,,,N,IPP Non-CHP,2.0,X,MWH,,,,,,,,,,,,,1H,,,,...,,,,,,,,,,,,,,,,,,,700 Universe Boulevard,Juno Beach,FL,33408,Y,Y,0,0,Q,0,1,0,,,,,,,,,,,,,,,,,,,700 universe boulevard,"Gopher Energy Storage, LLC"


## Merge owners back into generator data

In [25]:
t = df.merge(owners, on=['plant_name','generator_id'], how = 'left')
# 8 gen. are weirdly missing from the joint owner list.. 
t[(t['single_owner'] != 1) & (t['idx'].isna())]

KeyError: 'idx'

AttributeError: 'DataFrameGroupBy' object has no attribute 'nameplate_cap'