## Setup

In [24]:
import pandas as pd
import numpy as np
import json
import glob
import matplotlib.pyplot as plt
%matplotlib inline

pd.options.display.max_rows = 999
pd.options.display.max_columns=100
plt.rcParams['figure.dpi'] = 200
plt.rcParams['figure.figsize'] = [15, 10]

dataDir = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/'
jsonDir = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/JSONFiles/'

## Read and fix data

In [25]:
utils = pd.read_csv(dataDir + 'eia_data/eia8602018/utility_2018.csv',header=1).fillna(0)
utils.columns = [x.lower().replace(" ","_").replace("?","") for x in utils.columns]

In [26]:
utils['owner_of_plants_reported_on_form'].value_counts()

Y    4400
0     639
Name: owner_of_plants_reported_on_form, dtype: int64

In [27]:
utils[utils['owner_of_plants_reported_on_form']==0];

In [5]:
gen_data = pd.read_csv(dataDir + 'eia_data/eia8602018/gen_2018.csv')
gen_data.drop(['Unnamed: 0'],axis=1,inplace=True)
gen_data = gen_data[~gen_data['Utility ID'].str.contains('NOTE', na=False)]
gen_data['Utility ID'] = pd.to_numeric(gen_data['Utility ID'])
gen_data.columns = gen_data.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')

  interactivity=interactivity, compiler=compiler, result=result)


Now I want to merge the utility data with the generator data so I can do some sick analyses bro

I'll start by merging the utility list with the list of generators (all in 2018), so I'll see if any columns will conflict

In [6]:
[print(x) for x in gen_data.columns if x in utils.columns]

utility_id
utility_name
state


[None, None, None]

I'm merging by utility_id so let's fix the state variable

In [7]:
gen_data = gen_data.rename(columns={'state': 'plant_state'})

In [8]:
merged = gen_data.merge(utils, how='left', on=['utility_id','utility_name'])

Various checks assures me that the merge was successful. So now I want to merge in ownership data in a clever way

## Now merge in the owner variable in a creative way
### solution is to add owner_1 owner_1_perc, owner_2 ... etc.  for each plant

In [9]:
# S Single ownership by respondent 
# J Jointly owned with another entity
# W Wholly owned by an entity other than respondent
df = merged.join(pd.get_dummies(merged['ownership']))
df.rename(columns={'S': 'single_owner', 'W': 'other_owner', 'J':'joint_owner'},inplace=True)

In [10]:
owners = pd.read_csv(dataDir + 'eia_data/eia8602018/owner_2018.csv',header=1)
owners.columns = owners.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')

owners['perc_owned'] = pd.to_numeric(owners['percent_owned'].str.replace("%",""))

num_owners = owners.groupby(['plant_name', 'generator_id']).apply(lambda grp: len(grp.owner_name.unique())).reset_index()
num_owners.rename(columns={0: 'num_owners'},inplace=True)
owners = owners.merge(num_owners, how='outer', on=['plant_name','generator_id'])

In [11]:
owners['idx'] = owners['plant_code'].astype(str) + '_' + owners['generator_id']

In [12]:
# owner_dict with key = generator; value = owners 
owner_dict = owners.groupby(['idx']).owner_name.apply(lambda grp: list(grp.value_counts().index)).to_dict()

perc_dict = owners.groupby(['idx']).perc_owned.apply(lambda grp: list(grp)).to_dict()

# dictionary may be useful for other stuff 
perc_owned_dict = {}
for x in owner_dict:
    temp_dict = {}
    for i,y in enumerate(owner_dict[x]):
        temp_dict.update({y : perc_dict[x][i]})
    perc_owned_dict[x] = temp_dict

In [13]:
owners = owners.join(owners['idx'].map(owner_dict).apply(pd.Series).iloc[:,0:4].rename(columns={0: 'owner_1', 1: 'owner_2', 2: 'owner_3', 3: 'owner_4'}))

owners = owners.join(owners['idx'].map(perc_dict).apply(pd.Series).iloc[:,0:4].rename(columns={0: 'perc_owner_1', 1: 'perc_owner_2', 2: 'perc_owner_3', 3: 'perc_owner_4'}))


## Apply parent dict to the list of owners

In [14]:
with open(jsonDir + 'final_parent_dict.json') as outfile:
    parent_map = json.load(outfile)

In [15]:
owners['parent_1'] = owners.owner_1.str.lower().map(parent_map)
owners['parent_2'] = owners.owner_2.str.lower().map(parent_map)
owners['parent_3'] = owners.owner_3.str.lower().map(parent_map)
owners['parent_4'] = owners.owner_4.str.lower().map(parent_map)

In [16]:
owners['diff_parent'] = owners['parent_1'] != owners['owner_1']

## Drop duplicates before merging

In [17]:
# list of cols to ignore to avoid _x and _y 
cols_to_ignore = ['utility_id',
 'utility_name',
 'plant_name',
 'state',
 'status'] 

owners_to_merge = owners.drop_duplicates('idx')
owners_to_merge = owners_to_merge.drop([x for x in df.columns if x in owners.columns if x in cols_to_ignore],axis=1)
df = df.merge(owners_to_merge, how='left', on=['plant_code','generator_id'])

In [23]:
df.to_csv(dataDir + 'master_gen_by_owner_2028.csv')

## Merge owners back into generator data

### Now do the parent utility stuff

In [18]:
# fill na of parent 1 with parent map on the utility name

df['parent_1'] = df['parent_1'].fillna(df.utility_name.str.lower().map(parent_map))

In [19]:
df['nameplate_cap']=pd.to_numeric(df['nameplate_capacity_mw'].astype(str).str.replace(",",""))

In [20]:
solar = df[df['energy_source_1']=='SUN']
wind=df[df['energy_source_1']=='WND']

In [21]:
solar.groupby('plant_state').nameplate_cap.sum().sort_values(ascending=False).reset_index()

Unnamed: 0,plant_state,nameplate_cap
0,CA,11837.6
1,NC,4007.9
2,AZ,2067.3
3,TX,1943.1
4,NV,1926.0
5,FL,1401.6
6,GA,1026.2
7,UT,859.1
8,NJ,771.9
9,MA,744.9


In [230]:
solar[solar['plant_state']=='TX'].groupby('parent_1').nameplate_cap.sum().sort_values(ascending=False).reset_index()

Unnamed: 0,parent_1,nameplate_cap
0,consolidated edison development inc.,355.5
1,southern company,218.5
2,clearway energy,202.0
3,174 power global corp.,182.0
4,upton county solar 2 llc,180.0
5,re roserock,160.0
6,berkshire hathaway,155.0
7,cypress creek renewables,110.8
8,"solaireholman 1, llc",50.0
9,"lamesa solar ii, l.l.c.",50.0


In [228]:
wind

Unnamed: 0,utility_id,utility_name,plant_code,plant_name,plant_state,county,generator_id,technology,prime_mover,unit_code,ownership,duct_burners,can_bypass_heat_recovery_steam_generator,rto/iso_lmp_node_designation,rto/iso_location_designation_for_reporting_wholesale_sales_data_to_ferc,nameplate_capacity_mw,nameplate_power_factor,summer_capacity_mw,winter_capacity_mw,minimum_load_mw,uprate_or_derate_completed_during_year,month_uprate_or_derate_completed,year_uprate_or_derate_completed,status,synchronized_to_transmission_grid,operating_month,operating_year,planned_retirement_month,planned_retirement_year,associated_with_combined_heat_and_power_system,sector_name,sector,topping_or_bottoming,energy_source_1,energy_source_2,energy_source_3,energy_source_4,energy_source_5,energy_source_6,startup_source_1,startup_source_2,startup_source_3,startup_source_4,solid_fuel_gasification_system,carbon_capture_technology,turbines_or_hydrokinetic_buoys,time_from_cold_shutdown_to_full_load,fluidized_bed_technology,pulverized_coal_technology,stoker_technology,...,planned_net_winter_capacity_derate_mw,planned_derate_month,planned_derate_year,planned_new_prime_mover,planned_energy_source_1,planned_new_nameplate_capacity_mw,planned_repower_month,planned_repower_year,other_planned_modifications,other_modifications_month,other_modifications_year,multiple_fuels,cofire_fuels,switch_between_oil_and_natural_gas,street_address,city,state,zip,owner_of_plants_reported_on_form,operator_of_plants_reported_on_form,asset_manager_of_plants_reported_on_form,other_relationships_with_plants_reported_on_form,entity_type,joint_owner,single_owner,other_owner,owner_name,owner_street_address,owner_city,owner_state,owner_zip,ownership_id,percent_owned,perc_owned,num_owners,idx,owner_1,owner_2,owner_3,owner_4,perc_owner_1,perc_owner_2,perc_owner_3,perc_owner_4,parent_1,parent_2,parent_3,parent_4,diff_parent,nameplate_cap
195,13642,Nome Joint Utility Systems,90.0,Snake River,AK,Nome,ENT,Onshore Wind Turbine,WT,,S,X,X,,,0.8,,0.8,0.8,0.1,N,,,OA,X,1.0,2015.0,,,N,Electric Utility,1.0,X,WND,,,,,,,,,,,N,15,,,,,...,,,,,,,,,,,,,,,1226 Port Road,Nome,AK,99762,Y,0,0,0,M,0,1,0,,,,,,,,,,,,,,,,,,,nome joint utility systems,,,,,0.8
196,13642,Nome Joint Utility Systems,90.0,Snake River,AK,Nome,EWT 1,Onshore Wind Turbine,WT,,S,X,X,,,0.9,,0.9,0.9,0.1,N,,,OP,X,7.0,2013.0,,,N,Electric Utility,1.0,X,WND,,,,,,,,,,N,N,1,,,,,...,,,,,,,,,,,,,,,1226 Port Road,Nome,AK,99762,Y,0,0,0,M,0,1,0,,,,,,,,,,,,,,,,,,,nome joint utility systems,,,,,0.9
197,13642,Nome Joint Utility Systems,90.0,Snake River,AK,Nome,EWT 2,Onshore Wind Turbine,WT,,S,X,X,,,0.9,,0.9,0.9,0.1,N,,,OP,X,7.0,2013.0,,,N,Electric Utility,1.0,X,WND,,,,,,,,,,N,N,1,,,,,...,,,,,,,,,,,,,,,1226 Port Road,Nome,AK,99762,Y,0,0,0,M,0,1,0,,,,,,,,,,,,,,,,,,,nome joint utility systems,,,,,0.9
887,10633,City of Lamar - (CO),508.0,Lamar Plant,CO,Prowers,T1-T3,Onshore Wind Turbine,WT,,S,X,X,,,4.5,,4.0,4.0,1.0,N,,,OP,X,2.0,2004.0,,,N,Electric Utility,1.0,X,WND,,,,,,,,,,N,,3,,,,,...,,,,,,,,,,,,,,,100 North 2nd Street,Lamar,CO,81052,0,Y,0,0,M,0,1,0,,,,,,,,,,,,,,,,,,,city of lamar - (co),,,,,4.5
888,10633,City of Lamar - (CO),508.0,Lamar Plant,CO,Prowers,T4,Onshore Wind Turbine,WT,,W,X,X,,,1.5,,1.5,1.5,1.0,N,,,OP,X,2.0,2004.0,,,N,Electric Utility,1.0,X,WND,,,,,,,,,,N,,1,,,,,...,,,,,,,,,,,,N,,,100 North 2nd Street,Lamar,CO,81052,0,Y,0,0,M,0,0,1,Arkansas River Power Authority,P O Box 703409 South Main Street,Lamar,CO,81052,712.0,100.00%,100.0,1.0,508_T4,Arkansas River Power Authority,,,,100.0,,,,arkansas river power authority,,,,True,1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22006,58270,GE Wind Energy LLC,62642.0,GE-Lubbock,TX,Lubbock,GE-P2,Onshore Wind Turbine,WT,,S,X,X,,,2.5,,3.0,3.0,0.1,N,,,OP,X,4.0,2014.0,,,N,IPP Non-CHP,2.0,X,WND,,,,,,,,,,,,1,,,,,...,,,,,,,,,,,,,,,1 River Road,Schenectady,NY,12345,Y,0,0,0,Q,0,1,0,,,,,,,,,,,,,,,,,,,ge wind energy llc,,,,,2.5
22007,58270,GE Wind Energy LLC,62642.0,GE-Lubbock,TX,Lubbock,GE-P3,Onshore Wind Turbine,WT,,S,X,X,,,2.8,,3.1,3.1,0.1,Y,11,2018,OP,X,2.0,2015.0,,,N,IPP Non-CHP,2.0,X,WND,,,,,,,,,,,,1,,,,,...,,,,,,,,,,,,,,,1 River Road,Schenectady,NY,12345,Y,0,0,0,Q,0,1,0,,,,,,,,,,,,,,,,,,,ge wind energy llc,,,,,2.8
22076,62657,Harbec Energy,62736.0,Harbec Energy,NY,Ontario,250KW,Onshore Wind Turbine,WT,,S,X,X,,,0.3,,0.3,0.3,0.0,N,,,OP,X,12.0,2001.0,,,N,Industrial CHP,7.0,X,WND,,,,,,,,,,N,N,1,,,,,...,,,,,,,,,,,,N,,,358 Timothy Lane,Ontario,NY,14519,Y,Y,Y,0,IND,0,1,0,,,,,,,,,,,,,,,,,,,harbec energy,,,,,0.3
22077,62657,Harbec Energy,62736.0,Harbec Energy,NY,Ontario,850KW,Onshore Wind Turbine,WT,,S,X,X,,,0.9,,0.9,0.9,0.0,N,,,OP,X,12.0,2011.0,,,N,Industrial CHP,7.0,X,WND,,,,,,,,,,N,N,1,,,,,...,,,,,,,,,,,,N,,,358 Timothy Lane,Ontario,NY,14519,Y,Y,Y,0,IND,0,1,0,,,,,,,,,,,,,,,,,,,harbec energy,,,,,0.9


## answer ga solar questions


In [25]:
ga_solar = solar[solar['plant_state']=='GA']


In [35]:
ga_solar[ga_solar['parent_1'] == 'southern company']

Unnamed: 0,utility_id,utility_name,plant_code,plant_name,plant_state,county,generator_id,technology,prime_mover,unit_code,ownership,duct_burners,can_bypass_heat_recovery_steam_generator,rto/iso_lmp_node_designation,rto/iso_location_designation_for_reporting_wholesale_sales_data_to_ferc,nameplate_capacity_mw,nameplate_power_factor,summer_capacity_mw,winter_capacity_mw,minimum_load_mw,uprate_or_derate_completed_during_year,month_uprate_or_derate_completed,year_uprate_or_derate_completed,status,synchronized_to_transmission_grid,operating_month,operating_year,planned_retirement_month,planned_retirement_year,associated_with_combined_heat_and_power_system,sector_name,sector,topping_or_bottoming,energy_source_1,energy_source_2,energy_source_3,energy_source_4,energy_source_5,energy_source_6,startup_source_1,startup_source_2,startup_source_3,startup_source_4,solid_fuel_gasification_system,carbon_capture_technology,turbines_or_hydrokinetic_buoys,time_from_cold_shutdown_to_full_load,fluidized_bed_technology,pulverized_coal_technology,stoker_technology,...,planned_net_winter_capacity_derate_mw,planned_derate_month,planned_derate_year,planned_new_prime_mover,planned_energy_source_1,planned_new_nameplate_capacity_mw,planned_repower_month,planned_repower_year,other_planned_modifications,other_modifications_month,other_modifications_year,multiple_fuels,cofire_fuels,switch_between_oil_and_natural_gas,street_address,city,state,zip,owner_of_plants_reported_on_form,operator_of_plants_reported_on_form,asset_manager_of_plants_reported_on_form,other_relationships_with_plants_reported_on_form,entity_type,joint_owner,single_owner,other_owner,owner_name,owner_street_address,owner_city,owner_state,owner_zip,ownership_id,percent_owned,perc_owned,num_owners,idx,owner_1,owner_2,owner_3,owner_4,perc_owner_1,perc_owner_2,perc_owner_3,perc_owner_4,parent_1,parent_2,parent_3,parent_4,diff_parent,nameplate_cap
19075,17650,Southern Power Co,59449.0,Decatur County Solar Project,GA,Decatur,DCSP1,Solar Photovoltaic,PV,,S,X,X,,,20.0,,19.0,19.0,,N,,,OP,X,12.0,2015.0,,,N,IPP Non-CHP,2.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,"30 Ivan Allen Jr. Blvd, NW",Atlanta,GA,30308,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,20.0
19076,17650,Southern Power Co,59450.0,"Decatur Parkway Solar Project, LLC",GA,Decatur,DPSP1,Solar Photovoltaic,PV,,S,X,X,,,80.0,,80.0,80.0,,N,,,OP,X,12.0,2015.0,,,N,IPP Non-CHP,2.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,"30 Ivan Allen Jr. Blvd, NW",Atlanta,GA,30308,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,80.0
19185,7140,Georgia Power Co,59556.0,Dalton 2,GA,Murray,1,Solar Photovoltaic,PV,,S,X,X,,,6.5,,6.5,6.5,,N,,,OP,X,12.0,2014.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,6.5
19552,7140,Georgia Power Co,59862.0,Fort Benning Solar Facility,GA,Chattahoochee,1,Solar Photovoltaic,PV,,S,X,X,,,30.0,,30.0,30.0,,N,,,OP,X,12.0,2015.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,30.0
19553,7140,Georgia Power Co,59863.0,Fort Gordon Solar Facility,GA,Richmond,1,Solar Photovoltaic,PV,,S,X,X,,,30.0,,30.0,30.0,,N,,,OP,X,10.0,2016.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,30.0
19554,7140,Georgia Power Co,59864.0,King's Bay Solar Facility,GA,Camden,1,Solar Photovoltaic,PV,,S,X,X,,,30.0,,30.2,30.2,,N,,,OP,X,12.0,2016.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,30.0
19555,7140,Georgia Power Co,59865.0,Fort Stewart Solar Facility,GA,Liberty,1,Solar Photovoltaic,PV,,S,X,X,,,30.0,,30.0,30.0,,N,,,OP,X,10.0,2016.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,30.0
19563,7140,Georgia Power Co,59876.0,Marine Corps Logistics Base Solar,GA,Dougherty,1,Solar Photovoltaic,PV,,S,X,X,,,31.0,,31.2,31.2,,N,,,OP,X,2.0,2018.0,,,N,Electric Utility,1.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,P O Box 4545,Atlanta,GA,30302,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,31.0
19567,17650,Southern Power Co,59891.0,Butler Solar Farm 20,GA,Taylor,1,Solar Photovoltaic,PV,,S,X,X,,,20.0,,20.0,20.0,,N,,,OP,X,2.0,2016.0,,,N,IPP Non-CHP,2.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,"30 Ivan Allen Jr. Blvd, NW",Atlanta,GA,30308,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,20.0
19568,17650,Southern Power Co,59894.0,Pawpaw Solar Plant,GA,Taylor,1,Solar Photovoltaic,PV,,S,X,X,,,30.0,,30.0,30.0,,N,,,OP,X,3.0,2016.0,,,N,IPP Non-CHP,2.0,X,SUN,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,N,,,"30 Ivan Allen Jr. Blvd, NW",Atlanta,GA,30308,Y,0,0,0,I,0,1,0,,,,,,,,,,,,,,,,,,,southern company,,,,,30.0
