## Setup

In [5]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
%matplotlib inline

pd.options.display.max_rows = 999
plt.rcParams['figure.dpi'] = 200
plt.rcParams['figure.figsize'] = [15, 10]

dataDir = '/Users/rachelanderson/Dropbox (Princeton)/Data for Tax Equity Project/'
jsonDir = '/Users/rachelanderson/Dropbox (Princeton)/Tax Equity Code/Analyses/'

## Read and fix data

In [28]:
utils = pd.read_csv(dataDir + 'eia_data/eia8602018/utility_2018.csv',header=1).fillna(0)
utils.columns = [x.lower().replace(" ","_").replace("?","") for x in utils.columns]
# owners = pd.read_csv(dataDir + 'owner_2018.csv', header=1)b

In [29]:
utils['owner_of_plants_reported_on_form'].value_counts()

Y    4400
0     639
Name: owner_of_plants_reported_on_form, dtype: int64

In [46]:
utils[utils['owner_of_plants_reported_on_form']==0];

In [54]:
gen_data = pd.read_csv(dataDir + 'eia_data/eia8602018/gen_2018.csv')
gen_data.drop(['Unnamed: 0'],axis=1,inplace=True)
gen_data = gen_data[~gen_data['Utility ID'].str.contains('NOTE', na=False)]
gen_data['Utility ID'] = pd.to_numeric(gen_data['Utility ID'])
gen_data.columns = gen_data.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')

Now I want to merge the utility data with the generator data so I can do some sick analyses bro

I'll start by merging the utility list with the list of generators (all in 2018), so I'll see if any columns will conflict

In [55]:
[print(x) for x in gen_data.columns if x in utils.columns]

utility_id
utility_name
state


[None, None, None]

I'm merging by utility_id so let's fix the state variable

In [56]:
gen_data = gen_data.rename(columns={'state': 'plant_state'})

In [57]:
merged = gen_data.merge(utils, how='left', on=['utility_id','utility_name'])

Various checks assures me that the merge was successful. So now I want to merge in ownership data in a clever way

## Now merge in the owner variable in a creative way
### solution is to add owner_1 owner_1_perc, owner_2 ... etc.  for each plant

In [135]:
# S Single ownership by respondent 
# J Jointly owned with another entity
# W Wholly owned by an entity other than respondent
df = merged.join(pd.get_dummies(merged['ownership']))
df.rename(columns={'S': 'single_owner', 'W': 'other_owner', 'J':'joint_owner'},inplace=True)

In [161]:
owners = pd.read_csv(dataDir + 'eia_data/eia8602018/owner_2018.csv',header=1)
owners.columns = owners.columns.str.strip().str.lower().str.replace(' ', '_').str.replace('(', '').str.replace(')', '').str.replace('?','')
owners['perc_owned'] = pd.to_numeric(owners['percent_owned'].str.replace("%",""))
num_owners = owners.groupby(['plant_name', 'generator_id']).apply(lambda grp: len(grp.owner_name.unique())).reset_index()
num_owners.rename(columns={0: 'num_owners'},inplace=True)
owners = owners.merge(num_owners, how='outer', on=['plant_name','generator_id'])

In [169]:
owners = owners.sort_values(['plant_name','generator_id','perc_owned'])

In [186]:
# owner_dict with key = generator; value = owners 
owner_dict = owners.groupby(['plant_name','generator_id']).owner_name.apply(lambda grp: list(grp.value_counts().index)).to_dict()

In [250]:
owners.groupby(['plant_name','generator_id']).perc_owned.apply(lambda grp: list(grp.value_counts().index)).to_dict()

AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [244]:
perc_owned_dict = {}
for x in owner_dict:
    temp_dict = {}
    for i,y in enumerate(owner_dict[x]):
        print(str(y) + str(i) + " : ")
        print(perc_dict[x][i])
        temp_dict.update({y : perc_dict[x][i]})
    perc_owned_dict[x] = temp_dict

231 Dixon 74 Solar I, LLC0 : 
100.0
Dependable Highway Express Inc0 : 
100.0
Holocene Renewable Energy Fund 3, LLC0 : 
100.0
Edison Mission Energy0 : 
100.0
Edison Mission Energy0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
AL Sandersville0 : 
100.0
Strata Solar Farms, LLC0 : 
100.0
Solar Mission Sunstone US LLC0 : 
100.0
Solar Mission Sunstone US LLC0 : 
100.0
Apple, Inc0 : 
100.0
Apple, Inc0 : 
100.0
Apple, Inc0 : 
100.0
Wells Fargo0 : 
100.0
KeyBank0 : 
100.0
KeyBank0 : 
100.0
New York Community Bank0 : 
100.0
Key Equipment Finance0 : 
100.0
Mound Solar Owner X, LLC0 : 
100.0
Entergy Corp0 : 
100.0
Entergy Corp0 : 
100.0
Entergy Corp0 : 
100.0
Ameresco Solar Acton LLC0 : 
100.0
National Bank of Arizona0 : 
100.0
GLC-(CA) Edwards AFB,LLC0 : 
100.0
Acushnet Company0 : 
100.0
Acushnet Company0 : 
100.0
GLC-(CA) Edwards AFB,LLC0 : 
100.0
GLC-

IndexError: list index out of range

In [235]:
owner_dict[('Adams Wind Generations LLC', 'AWG')]

['BE Minnesota Wind Holdings LLC', 'Adams Wind Generations LLC']

In [212]:
owner_dict[('231 Dixon 74 Solar I, LLC', 'DIXON')][0]

'231 Dixon 74 Solar I, LLC'

In [154]:
owners.sort_values(by=['plant_name','perc_owned']).head(20)

Unnamed: 0,utility_id,utility_name,plant_code,plant_name,state,generator_id,status,owner_name,owner_street_address,owner_city,owner_state,owner_zip,ownership_id,percent_owned,perc_owned
4370,60163,Soltage LLC,61195,"231 Dixon 74 Solar I, LLC",NC,DIXON,OP,"231 Dixon 74 Solar I, LLC","c/o Soltage, LLC 66 York Street, 5th Floor",Jersey City,NJ,7302,60845,100.00%,100.0
3427,11208,Los Angeles Department of Water & Power,57871,2555 E Olympic Bl,CA,1,OP,Dependable Highway Express Inc,2555 E. Olympic Blvd,Los Angeles,CA,90023,59552,100.00%,100.0
3973,60025,Greenbacker Renewable Energy Corporation,59534,4Oaks,NC,4OAKS,OP,"Holocene Renewable Energy Fund 3, LLC","24600 W 26th Ave., Ste. 280C",Denver,CO,80211,59280,100.00%,100.0
66,23693,AES Huntington Beach LLC,335,AES Huntington Beach LLC,CA,3,RE,Edison Mission Energy,2244 WALNUT GROVE AVE,Rosemead,CA,91770,34505,100.00%,100.0
67,23693,AES Huntington Beach LLC,335,AES Huntington Beach LLC,CA,4,RE,Edison Mission Energy,2244 WALNUT GROVE AVE,Rosemead,CA,91770,34505,100.00%,100.0
2745,59417,"SEPG Operating Services, LLC ALS",55672,AL Sandersville LLC,GA,CT1,OP,AL Sandersville,9405 Arrowpoint Blvd.,Charlotte,NC,28273,57261,100.00%,100.0
2746,59417,"SEPG Operating Services, LLC ALS",55672,AL Sandersville LLC,GA,CT2,OP,AL Sandersville,9405 Arrowpoint Blvd.,Charlotte,NC,28273,57261,100.00%,100.0
2747,59417,"SEPG Operating Services, LLC ALS",55672,AL Sandersville LLC,GA,CT3,OP,AL Sandersville,9405 Arrowpoint Blvd.,Charlotte,NC,28273,57261,100.00%,100.0
2748,59417,"SEPG Operating Services, LLC ALS",55672,AL Sandersville LLC,GA,CT4,OP,AL Sandersville,9405 Arrowpoint Blvd.,Charlotte,NC,28273,57261,100.00%,100.0
2749,59417,"SEPG Operating Services, LLC ALS",55672,AL Sandersville LLC,GA,CT5,OP,AL Sandersville,9405 Arrowpoint Blvd.,Charlotte,NC,28273,57261,100.00%,100.0


In [120]:
num_owners.apply(lambda x: )

0       1
1       1
2       1
3       1
4       1
       ..
1576    1
1577    1
1578    1
1579    3
1580    1
Name: num_owners, Length: 1581, dtype: int64

In [99]:
owners.groupby(['plant_name']).percent_owned.apply(lambda grp: list(grp.value_counts().index)).to_dict()

{'231 Dixon 74 Solar I, LLC': ['100.00%'],
 '2555 E Olympic Bl': ['100.00%'],
 '4Oaks': ['100.00%'],
 'AES Huntington Beach LLC': ['100.00%'],
 'AL Sandersville LLC': ['100.00%'],
 'AM Best Farm': ['100.00%'],
 'APG New Chesapeake': ['100.00%'],
 'APG Old Bayside': ['100.00%'],
 'APP Prune A-D': ['100.00%'],
 'AT&T Anaheim': ['100.00%'],
 'AT&T Holger': ['100.00%'],
 'ATT  Kelvin': ['100.00%'],
 'ATT Jericho': ['100.00%'],
 'ATT Van Nyus': ['100.00%'],
 'AVS Lancaster 1': ['100.00%'],
 'Acadia Energy Center': ['100.00%'],
 'Acton Solar Landfill': ['100.00%'],
 'Actus Lend Lease DMAFB': ['100.00%'],
 'Acushnet AD Makepeace': ['100.00%'],
 'Acushnet Ball Plant 2': ['100.00%'],
 'Acushnet Hawes Reed Road': ['100.00%'],
 'Acushnet- High Hill': ['100.00%'],
 'Acushnet-Braley Road 1': ['100.00%'],
 'Adams Wind Generations LLC': ['20.00%', '80.00%'],
 'Adobe San Jose': ['100.00%'],
 'Adobe Solar': ['100.00%'],
 'Adrian Energy Associates LLC': ['50.00%'],
 'Advance Stores Company, Inc': ['100.

In [98]:
df['owner_2']

{'231 Dixon 74 Solar I, LLC': ['231 Dixon 74 Solar I, LLC'],
 '2555 E Olympic Bl': ['Dependable Highway Express Inc'],
 '4Oaks': ['Holocene Renewable Energy Fund 3, LLC'],
 'AES Huntington Beach LLC': ['Edison Mission Energy'],
 'AL Sandersville LLC': ['AL Sandersville'],
 'AM Best Farm': ['Strata Solar Farms, LLC'],
 'APG New Chesapeake': ['Solar Mission Sunstone US LLC'],
 'APG Old Bayside': ['Solar Mission Sunstone US LLC'],
 'APP Prune A-D': ['Apple, Inc'],
 'AT&T Anaheim': ['Wells Fargo'],
 'AT&T Holger': ['KeyBank'],
 'ATT  Kelvin': ['KeyBank'],
 'ATT Jericho': ['New York Community Bank'],
 'ATT Van Nyus': ['Key Equipment Finance'],
 'AVS Lancaster 1': ['Mound Solar Owner X, LLC'],
 'Acadia Energy Center': ['Entergy Corp'],
 'Acton Solar Landfill': ['Ameresco Solar Acton LLC'],
 'Actus Lend Lease DMAFB': ['National Bank of Arizona'],
 'Acushnet AD Makepeace': ['GLC-(CA) Edwards AFB,LLC'],
 'Acushnet Ball Plant 2': ['Acushnet Company'],
 'Acushnet Hawes Reed Road': ['GLC-(CA) Edwa

## Want to deal with multiple owners issue

In [272]:
multi_owners = merged.merge(owners,how='inner', 
             on=['utility_id','plant_name','generator_id'], 
             indicator=True,
            validate="one_to_many")

In [273]:
multi_owners['percent_owned'] = pd.to_numeric(multi_owners.percent_owned.str.strip('%'))

In [274]:
mo_unique = multi_owners.drop_duplicates(['utility_id','plant_state','generator_id'])

In [277]:
mo_lt100 = mo_unique[mo_unique['percent_owned']<100]

In [281]:
a1 = mo_unique.technology.value_counts().sort_index()
a1b = mo_lt100.technology.value_counts().sort_index()
a2 = merged.technology.value_counts().sort_index()
a1.divide(a2, fill_value=0).sort_values(ascending=False)
a1b.divide(a2, fill_value=0).sort_values(ascending=False);

This isn't fruitful... so switching gears

In [284]:
merged.to_csv(dataDir + 'merged.csv')

make dictionary of utility names for each street address -- this is important because # entries > # utilities

In [287]:
utils.street_address.nunique()

3624

In [288]:
utils.utility_name.nunique()

5025

In [47]:
utils.groupby(['street_address']).utility_name.apply(lambda grp: list(grp.value_counts().index)).to_dict()

{'-16849 102nd Street SE': ['Dakota Magic Casino'],
 '01 N.E. 7th Street': ['Western Farmers Elec Coop, Inc'],
 '1 Adelaide Street East': ['Clark Canyon Hydro, LLC', 'Dorena Hydro, LLC'],
 '1 Armorcast Rd': ['Birdsboro Power LLC'],
 '1 Ashley Place': ['Ashley Energy LLC'],
 '1 Atwell Road': ['Bassett Healthcare'],
 '1 Blanchard RD PO BOX 13190': ['Calpine Corp - Metcalf Energy Center'],
 '1 Bonnybridge Road': ['International Paper Port Wentworth Mill'],
 '1 Energy Park Way': ['City of Holland'],
 '1 Federal Street': ['American Sugar Refining, Inc.'],
 '1 Gateway Plaza': ['Metropolitan Transportation Authority'],
 '1 Glen Street': ['Finch Paper LLC'],
 '1 Gustave L Levy Pl': ['Mount Sinai Hospital'],
 '1 Hansen Rd PO BOX 266': ['Alexander & Baldwin, Inc.'],
 '1 Healthy Way': ['South Nassau Communities Hospital'],
 '1 Hoag Drive': ['Hoag Memorial Presbyterian Hospital'],
 '1 Horwith Drive': ['EIF Northampton GP, LLC'],
 '1 Infinite Loop': ['Apple, Inc'],
 '1 Landmark Sq. Suite 320': ['Gr