In [None]:
import pandas as pd
import warnings

# Cap the number of columns pandas renders when displaying a DataFrame.
# NOTE(review): a later call in this same cell sets 'display.max_columns' to None,
# so this 500-column cap is no longer in effect once the cell has finished running.
pd.set_option('display.max_columns', 500)

def get_state_abbr():
    """Return a lookup from two-letter state abbreviation to full state name.

    The mapping covers the 50 U.S. states only; the District of Columbia and
    territories are not included. A new dict is built on every call, so the
    caller may mutate the result freely.

    Returns:
        dict[str, str]: abbreviation -> state name, e.g. ``{"AL": "Alabama", ...}``.
    """
    abbr_name_pairs = (
        ("AL", "Alabama"),
        ("AK", "Alaska"),
        ("AZ", "Arizona"),
        ("AR", "Arkansas"),
        ("CA", "California"),
        ("CO", "Colorado"),
        ("CT", "Connecticut"),
        ("DE", "Delaware"),
        ("FL", "Florida"),
        ("GA", "Georgia"),
        ("HI", "Hawaii"),
        ("ID", "Idaho"),
        ("IL", "Illinois"),
        ("IN", "Indiana"),
        ("IA", "Iowa"),
        ("KS", "Kansas"),
        ("KY", "Kentucky"),
        ("LA", "Louisiana"),
        ("ME", "Maine"),
        ("MD", "Maryland"),
        ("MA", "Massachusetts"),
        ("MI", "Michigan"),
        ("MN", "Minnesota"),
        ("MS", "Mississippi"),
        ("MO", "Missouri"),
        ("MT", "Montana"),
        ("NE", "Nebraska"),
        ("NV", "Nevada"),
        ("NH", "New Hampshire"),
        ("NJ", "New Jersey"),
        ("NM", "New Mexico"),
        ("NY", "New York"),
        ("NC", "North Carolina"),
        ("ND", "North Dakota"),
        ("OH", "Ohio"),
        ("OK", "Oklahoma"),
        ("OR", "Oregon"),
        ("PA", "Pennsylvania"),
        ("RI", "Rhode Island"),
        ("SC", "South Carolina"),
        ("SD", "South Dakota"),
        ("TN", "Tennessee"),
        ("TX", "Texas"),
        ("UT", "Utah"),
        ("VT", "Vermont"),
        ("VA", "Virginia"),
        ("WA", "Washington"),
        ("WV", "West Virginia"),
        ("WI", "Wisconsin"),
        ("WY", "Wyoming"),
    )
    return dict(abbr_name_pairs)
# Render every column when a DataFrame is displayed (None lifts the column cap).
pd.options.display.max_columns = None
# Suppress all Python warnings for the remainder of the kernel session.
# NOTE(review): this blanket filter also hides pandas diagnostics such as
# SettingWithCopyWarning; consider narrowing it to specific categories.
warnings.filterwarnings(action='ignore')

In [None]:
# Load the raw wind dataset from a path relative to the notebook's working
# directory. Later cells read the 'State', 'County', 'Nameplate Capacity (MW)'
# and 'Operating Year' columns from this frame.
data = pd.read_csv("data/wind.csv")
# Preview the first rows as a quick sanity check of the load.
data.head()

In [None]:
# Keep only the columns used downstream. The explicit .copy() makes
# `data_important` an independent frame, so the column assignment below never
# targets a slice of `data` (the chained-assignment / SettingWithCopyWarning case).
data_important = data[['State', 'County', 'Nameplate Capacity (MW)', 'Operating Year']].copy()

# Translate the two-letter state abbreviation into the full state name.
# Abbreviations missing from get_state_abbr() (anything outside the 50 states)
# become NaN here and are therefore excluded by the later groupby on 'State'.
data_important['State'] = data_important['State'].map(get_state_abbr())

# Distinct operating years; each one becomes a key of `dfs` in the next cell.
operating_years = data_important['Operating Year'].unique()
data_important.head()

In [None]:
# Build one per-county summary table for every operating year.
# `dfs` maps operating year -> DataFrame with the columns
#   'State', 'County',
#   'Nameplate Capacity (MW) sum', 'Nameplate Capacity (MW) mean', 'Number of Plants'.
dfs = {}
for operating_year in operating_years:
    plants_in_year = data_important[data_important['Operating Year'] == operating_year]

    # A single named aggregation replaces three separate groupby passes plus
    # two merges and a rename; the resulting column names and order are unchanged.
    dfs[operating_year] = (
        plants_in_year
        .groupby(['State', 'County'])['Nameplate Capacity (MW)']
        .agg(**{
            'Nameplate Capacity (MW) sum': 'sum',
            'Nameplate Capacity (MW) mean': 'mean',
            # 'count' tallies non-null capacity values per county.
            'Number of Plants': 'count',
        })
        .reset_index()
    )

## Normalize by County Area

In [None]:
# Load the county-size lookup table from a path relative to the notebook's
# working directory. The normalization cell joins it on 'State' and 'County'
# and reads its 'area mi2' and 'area km2' columns.
county_sizes = pd.read_csv('data/county_sizes.csv')
# Preview the first rows as a quick sanity check of the load.
county_sizes.head() 

In [None]:
# Attach county areas to each year's summary and add area-normalized columns.
# NOTE: merge() defaults to an inner join, so counties without a matching
# ('State', 'County') row in `county_sizes` are dropped from the result.
# NOTE: the cell overwrites the entries of `dfs`; re-running it without first
# re-running the aggregation cell merges the area columns a second time.
for year, df in dfs.items():
    merged = df.merge(county_sizes, on=['State', 'County'])

    # Total installed capacity per unit of county area.
    merged['Nameplate Capacity (MW) sum - Norm mi2'] = merged['Nameplate Capacity (MW) sum'] / merged['area mi2']
    # Fixed: this column previously divided the *mean* capacity by the area,
    # contradicting both its name and the mi2 variant above.
    merged['Nameplate Capacity (MW) sum - Norm km2'] = merged['Nameplate Capacity (MW) sum'] / merged['area km2']

    # Plant density per unit of county area.
    merged['Number of Plants - Norm mi2'] = merged['Number of Plants'] / merged['area mi2']
    merged['Number of Plants - Norm km2'] = merged['Number of Plants'] / merged['area km2']

    # Replacing the value of an existing key is safe while iterating .items().
    dfs[year] = merged

In [None]:
# Preview the per-county summary stored under operating year 2018.
# NOTE(review): assumes 'Operating Year' was parsed as an integer so that 2018
# is a valid key of `dfs` — confirm against the loaded data.
dfs[2018].head()