# Table of Contents
* [Data Preparation and Basic EDA](#1)
* [Map of all Plants](#2)
* [Map of Coal Plants](#3)
* [Map of Hydroelectric Plants](#4)

In [None]:
# packages

# standard
import numpy as np
import pandas as pd
import time

# plots
import matplotlib.pyplot as plt
import matplotlib
import plotly.express as px
import seaborn as sns

# Maps
import folium

In [None]:
# read the 2018 csv of the us-electric-power-generators input and preview it
df_2018 = pd.read_csv(
    '../input/us-electric-power-generators/y2018.csv'
)
df_2018.head(n=10)

<a id='1'></a>
# Data Preparation and Basic EDA

In [None]:
# cast the plant id and the nameplate columns to strings
# (both are concatenated into string keys in the next step)
df_2018['Plant_ID'] = df_2018['Plant_ID'].astype(str)
df_2018['Nameplate'] = df_2018['Nameplate'].astype(str)

In [None]:
# composite keys: ID_2 = plant + technology, ID_3 = plant + technology + nameplate
df_2018['ID_2'] = df_2018['Plant_ID'] + '-' + df_2018['Technology']
df_2018['ID_3'] = df_2018['ID_2'] + '-' + df_2018['Nameplate']

# keep only the first row per plant/technology pair (nameplate differences are
# deliberately ignored) and renumber the index afterwards
df_2018 = (
    df_2018
    .drop_duplicates(subset=['ID_2'], keep='first')
    .reset_index(drop=True)
)

In [None]:
# preview the first ten rows of the frame after the duplicate removal above
df_2018.head(10)

In [None]:
# summarise the structure of the data frame with DataFrame.info()
df_2018.info()

In [None]:
# bar chart: number of rows per technology
tech_counts = df_2018['Technology'].value_counts()
ax = tech_counts.plot.bar()
ax.set_title('Technology')
ax.grid()
plt.show()

In [None]:
# bar chart: number of rows per state (wide figure so all state labels fit)
state_counts = df_2018['State'].value_counts()
ax = state_counts.plot.bar(figsize=(14,4))
ax.set_title('State')
ax.grid()
plt.show()

<a id='2'></a>
# Map of all Plants

In [None]:
# auxiliary colour column: the integer category code of each row's technology
technology_cat = df_2018['Technology'].astype('category')
df_2018['COLOR'] = technology_cat.cat.codes

In [None]:
# interactive map - all plants, fill colour encodes the technology
df_plot = df_2018

zoom_factor = 4 # initial map zoom

my_map_1 = folium.Map(location=[40,-95], zoom_start=zoom_factor)

# range of the technology colour codes; used to scale them onto [0, 1]
v_min = np.min(df_plot.COLOR)
v_max = np.max(df_plot.COLOR)
v_span = float(v_max) - float(v_min)

# normalised colour value per row, taken from df_plot (the frame that is
# actually plotted); if only one code is present there is no range to scale
# by, so every row falls back to 0.0 instead of dividing by zero
color_codes = df_plot['COLOR'].astype(float)
if v_span > 0:
    color_norm = (color_codes - float(v_min)) / v_span
else:
    color_norm = color_codes * 0.0

# walk the needed columns in parallel instead of rebuilding a row Series
# with .iloc[i] for every single field access
for lat, lon, tech, plant_id, v_norm in zip(df_plot['Latitude'], df_plot['Longitude'],
                                            df_plot['Technology'], df_plot['Plant_ID'],
                                            color_norm):
    # a float in [0, 1] lets the colormap do the table lookup itself and
    # keeps the top value inside the table (int(v_norm*256) reached 256)
    current_color = matplotlib.colors.to_hex(matplotlib.cm.rainbow(v_norm))
    folium.CircleMarker(
       location=[lat, lon],
       radius=5,
       popup='Technology: ' + tech + '| Id: ' + plant_id,
       color='black',
       weight=1, # stroke width in pixel (folium's keyword is 'weight')
       opacity=0.5, # stroke opacity
       fill=True,
       fill_color=current_color,
       fill_opacity=0.25
    ).add_to(my_map_1)

my_map_1 # display

<a id='3'></a>
# Map of Coal Plants

In [None]:
# interactive map - coal plants only
df_plot = df_2018[df_2018.Technology=='Coal']

zoom_factor = 4 # initial map zoom

my_map_1 = folium.Map(location=[40,-95], zoom_start=zoom_factor)

# walk the needed columns in parallel instead of rebuilding a row Series
# with .iloc[i] for every single field access
for lat, lon, tech, plant_id in zip(df_plot['Latitude'], df_plot['Longitude'],
                                    df_plot['Technology'], df_plot['Plant_ID']):
    folium.CircleMarker(
       location=[lat, lon],
       radius=5,
       popup='Technology: ' + tech + ' | Id: ' + plant_id,
       color='black',
       weight=1, # stroke width in pixel (folium's keyword is 'weight')
       opacity=0.5, # stroke opacity
       fill=True,
       fill_color='orange',
       fill_opacity=0.25
    ).add_to(my_map_1)

my_map_1 # display

<a id='4'></a>
# Map of Hydroelectric Plants

In [None]:
# interactive map - hydroelectric plants only
df_plot = df_2018[df_2018.Technology=='Hydroelectric']

zoom_factor = 4 # initial map zoom

my_map_1 = folium.Map(location=[40,-95], zoom_start=zoom_factor)

# walk the needed columns in parallel instead of rebuilding a row Series
# with .iloc[i] for every single field access
for lat, lon, tech, plant_id in zip(df_plot['Latitude'], df_plot['Longitude'],
                                    df_plot['Technology'], df_plot['Plant_ID']):
    folium.CircleMarker(
       location=[lat, lon],
       radius=5,
       popup='Technology: ' + tech + ' | Id: ' + plant_id,
       color='black',
       weight=1, # stroke width in pixel (folium's keyword is 'weight')
       opacity=0.5, # stroke opacity
       fill=True,
       fill_color='blue',
       fill_opacity=0.25
    ).add_to(my_map_1)

my_map_1 # display