In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt

GROUP ONE QUESTIONS:

1) Make a map of power plants in the US, color coded by type
2) Make a map of air quality stations, color coded by average air quality
3) Make a map of air quality stations, color coded by worst air quality ever 
4) Map the power plants owned by the top 5 most common energy companies as red. Map the rest of the plants as dark grey.
5) Map all of the nuclear power plants in red, with the rest of the plants as dark grey.
6) Map power plants that produce energy through both natural gas and oil.

In [None]:
# Load the state boundary layer from the local "cb_2016_us_state_500k" dataset into a
# GeoDataFrame, then display it so the rows and columns can be inspected.
states = gpd.read_file("cb_2016_us_state_500k")
states

In [None]:
# Remove the rows labelled 32, 33, 53, 54 and 55 so they are left off the maps.
# Dropping by label into a new frame (instead of by position, in place) keeps this cell
# safe to re-run: on a second run the labels are already gone and nothing else is removed,
# whereas a positional in-place drop would delete five more rows every time.
# NOTE(review): assumes `states` still carries the default 0..n-1 index from read_file, so
# labels and positions coincide on the first run -- confirm. Filtering by name would be
# more robust, e.g.: states = states[~states.NAME.isin(['Hawaii', 'Alaska'])]
states = states.drop([32, 33, 53, 54, 55], errors='ignore')

In [None]:
# Label the state geometries as EPSG:5071. Assigning .crs only sets the CRS metadata;
# it does not transform any coordinates.
# NOTE(review): later cells try to set axis limits in degrees (e.g. -130..-60), which
# suggests the coordinates are longitude/latitude rather than EPSG:5071 -- confirm
# whether a reprojection with to_crs() was intended here.
states.crs = {'init': 'epsg:5071'}

In [None]:
# Quick sanity-check plot of the remaining state outlines.
# `scheme` only applies when a `column` is being classified; with no column it was a
# dead argument, so it is dropped.
states.plot()

In [None]:
# Read the power plant table from the local CSV and preview the first rows.
powerplants = pd.read_csv('powerplants.csv')
powerplants.head()

In [None]:
def make_point(row):
    """Return a shapely Point built from a row's Longitude (x) and Latitude (y)."""
    lon, lat = row.Longitude, row.Latitude
    return Point(lon, lat)


# One Point per plant row, then promote the plain DataFrame to a GeoDataFrame that uses
# those points as its geometry.
points = powerplants.apply(make_point, axis=1)
powerplants = gpd.GeoDataFrame(powerplants, geometry=points)
# Same CRS label as the states layer so the two layers agree when overlaid or joined.
# NOTE(review): the points are built from Longitude/Latitude values -- confirm that
# EPSG:5071 is really the intended label for them.
powerplants.crs = {'init': 'epsg:5071'}
powerplants.head()

In [None]:
# Quick look at the plant locations on their own (no state basemap underneath).
powerplants.plot(figsize=(50,50))

In [None]:
# Show the CRS recorded on the states layer.
states.crs

In [None]:
# Show the CRS recorded on the plants layer; both layers were assigned the same
# {'init': 'epsg:5071'} value in earlier cells.
powerplants.crs

In [None]:
# Map of US power plants, colour coded by primary energy source.
ax = states.plot(figsize=(50,50), color='grey', edgecolor='white')
# Colour is driven by the 'PrimSource' column. Do not also pass color=...: a fixed colour
# takes precedence over the column and paints every plant the same, which defeats the
# colour coding. The legend maps each colour back to its source type.
powerplants.plot(markersize=2, ax=ax, column='PrimSource', categorical=True, legend=True)
ax.axis('off')
# set_xlim / set_ylim are methods. Assigning to them (ax.set_xlim = ...) only replaces the
# method on this Axes object and leaves the view limits untouched, so call them instead.
ax.set_xlim(-130, -60)
ax.set_ylim(25, 50)

In [None]:
#2) Make a map of air quality stations, color coded by average air quality 

In [None]:
# Read the annual PM2.5 table from the local CSV and preview the first rows.
air_quality = pd.read_csv('pm25_annual_fixed.csv')
air_quality.head() 

In [None]:
# Reuse make_point() from the power-plant cell earlier in the notebook instead of
# redefining an identical copy here: a second definition silently shadows the first
# and the two can drift apart.
points = air_quality.apply(make_point, axis=1)
air_quality = gpd.GeoDataFrame(air_quality, geometry=points)
# Same CRS label as the states and plants layers so all three agree when overlaid.
# NOTE(review): the points are built from Longitude/Latitude values -- confirm that
# EPSG:5071 is really the intended label for them.
air_quality.crs = {'init': 'epsg:5071'}
air_quality.head()

In [None]:
# Light grey state basemap with one semi-transparent dot per monitoring station,
# coloured by the station's '50th Percentile' value on the plasma colormap.
ax = states.plot(
    figsize=(50,20),
    color='lightgrey',
    edgecolor='white',
    linewidth=0.5,
)
air_quality.plot(
    ax=ax,
    column='50th Percentile',
    cmap='plasma',
    markersize=5,
    alpha=0.5,
)
ax.axis('off')

In [None]:
# Map all of the nuclear power plants in red, with the rest of the plants as dark grey.

In [None]:
# List the available columns to locate the one describing each plant's energy source.
powerplants.columns

In [None]:
# Count plants per 'PrimSource' value to see how the source types are spelled.
powerplants['PrimSource'].value_counts() 

In [None]:
# Keep only the plants whose 'PrimSource' is exactly 'nuclear', then display them.
df_nuclear = powerplants[powerplants['PrimSource'] == 'nuclear']
df_nuclear

In [None]:
# All plants as faint grey dots over the state basemap, with the nuclear plants
# drawn last in red so they sit on top.
ax = states.plot(
    figsize=(70,70),
    color='lightgrey',
    edgecolor='white',
    linewidth=0.5,
)
powerplants.plot(ax=ax, color='grey', markersize=2, alpha=0.5, legend=True)
df_nuclear.plot(ax=ax, color='red', markersize=3, legend=True)
ax.axis('off')

In [None]:
#not_nuclear = powerplants[powerplants['PrimSource'] != 'nuclear']
#not_nuclear.head()


In [None]:
# 3) Make a map of air quality stations, color coded by worst air quality ever  

In [None]:
#air_quality.columns

In [None]:
#4) Map the power plants owned by the top 5 most common energy companies as red. Map the rest of the plants as dark grey.

In [None]:
# List the columns again to find the one that holds the operating company's name.
powerplants.columns

In [None]:
#so we get the company names with Utility_Na 

In [None]:
# Count plants per 'Utility_Na' value and show the most frequent ones
# (head() with no argument returns the first five).
powerplants['Utility_Na'].value_counts().head()

In [None]:
#most_companies = powerplants[powerplants['Utility_Na'] == ['Pacific Gas & Electric Co', 'Erie Boulevard Hydropower LP']]  
#most_companies

In [None]:
df_pacificorp = powerplants[powerplants['Utility_Na'] == 'PacifiCorp'] 

In [None]:
df_socal = powerplants[powerplants['Utility_Na'] == 'Southern California Edison Co'] 

In [None]:
df_sustainable = powerplants[powerplants['Utility_Na'] == 'Sustainable Power Group, LLC'] 

In [None]:
df_erie = powerplants[powerplants['Utility_Na'] == 'Erie Boulevard Hydropower LP'] 

In [None]:
df_pacific = powerplants[powerplants['Utility_Na'] == 'Pacific Gas & Electric Co']

In [None]:
# State basemap, every plant in grey, then each selected company's plants in red on top.
ax = states.plot(color='lightgrey', linewidth=0.5, edgecolor='white', figsize=(30,30))
powerplants.plot(markersize=2, ax=ax, color='grey')
# Same draw order as before: PacifiCorp, SoCal Edison, Sustainable Power, Erie, PG&E.
for company_plants in (df_pacificorp, df_socal, df_sustainable, df_erie, df_pacific):
    company_plants.plot(markersize=3, ax=ax, color='red')

ax.axis('off')

In [None]:
#another, faster way to do the above 

In [None]:
# Take the five utilities that operate the most plants straight from the data instead of
# retyping names read off an earlier cell's output, so the list cannot drift from the data.
companies = powerplants['Utility_Na'].value_counts().head(5).index.tolist()
# Keep every plant operated by one of those five utilities.
new_companies = powerplants[powerplants['Utility_Na'].isin(companies)]
# Preview only the first rows rather than dumping the whole frame into the output.
new_companies.head()

In [None]:
# Same map as the per-company version: all plants in faint grey, with the plants of the
# selected companies drawn in red on top.
ax = states.plot(
    figsize=(30,5),
    color='lightgrey',
    edgecolor='white',
    linewidth=0.5,
)
powerplants.plot(ax=ax, color='grey', markersize=2, alpha=0.5)
new_companies.plot(ax=ax, color='red', markersize=3)

In [None]:
#  6) Map power plants that produce energy through both natural gas and oil.

In [None]:
# Let pandas display up to 29 columns so the wide plant table is not truncated, then
# show the first 100 rows to look for the columns describing each fuel type.
pd.set_option('display.max_columns', 29)
powerplants.head(100)

# GROUP TWO: For most of these you'll need a spatial join

Which power plants are in New York?

Which state produces the most energy from coal?

Map states with nuclear power plants in red, and states without in grey. Do the same thing for solar.

Make a map of total wind (or solar or coal) energy produced by each state, with low production being a light color and high production being a brighter/darker color.

Make a map of worst air quality measured in each state, with high quality being light red and low quality being dark red

Which states have the most renewable plants, as a % of total plants? (include hydro, wind, solar)

In [None]:
# Preview the states layer before joining it to the plants.
states.head()

In [None]:
# Preview the plants layer before joining it to the states.
powerplants.head()

In [None]:
# Select the row(s) of the states layer whose NAME is 'New York' and plot them in white.
new_states = states[states['NAME'] == 'New York'] 
new_states.plot(color='white')

In [None]:
# Spatial join: attach the columns of the state each plant falls within to that plant's row.
# how='inner' keeps only plants that match a state, so plants outside every remaining
# state polygon are dropped from the result.
# NOTE(review): newer geopandas releases name this argument `predicate` instead of `op` --
# confirm against the installed version before upgrading.
plants_with_states = gpd.sjoin(powerplants, states, how='inner', op='within')
plants_with_states.head(25)

In [None]:
# Number of joined plants per state NAME, showing the states with the most plants.
plants_with_states['NAME'].value_counts().head()

In [None]:
# Which state produces the most energy from coal?
# Keeps the joined plants whose 'PrimSource' is 'coal' and reports the state NAME that
# appears most often among them.
# NOTE(review): this counts coal plants per state, not the energy they produce; answering
# the question as worded would need a per-plant output/capacity column summed by state --
# confirm which column to use.

coal_plants = plants_with_states[plants_with_states['PrimSource'] == 'coal']
coal_plants['NAME'].value_counts().head(1)

In [None]:
# Map states with nuclear power plants in red, and states without in grey. Do the same thing for solar.
 

In [None]:
# Rows of the plant/state join whose 'PrimSource' is 'nuclear'; each row still carries the
# NAME of the state the plant was joined to.
nuclear_plant_states = plants_with_states[plants_with_states['PrimSource'] == 'nuclear']

In [None]:
# The task is to colour the STATES that have a nuclear plant, not to draw the plants
# themselves, so look up which state names appear among the joined nuclear plants.
nuclear_state_names = nuclear_plant_states['NAME'].unique()
states_with_nuclear = states[states['NAME'].isin(nuclear_state_names)]

# Every state in grey first, then the states with at least one nuclear plant in red on top.
ax = states.plot(color='lightgrey', linewidth=0.5, edgecolor='white', figsize=(50,50))
if not states_with_nuclear.empty:  # nothing to overlay if no state matched
    states_with_nuclear.plot(ax=ax, color='red', linewidth=0.5, edgecolor='white')
ax.axis('off')

In [None]:
# Rows of the plant/state join whose 'PrimSource' is 'solar', displayed for inspection;
# each row still carries the NAME of the state the plant was joined to.
solar_plant_states = plants_with_states[plants_with_states['PrimSource'] == 'solar']
solar_plant_states

In [None]:
# The task is to colour the STATES that have a solar plant, not to draw the plants
# themselves, so look up which state names appear among the joined solar plants.
solar_state_names = solar_plant_states['NAME'].unique()
states_with_solar = states[states['NAME'].isin(solar_state_names)]

# Every state in grey first, then the states with at least one solar plant in red on top.
ax = states.plot(color='lightgrey', linewidth=0.5, edgecolor='white', figsize=(50,50))
if not states_with_solar.empty:  # nothing to overlay if no state matched
    states_with_solar.plot(ax=ax, color='red', linewidth=0.5, edgecolor='white')
ax.axis('off')

In [None]:
# Make a map of total wind (or solar or coal) energy produced by each state, 
#with little being a light color and high emissions being a brighter/darker color.

In [None]:
# Keep only the plants whose 'PrimSource' is 'wind'. This filters the original plants
# table, not the plant/state join, so the rows carry no state columns.
df_wind = powerplants[powerplants['PrimSource'] == 'wind']

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Order the wind plants by their 'Wind_MW' value, largest first, and display the result.
df_wind_new = df_wind.sort_values(by = 'Wind_MW', ascending=False)
df_wind_new

In [None]:
# Wind plants as semi-transparent blue dots over the state basemap; with alpha < 1,
# overlapping dots look darker, so densely packed areas stand out.
ax = states.plot(color='lightgrey', linewidth=0.5, edgecolor='white', figsize=(30,5))
df_wind_new.plot(markersize=2, alpha=0.5, ax=ax, color='blue')
ax.axis('off')
# set_xlim / set_ylim are methods. Assigning to them (ax.set_xlim = ...) only replaces the
# method on this Axes object and leaves the view limits untouched, so call them instead.
ax.set_xlim(-130, -50)
ax.set_ylim(25, 50)

In [None]:
# Make a map of worst air quality measured in each state, with high quality being light red and low quality being dark red


In [None]:
# Air quality stations as semi-transparent red dots over the state basemap.
ax = states.plot(color='lightgrey', linewidth=0.5, edgecolor='white', figsize=(30,30))
air_quality.plot(markersize=2, alpha=0.5, ax=ax, color='red', legend=False)
ax.axis('off')
# set_xlim / set_ylim are methods. Assigning to them (ax.set_xlim = ...) only replaces the
# method on this Axes object and leaves the view limits untouched, so call them instead.
ax.set_xlim(-125, -60)
# The original assignment used an upper bound of 40, which never took effect. Applied
# literally it would crop everything north of 40 degrees, so this uses 50 to match the
# other maps in this notebook.
ax.set_ylim(25, 50)