# Week 15. 
# Data visualization : Choropleth Map

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
% matplotlib inline
import seaborn as sns

# Data manipulation

In [4]:
# Load the county-level table; later cells rely on its Province_State,
# county, Deaths and Confirmed columns.
df = pd.read_csv(filepath_or_buffer='usa_county.csv')

In [5]:
# Column-wise maximum for every (Province_State, county) pair, with the
# group keys restored as ordinary columns.
max_county = (
    df.groupby(['Province_State', 'county'])
      .max()
      .reset_index()
)

## Aggregate & sum data by county

In [10]:
# Sum the Deaths column for each (Province_State, county) pair.
# The aggregation is named by string ('sum') instead of passing the Python
# builtin: pandas >= 2.1 deprecates builtin callables in GroupBy.agg and will
# eventually call them directly, whereas 'sum' always uses the groupby sum.
death_county_agg = (
    df.groupby(by=['Province_State', 'county'])
      .agg({'Deaths': 'sum'})
      .reset_index()
)

In [11]:
# Sum the Confirmed column for each (Province_State, county) pair.
# The aggregation is named by string ('sum') instead of passing the Python
# builtin: pandas >= 2.1 deprecates builtin callables in GroupBy.agg and will
# eventually call them directly, whereas 'sum' always uses the groupby sum.
confirmed_county_agg = (
    df.groupby(by=['Province_State', 'county'])
      .agg({'Confirmed': 'sum'})
      .reset_index()
)

In [14]:
# Inner-join the two per-county aggregates on the columns they share
# (Province_State and county), giving Deaths and Confirmed side by side.
county_df = death_county_agg.merge(
    confirmed_county_agg,
    on=['Province_State', 'county'],
)

Merge the confirmed and death data together.

In [15]:
# Display the merged frame: one row per (Province_State, county) with its
# Deaths and Confirmed sums.
county_df

Unnamed: 0,Province_State,county,Deaths,Confirmed
0,Alabama,Autauga,803,35257
1,Alabama,Baldwin,882,64254
2,Alabama,Barbour,137,21840
3,Alabama,Bibb,95,12080
4,Alabama,Blount,72,15676
...,...,...,...,...
3146,Wyoming,Sweetwater,0,6606
3147,Wyoming,Teton,0,13368
3148,Wyoming,Uinta,0,8800
3149,Wyoming,Washakie,0,3008


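To draw a county-level map, each county needs its FIPS code: a five-digit identifier (for example `01001`) that matches the `id` of the county's shape in the GeoJSON file.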

In [16]:
# County FIPS lookup table. FIPS is read as text so the codes are never parsed
# as numbers: numeric parsing drops leading zeros and turns the column into
# floats (e.g. 1001.0) as soon as one value is missing.
fips = pd.read_csv('fips.csv', dtype={'FIPS': str})

In [18]:
# Normalise FIPS to text and left-pad with zeros to a width of five characters.
fips = fips.assign(FIPS=lambda frame: frame['FIPS'].astype(str).str.zfill(5))

## The FIPS file identifies each state by a code such as AL, but our original county file has no such code.
## One file has a 'Province_State' column and the other a 'State' column, so we need a state-code lookup table to connect them.

In [20]:
# Load the state lookup table; later cells use its 'state' and 'code' columns.
state_code = pd.read_csv(filepath_or_buffer='state_code.csv')

Time to think about data!

In [23]:
# Give the state column the shared name 'state_name' used as the join key below.
county_df = county_df.rename({'Province_State': 'state_name'}, axis='columns')

In [25]:
# Align the lookup table's state column with county_df ('state' -> 'state_name').
state_code = state_code.rename({'state': 'state_name'}, axis='columns')

In [28]:
# Rename the FIPS table's 'State' column to 'code' so it matches the lookup
# table's column name for the later join.
fips = fips.rename({'State': 'code'}, axis='columns')

In [31]:
# Attach the state lookup columns to every county row; a left join keeps all
# counties even when a state name has no match.
county_level = county_df.merge(state_code, how='left', on='state_name')

In [34]:
# Count missing values per column: after the left join, a non-zero count in a
# lookup column would mean some state names found no match.
county_level.isna().sum()

state_name    0
county        0
Deaths        0
Confirmed     0
code          0
dtype: int64

In [35]:
# Add each county's FIPS code by matching on state code and county name;
# a left join keeps counties that have no FIPS entry.
county_level = county_level.merge(fips, how='left', on=['code', 'county'])

In [36]:
# Display the county table after the state-code and FIPS joins.
county_level

Unnamed: 0,state_name,county,Deaths,Confirmed,code,FIPS
0,Alabama,Autauga,803,35257,AL,01001
1,Alabama,Baldwin,882,64254,AL,01003
2,Alabama,Barbour,137,21840,AL,01005
3,Alabama,Bibb,95,12080,AL,01007
4,Alabama,Blount,72,15676,AL,01009
...,...,...,...,...,...,...
3146,Wyoming,Sweetwater,0,6606,WY,56037
3147,Wyoming,Teton,0,13368,WY,56039
3148,Wyoming,Uinta,0,8800,WY,56041
3149,Wyoming,Washakie,0,3008,WY,56043


In [8]:
import json
from urllib.request import urlopen

# Download the GeoJSON file with the geometry of US counties and parse it
# into a dict.
with urlopen(r'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.loads(response.read())

# Show the first feature to inspect its structure (properties, geometry, id).
counties['features'][0]

{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US01001',
  'STATE': '01',
  'COUNTY': '001',
  'NAME': 'Autauga',
  'LSAD': 'County',
  'CENSUSAREA': 594.436},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-86.496774, 32.344437],
    [-86.717897, 32.402814],
    [-86.814912, 32.340803],
    [-86.890581, 32.502974],
    [-86.917595, 32.664169],
    [-86.71339, 32.661732],
    [-86.714219, 32.705694],
    [-86.413116, 32.707386],
    [-86.411172, 32.409937],
    [-86.496774, 32.344437]]]},
 'id': '01001'}

Should we reset the color scale or its range? <br>
1. First, look at the minimum and maximum values of the column being plotted.

Save your map as an HTML file.

# Now let's do the same for State level!

Let's try one more