# Regional Analysis of a Specific Country (US)

In [1]:
from pathlib import Path
import re

import folium
import ipywidgets as widgets
import pandas as pd
import plotly.express as px
from tqdm.notebook import tqdm

## Load all data

Here we load all datasets into a dictionary which maps the filename to the corresponding `pandas.DataFrame`.

In [2]:
datasets = {f.name : pd.read_csv(f) for f in tqdm(Path('.').glob('*.csv'))}

|          | 0/? [00:00<?, ?it/s]

## Clean the dataframes

In [3]:
us_confirmed_df = datasets['time_series_covid19_confirmed_US.csv']
us_death_df = datasets['time_series_covid19_deaths_US.csv']

### Drop unused columns and merge

In [27]:
us_confirmed_agg = us_confirmed_df.filter(regex=r'Province|\d+/').groupby('Province_State').sum().sum(axis=1)
us_death_agg = us_death_df.filter(regex=r'Province|\d+/').groupby('Province_State').sum().sum(axis=1)
us_merged_df = us_confirmed_agg.to_frame().join(us_death_agg.to_frame(), lsuffix='_c', rsuffix='_d')
us_merged_df.columns = ['confirmed', 'deaths']

In [28]:
us_merged_df.head()

Unnamed: 0_level_0,confirmed,deaths
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,34157382,550736
Alaska,3114202,15154
American Samoa,0,0
Arizona,49826182,1093698
Arkansas,19567161,304680


## Most affected states

In [35]:
top10_df = us_merged_df.sort_values(by='confirmed', ascending=False)
top10_df

Unnamed: 0_level_0,confirmed,deaths
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1
California,186306256,3193956
Texas,169495366,3099565
Florida,144932797,2742160
New York,131389249,8699868
Illinois,86767301,2292942
Georgia,68024524,1452323
New Jersey,59510408,4039460
Pennsylvania,50603766,2066221
Arizona,49826182,1093698
North Carolina,49459338,765301


### Top 10 of worst-hit states

In [36]:
fig = px.bar(top10_df.head(10).reset_index(),
             x='Province_State',
             y='confirmed',
             title=f'Top 10 (confirmed)')
fig.show()

In [37]:
fig = px.bar(top10_df.head(10).reset_index(),
             x='Province_State',
             y='deaths',
             title=f'Top 10 (deaths)')
fig.show()