In [1]:
import pandas as pd
import numpy as np
import math
import glob
import re
from datetime import date, timedelta
import io
import requests

# Standard plotly imports
import plotly.graph_objects as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [100]:
# Load the three global time-series tables (confirmed, deaths, recovered)
# from CSV files in the working directory.
confirm, deaths, recovered = map(pd.read_csv, (
    'time_series_covid19_confirmed_global.csv',
    'time_series_covid19_deaths_global.csv',
    'time_series_covid19_recovered_global.csv',
))

In [101]:
# Discard the province label and the coordinates; what remains is the country
# name plus one column per date.
confirm_filtered = confirm.drop(columns=['Province/State', 'Lat', 'Long'])
death_filtered = deaths.drop(columns=['Province/State', 'Lat', 'Long'])
recovered_filtered = recovered.drop(columns=['Province/State', 'Lat', 'Long'])

In [102]:
# Collapse multiple rows of the same country into a single row by summing
# every remaining column; 'Country/Region' stays an ordinary column.
df_confirm = confirm_filtered.groupby('Country/Region', as_index=False).sum()
df_death = death_filtered.groupby('Country/Region', as_index=False).sum()
df_recovered = recovered_filtered.groupby('Country/Region', as_index=False).sum()

In [103]:
# Rename countries with duplicate naming conventions
def rename(df):
    """Normalise alternate country spellings in the 'Country/Region' column.

    The column is rewritten on ``df`` itself and the same frame is returned,
    so existing ``df = rename(df)`` call sites keep working.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Country/Region' column holding country-name strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the aliases below replaced. Names that
        are not listed are left unchanged.
    """
    aliases = {
        'Mainland China': 'China',
        'Hong Kong SAR': 'Hong Kong',
        ' Azerbaijan': 'Azerbaijan',  # leading space is part of the alias
        'Holy See': 'Vatican City',
        'Iran (Islamic Republic of)': 'Iran',
        'Taiwan*': 'Taiwan',
        'Korea, South': 'South Korea',
        'Viet Nam': 'Vietnam',
        'Macao SAR': 'Macau',
        'Russian Federation': 'Russia',
        'Republic of Moldova': 'Moldova',
        'Czechia': 'Czech Republic',
        # NOTE(review): Congo (Kinshasa) and Congo (Brazzaville) are different
        # countries but both collapse to 'Congo' here -- confirm this is intended.
        'Congo (Kinshasa)': 'Congo',
        'Northern Ireland': 'United Kingdom',
        # Fixed: the Republic of Korea is South Korea (was mapped to 'North Korea').
        'Republic of Korea': 'South Korea',
        'Congo (Brazzaville)': 'Congo',
        'Taipei and environs': 'Taiwan',
        'Others': 'Cruise Ship',
    }
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the selected column: the in-place form mutates an intermediate Series
    # and is not guaranteed to reach ``df`` (chained assignment).
    df['Country/Region'] = df['Country/Region'].replace(aliases)
    return df

In [104]:
# Apply the same country-name normalisation to all three tables.
df_confirm, df_death, df_recovered = (
    rename(frame) for frame in (df_confirm, df_death, df_recovered)
)

In [46]:
# #summing the respective cases
# column_list_c = list(df_confirm)
# column_list_c.remove("Country/Region")

# column_list_d = list(df_death)
# column_list_d.remove("Country/Region")

# column_list_r = list(df_recovered)
# column_list_r.remove("Country/Region")

In [9]:
# column_list

In [105]:
# df_confirm["sum"] = df_confirm[column_list].sum(axis=1)
# df_death["sum"] = df_death[column_list].sum(axis=1)
# df_recovered["sum"] = df_recovered[column_list].sum(axis=1)

# NOTE: despite its name, "sum" is a copy of each table's last column,
# not a row-wise total across columns.
df_confirm["sum"] = df_confirm[df_confirm.columns[-1]]
df_death["sum"] = df_death[df_death.columns[-1]]
df_recovered["sum"] = df_recovered[df_recovered.columns[-1]]

In [106]:
# df_confirm.iloc[:,-1]

In [108]:
# df_confirm.head(1)

In [30]:
# df = df.sort_values(by='sum', ascending=False)
# df_10 = df.head(10)

In [12]:
# dfcountry = pd.read_csv('plotly_countries_and_codes.csv') 

In [13]:
# dfcountry = dfcountry.drop(['GDP (BILLIONS)'],axis =1)

In [31]:
# dfcountry.COUNTRY.unique()

In [32]:
# %matplotlib inline
# init_notebook_mode(connected=True)
# import plotly as py
# import plotly.graph_objs as go
# import pandas as pd

# data = dict (
#     type = 'choropleth',
#     locations = df_10['Country/Region'],
#     locationmode='country names',
#     colorscale = [[0, 'green'], [0.5, 'red'], [1.0, 'rgb(0, 0, 255)']],
#     z=df_10['sum'])

# map = go.Figure(data=[data])
# py.offline.plot(map)

## Confirmed Cases

In [109]:
# Keep only the country name and its "sum" value for plotting.
df_c = df_confirm.loc[:, ['Country/Region', 'sum']]

In [110]:
# Preview the first five rows.
df_c.head(n=5)

Unnamed: 0,Country/Region,sum
0,Afghanistan,367
1,Albania,377
2,Algeria,1423
3,Andorra,525
4,Angola,16


In [111]:
# Choropleth of the per-country confirmed values.

# Trace: countries are matched by name and shaded by the "sum" column;
# borders are drawn as thin white lines.
data = [
    go.Choropleth(
        locations=df_c['Country/Region'],
        locationmode='country names',
        z=df_c['sum'],
        text=df_c['Country/Region'],
        autocolorscale=True,
        marker=dict(line=dict(color='rgb(255,255,255)', width=1)),
        colorbar=dict(title="Total Confirmed Cases"),
    )
]

# Layout: horizontally centred title on a world-scope map with white lakes.
layout = go.Layout(
    title=dict(
        text='Countrywise Total Confirmed Cases (April 6th)<br>(hover for more details)',
        x=0.5,
    ),
    geo=dict(
        scope='world',
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Death Cases

In [112]:
# Keep only the country name and its "sum" value for plotting.
df_d = df_death.loc[:, ['Country/Region', 'sum']]

In [113]:
# Preview the first five rows.
df_d.head(n=5)

Unnamed: 0,Country/Region,sum
0,Afghanistan,11
1,Albania,21
2,Algeria,173
3,Andorra,21
4,Angola,2


In [114]:
# Choropleth of the per-country death values.

# Trace: countries are matched by name and shaded by the "sum" column;
# borders are drawn as thin white lines.
data = [
    go.Choropleth(
        locations=df_d['Country/Region'],
        locationmode='country names',
        z=df_d['sum'],
        text=df_d['Country/Region'],
        autocolorscale=True,
        marker=dict(line=dict(color='rgb(255,255,255)', width=1)),
        colorbar=dict(title="Total Confirmed Deaths"),
    )
]

# Layout: horizontally centred title on a world-scope map with white lakes.
layout = go.Layout(
    title=dict(
        text='Countrywise Total Confirmed Deaths (April 6th)<br>(hover for more details)',
        x=0.5,
    ),
    geo=dict(
        scope='world',
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Recovered Cases

In [115]:
# Keep only the country name and its "sum" value for plotting.
df_r = df_recovered.loc[:, ['Country/Region', 'sum']]

In [116]:
# Preview the first five rows.
df_r.head(n=5)

Unnamed: 0,Country/Region,sum
0,Afghanistan,18
1,Albania,116
2,Algeria,90
3,Andorra,31
4,Angola,2


In [117]:
# Choropleth of the per-country recovered values.

# Trace: countries are matched by name and shaded by the "sum" column;
# borders are drawn as thin white lines.
data = [
    go.Choropleth(
        locations=df_r['Country/Region'],
        locationmode='country names',
        z=df_r['sum'],
        text=df_r['Country/Region'],
        autocolorscale=True,
        marker=dict(line=dict(color='rgb(255,255,255)', width=1)),
        colorbar=dict(title="Total Confirmed Recovered"),
    )
]

# Layout: horizontally centred title on a world-scope map with white lakes.
layout = go.Layout(
    title=dict(
        text='Countrywise Total Recovered Cases (April 6th)<br>(hover for more details)',
        x=0.5,
    ),
    geo=dict(
        scope='world',
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

## USA
State-wise breakdown

In [119]:
# Load the US report CSV from the working directory.
# NOTE(review): the file name suggests a 14 April 2020 snapshot -- confirm the source.
df_us = pd.read_csv('04-14-2020.csv')

In [120]:
# Preview the first five rows.
df_us.head(n=5)

Unnamed: 0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,People_Tested,People_Hospitalized,Mortality_Rate,UID,ISO3,Testing_Rate,Hospitalization_Rate
0,Alabama,US,2020-04-14 23:33:31,32.3182,-86.9023,3953,114,,3839.0,1.0,84.305541,33117.0,493.0,2.883886,84000001,USA,706.285508,12.471541
1,Alaska,US,2020-04-14 23:33:31,61.3707,-152.4044,285,9,98.0,276.0,2.0,47.678875,8348.0,32.0,3.157895,84000002,USA,1396.572809,11.22807
2,American Samoa,US,,-14.271,-170.132,0,0,,,60.0,0.0,3.0,,,16,ASM,5.391708,
3,Arizona,US,2020-04-14 23:33:31,33.7298,-111.4312,3809,131,249.0,3678.0,4.0,52.330651,44096.0,580.0,3.439223,84000004,USA,605.821053,15.227094
4,Arkansas,US,2020-04-14 23:33:31,34.9697,-92.3731,1498,32,427.0,1466.0,5.0,57.859574,21131.0,130.0,2.136182,84000005,USA,816.175343,8.678238


In [122]:
# Keep only rows whose ISO3 code is 'USA'. The explicit copy makes df_us an
# independent frame, so the column assignments in later cells do not write
# into a selection of the raw table (avoids SettingWithCopyWarning).
df_us = df_us[df_us['ISO3'] == 'USA'].copy()

In [124]:
# Treat missing recovery counts as zero. assign() builds a new frame instead
# of setting a column (via attribute access) on the filtered selection.
df_us = df_us.assign(Recovered=df_us['Recovered'].fillna(0))

In [126]:
def state_conversion():
    """Return a dict mapping full US state names to two-letter abbreviations.

    The mapping has 50 entries, 'Alabama' through 'Wyoming'; any name that is
    not listed here has no key in the returned dict.
    """
    name_code_pairs = (
        ('Alabama', 'AL'), ('Alaska', 'AK'), ('Arizona', 'AZ'),
        ('Arkansas', 'AR'), ('California', 'CA'), ('Colorado', 'CO'),
        ('Connecticut', 'CT'), ('Delaware', 'DE'), ('Florida', 'FL'),
        ('Georgia', 'GA'), ('Hawaii', 'HI'), ('Idaho', 'ID'),
        ('Illinois', 'IL'), ('Indiana', 'IN'), ('Iowa', 'IA'),
        ('Kansas', 'KS'), ('Kentucky', 'KY'), ('Louisiana', 'LA'),
        ('Maine', 'ME'), ('Maryland', 'MD'), ('Massachusetts', 'MA'),
        ('Michigan', 'MI'), ('Minnesota', 'MN'), ('Mississippi', 'MS'),
        ('Missouri', 'MO'), ('Montana', 'MT'), ('Nebraska', 'NE'),
        ('Nevada', 'NV'), ('New Hampshire', 'NH'), ('New Jersey', 'NJ'),
        ('New Mexico', 'NM'), ('New York', 'NY'), ('North Carolina', 'NC'),
        ('North Dakota', 'ND'), ('Ohio', 'OH'), ('Oklahoma', 'OK'),
        ('Oregon', 'OR'), ('Pennsylvania', 'PA'), ('Rhode Island', 'RI'),
        ('South Carolina', 'SC'), ('South Dakota', 'SD'), ('Tennessee', 'TN'),
        ('Texas', 'TX'), ('Utah', 'UT'), ('Vermont', 'VT'),
        ('Virginia', 'VA'), ('Washington', 'WA'), ('West Virginia', 'WV'),
        ('Wisconsin', 'WI'), ('Wyoming', 'WY'),
    )
    return dict(name_code_pairs)

In [129]:
# Attach the two-letter abbreviation for each Province_State and keep only rows
# that have one. assign() + dropna() build a new frame, so no column is set
# on a filtered selection (avoids SettingWithCopyWarning).
df_us = (
    df_us
    .assign(state_abbr=df_us['Province_State'].map(state_conversion()))
    .dropna(subset=['state_abbr'])
)
# df_us.head()

In [157]:
# Order states by confirmed count, largest first. Reassigning (rather than
# sorting in place) avoids mutating a frame derived from earlier filtering.
df_us = df_us.sort_values(by=['Confirmed'], ascending=False)

# Hover text: one HTML snippet per state row, indexed like df_us.
tt = (
    'State: ' + df_us['Province_State'].apply(str) + '<br>'
    + 'Confirmed: ' + df_us['Confirmed'].apply(str) + '<br>'
    + 'Deaths: ' + df_us['Deaths'].apply(str) + '<br>'
    + 'Recovered: ' + df_us['Recovered'].apply(str)
)

# Each (start, stop) pair is a positional row range of the sorted frame and
# becomes one trace; colors[i] is the marker colour of the i-th range.
#limits = [(0,1000),(1000,10000),(10000,100000),(100000,1000000),(1000000,10000000)]
limits = [(0, 1000000)]
colors = ["rgb(255,0,0)"]
#colors = ["rgb(0,116,217)","rgb(255,65,54)","rgb(133,20,75)","rgb(255,133,27)","lightgrey"]
states = []
scale = 5000  # NOTE(review): unused -- marker size below divides by a fixed 100.

for i, lim in enumerate(limits):
    df_sub = df_us.iloc[lim[0]:lim[1]]
    state = go.Scattergeo(
        locationmode='USA-states',
        lon=df_sub['Long_'],
        lat=df_sub['Lat'],
        # Take hover text and marker size from the same rows as lon/lat so the
        # three stay aligned when `limits` defines more than one range.
        text=tt.loc[df_sub.index],
        marker=go.scattergeo.Marker(
            size=df_sub['Confirmed'] / 100,
            color=colors[i],
            line=go.scattergeo.marker.Line(
                width=0.5, color='rgb(40,40,40)'
            ),
            sizemode='area'
        ),
        name='{0} - {1}'.format(lim[0], lim[1]))
    states.append(state)

layout = go.Layout(
    title=go.layout.Title(
        text='States with most Confirmed Cases - US<br>(Hover for Break Down)',
        x=0.5
    ),
    showlegend=False,
    geo=go.layout.Geo(
        scope='usa',
        projection=go.layout.geo.Projection(
            type='albers usa'
        ),
        showland=True,
        landcolor='rgb(217, 217, 217)',
        subunitwidth=1,
        countrywidth=1,
        subunitcolor="rgb(255, 255, 255)",
        countrycolor="rgb(255, 255, 255)"
    )
)

fig = go.Figure(data=states, layout=layout)
iplot(fig, filename='d3-bubble-map-populations')