In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
conda install -c plotly plotly=4.6.0

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.


Note: you may need to restart the kernel to use updated packages.


In [3]:
import chart_studio.plotly as py
import chart_studio.tools as tls
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.express as px

In [4]:
# Let's load and explore the data

In [5]:
# Read the COVID-19 CSV into `df`, parsing the 'Date' column as datetimes.
df = pd.read_csv(
    "covid_19_clean_complete.csv",
    parse_dates=["Date"],
)

In [6]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0


In [7]:
# Dimensions of the frame as (rows, columns).
df.shape

(19836, 8)

In [8]:
# Column labels as loaded from the CSV (before renaming).
df.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', 'Date', 'Confirmed',
       'Deaths', 'Recovered'],
      dtype='object')

In [9]:
# Per-column non-null counts and dtypes, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19836 entries, 0 to 19835
Data columns (total 8 columns):
Province/State    6080 non-null object
Country/Region    19836 non-null object
Lat               19836 non-null float64
Long              19836 non-null float64
Date              19836 non-null datetime64[ns]
Confirmed         19836 non-null int64
Deaths            19836 non-null int64
Recovered         19836 non-null int64
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 1.2+ MB


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,Lat,Long,Confirmed,Deaths,Recovered
count,19836.0,19836.0,19836.0,19836.0,19836.0
mean,21.456103,22.498687,880.324662,40.387225,225.932799
std,24.785712,70.688066,8331.133731,492.369584,2645.682524
min,-51.7963,-135.0,0.0,0.0,0.0
25%,7.0,-19.0208,0.0,0.0,0.0
50%,23.685,20.902977,1.0,0.0,0.0
75%,41.2044,81.0,62.0,0.0,2.0
max,71.7069,178.065,366614.0,16523.0,64014.0


In [11]:
# Number of missing values in each column.
df.isnull().sum()

Province/State    13756
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             0
Deaths                0
Recovered             0
dtype: int64

In [12]:
# Rename the columns to short, lowercase, standardized names.
# rename() returns a new frame that is rebound to `df` instead of mutating the
# existing object with inplace=True; keys missing from the frame are ignored,
# so re-running this cell is harmless.
df = df.rename(columns={
    'Date': 'date',
    'Province/State': 'state',
    'Country/Region': 'country',
    'Lat': 'lat',
    'Long': 'long',
    'Confirmed': 'confirmed',
    'Deaths': 'deaths',
    'Recovered': 'recovered',
})

In [13]:
# Derive the `active` column from the three case counters:
#   active = confirmed - deaths - recovered
df['active'] = df['confirmed'] - df['deaths'] - df['recovered']

In [14]:
# Preview the frame again to check the renamed columns and the new `active` column.
df.head()

Unnamed: 0,state,country,lat,long,date,confirmed,deaths,recovered,active
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0,0


In [33]:
# Spread of COVID-19: animated density map of confirmed cases, one frame per date.
# Work on a copy: `df1 = df` would only alias the frame, so formatting the dates
# below would overwrite df['date'] with strings for every later cell.
df1 = df.copy()
df1['date'] = pd.to_datetime(df1['date'])
# The animation frame label is the date rendered as 'mm/dd/YYYY' text.
df1['date'] = df1['date'].dt.strftime('%m/%d/%Y')
# Replace missing values (e.g. rows without a state) with '-' for the hover text.
df1 = df1.fillna('-')
fig = px.density_mapbox(
    df1,
    lat='lat',
    lon='long',
    z='confirmed',
    radius=20,
    zoom=1,
    hover_data=["country", 'state', "confirmed"],
    mapbox_style="carto-positron",
    animation_frame='date',
    range_color=[0, 1000],
    title='Spread of Covid-19',
)
fig.update_layout(margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()


In [34]:
## Active cases around the world
# Keep only the rows of the most recent date. The dates are parsed first so the
# maximum is chronological even if `date` holds 'mm/dd/YYYY' strings, where a
# plain string max() would compare lexicographically.
report_dates = pd.to_datetime(df['date'])
top = df[report_dates == report_dates.max()]
# Sum the counters per country. The columns are selected with a list: the
# tuple-style ['a', 'b', 'c'] selection on a GroupBy was deprecated in
# pandas 1.0 and removed in pandas 2.0.
world = (
    top.groupby(by='country')[['confirmed', 'active', 'deaths']]
       .sum()
       .reset_index()
)
world.head(10)

Unnamed: 0,country,confirmed,active,deaths
0,Afghanistan,367,338,11
1,Albania,377,240,21
2,Algeria,1423,1160,173
3,Andorra,525,473,21
4,Angola,16,12,2
5,Antigua and Barbuda,15,15,0
6,Argentina,1554,1181,48
7,Armenia,833,763,8
8,Australia,5797,4677,40
9,Austria,12297,8614,220


In [36]:
# Choropleth of active cases per country, matched by country name;
# the colour scale is limited to the range 1-5000.
choropleth_options = dict(
    locations="country",
    locationmode='country names',
    color="active",
    hover_name="country",
    range_color=[1, 5000],
    color_continuous_scale="emrld",
    title='countries with Active Cases',
)
figure = px.choropleth(world, **choropleth_options)
figure.show()


In [39]:
# Recovered cases around the world: one marker per row of the latest snapshot,
# drawn over raster tiles served by the USGS national map service.
usgs_imagery_layer = {
    "below": 'traces',
    "sourcetype": "raster",
    "source": [
        "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
    ],
}
fig = px.scatter_mapbox(
    top,
    lat="lat",
    lon="long",
    hover_name="country",
    hover_data=["country", "recovered"],
    color_discrete_sequence=["fuchsia"],
    zoom=0.5,
    height=300,
    title='Recovered count of each country',
)
# Base style, tile layer and margins are applied in a single layout update.
fig.update_layout(
    mapbox_style="white-bg",
    mapbox_layers=[usgs_imagery_layer],
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()


In [42]:
# Deaths around the world: bubble map with one marker per country.
# Marker size is deaths raised to the power 0.2, which compresses the range
# between the smallest and largest counts.
world['size'] = world['deaths'] ** 0.2
fig = px.scatter_geo(
    world,
    locations="country",
    locationmode='country names',
    color="deaths",
    hover_name="country",
    size="size",
    hover_data=['country', 'deaths'],
    projection="natural earth",
    title='Death count of each country',
)
fig.show()

In [None]:
.