In [23]:
#import necessary libraries
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import requests
from datetime import datetime


In [24]:
#request data from web serve, returns COVID-19 data from web service in JSON format
raw= requests.get("https://services1.arcgis.com/0MSEUqKaxRlEPj5g/arcgis/rest/services/Coronavirus_2019_nCoV_Cases/FeatureServer/1/query?where=1%3D1&outFields=*&outSR=4326&f=json")
raw_json = raw.json()
df = pd.DataFrame(raw_json["features"])

In [3]:
#preview data
df.head()

Unnamed: 0,attributes,geometry
0,"{'OBJECTID': 1, 'Province_State': 'Abruzzo', '...","{'x': 13.398438230000068, 'y': 42.35122196000003}"
1,"{'OBJECTID': 2, 'Province_State': 'Acre', 'Cou...","{'x': -70.81199999999995, 'y': -9.023799999999..."
2,"{'OBJECTID': 3, 'Province_State': 'Aguascalien...","{'x': -102.29159999999996, 'y': 21.88530000000..."
3,"{'OBJECTID': 4, 'Province_State': 'Aichi', 'Co...","{'x': 137.21162100000004, 'y': 35.035551000000..."
4,"{'OBJECTID': 5, 'Province_State': 'Akita', 'Co...","{'x': 140.408228, 'y': 39.74867900000004}"


In [4]:
#view values of attributes in more detail
df["attributes"][0]

{'OBJECTID': 1,
 'Province_State': 'Abruzzo',
 'Country_Region': 'Italy',
 'Last_Update': 1590759169000,
 'Lat': 42.35122196,
 'Long_': 13.39843823,
 'Confirmed': 3237,
 'Recovered': 2011,
 'Deaths': 402}

In [5]:
### Transform Data

In [8]:
#convert dictionary to a list
data_list = df["attributes"].tolist()
#build a new dataframe
df_final = pd.DataFrame(data_list)
#set "OBJECTID" as index for every record
df_final.set_index("OBJECTID")
#reorder columns 
df_final = df_final[["Country_Region", "Province_State", "Lat", "Long_", "Confirmed", "Recovered", "Deaths", "Last_Update"]]
#preview tranformed data
df_final.head()

Unnamed: 0,Country_Region,Province_State,Lat,Long_,Confirmed,Recovered,Deaths,Last_Update
0,Italy,Abruzzo,42.351222,13.398438,3237,2011,402,1590759000000.0
1,Brazil,Acre,-9.0238,-70.812,5600,0,122,1590759000000.0
2,Mexico,Aguascalientes,21.8853,-102.2916,769,523,33,1590759000000.0
3,Japan,Aichi,35.035551,137.211621,506,453,34,1590759000000.0
4,Japan,Akita,39.748679,140.408228,16,16,0,1590759000000.0


In [14]:
#Last_Update in milliseconds --> convert to yyyy-mm-dd and drop NAs
#fromtimestamp method
def convertTime(t):
    t = int(t)
    return datetime.fromtimestamp(t)

df_final = df_final.dropna(subset=["Last_Update"])
df_final["Province_State"].fillna(value="", inplace=True)

df_final["Last_Update"]= df_final["Last_Update"]/1000
df_final["Last_Update"] = df_final["Last_Update"].apply(convertTime)

df_final.head()

df_final["Confirmed"] = df_final[]
###something is going on with Last_Update (1969???)

Unnamed: 0,Country_Region,Province_State,Lat,Long_,Confirmed,Recovered,Deaths,Last_Update
0,Italy,Abruzzo,42.351222,13.398438,3237,2011,402,1969-12-31 18:00:01
1,Brazil,Acre,-9.0238,-70.812,5600,0,122,1969-12-31 18:00:01
2,Mexico,Aguascalientes,21.8853,-102.2916,769,523,33,1969-12-31 18:00:01
3,Japan,Aichi,35.035551,137.211621,506,453,34,1969-12-31 18:00:01
4,Japan,Akita,39.748679,140.408228,16,16,0,1969-12-31 18:00:01


In [65]:
#create a totals dataframe to show a list of top ten countries 
df_total = df_final.groupby("Country_Region", as_index=False).agg(
        {
            "Confirmed" : "sum",
            "Recovered" : "sum",
            "Deaths" : "sum"
        }
)

df_total.head()

Unnamed: 0,Country_Region,Confirmed,Recovered,Deaths
0,Afghanistan,13659,1259,246
1,Albania,1099,851,33
2,Algeria,8997,5277,630
3,Andorra,763,681,51
4,Angola,74,18,4


In [54]:
change_confirmed = df_final["Confirmed"].pct_change(fill_method='ffill')
change_confirmed

0           NaN
1      0.729997
2     -0.862679
3     -0.342003
4     -0.968379
         ...   
551    1.559816
552   -0.512605
553    0.562371
554   -0.671184
555   -0.742228
Name: Confirmed, Length: 554, dtype: float64

In [55]:
change_recovered = df_final["Recovered"].pct_change(fill_method='ffill')
change_recovered

0           NaN
1     -1.000000
2           inf
3     -0.133843
4     -0.964680
         ...   
551         NaN
552         NaN
553         NaN
554         NaN
555         NaN
Name: Recovered, Length: 554, dtype: float64

In [63]:
change_deaths = df_final["Deaths"].pct_change

In [61]:
df_final["Confirmed"].pct_change()

0           NaN
1      0.729997
2     -0.862679
3     -0.342003
4     -0.968379
         ...   
551    1.559816
552   -0.512605
553    0.562371
554   -0.671184
555   -0.742228
Name: Confirmed, Length: 554, dtype: float64

In [66]:
#calculate the daily toals of COVID-19 cases globally and top ten countries
#global
global_confirmed = df_final["Confirmed"].sum()
global_recovered = df_final["Recovered"].sum()
global_deaths = df_final["Deaths"].sum()

global_confirmed

AttributeError: 'numpy.int64' object has no attribute 'rename'

In [None]:
#top ten countries
df_top10 = df_total.nlargest(10, "Confirmed")
top10_countries_1 = df_top10["Country_Region"].tolist()
top10_confirmed = df_top10["Confirmed"].tolist()

df_top10 = df_total.nlargest(10, "Recovered")
top10_countries_2 = df_top10["Country_Region"].tolist()
top10_recovered = df_top10["Recovered"].tolist()

df_top10 = df_total.nlargest(10, "Deaths")
top10_countries_3 = df_top10["Country_Region"].tolist()
top10_deaths = df_top10["Deaths"].tolist()