In [1]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import requests
from datetime import datetime

# Collect the data from the ArcGIS REST endpoint using the requests package.
# A timeout keeps the notebook from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of as a confusing
# JSON/KeyError failure further down.
raw = requests.get(
    "https://services1.arcgis.com/0MSEUqKaxRlEPj5g/arcgis/rest/services/Coronavirus_2019_nCoV_Cases/FeatureServer/1/query?where=1%3D1&outFields=*&outSR=4326&f=json",
    timeout=30,
)
raw.raise_for_status()
raw_json = raw.json()
# Each entry of "features" becomes one row of the frame.
df = pd.DataFrame(raw_json["features"])

In [2]:
# Preview the first rows of the raw frame built from the "features" list.
df.head()

Unnamed: 0,attributes,geometry
0,"{'OBJECTID': 1, 'Province_State': 'Abruzzo', '...","{'x': 13.398438230000068, 'y': 42.35122196000003}"
1,"{'OBJECTID': 2, 'Province_State': 'Acre', 'Cou...","{'x': -70.81199999999995, 'y': -9.023799999999..."
2,"{'OBJECTID': 3, 'Province_State': 'Adygea Repu...","{'x': 40.152042100000074, 'y': 44.69390060000006}"
3,"{'OBJECTID': 4, 'Province_State': 'Aguascalien...","{'x': -102.29159999999996, 'y': 21.88530000000..."
4,"{'OBJECTID': 5, 'Province_State': 'Aichi', 'Co...","{'x': 137.21162100000004, 'y': 35.035551000000..."


In [3]:
# Expand the per-feature "attributes" dicts into a flat pandas DataFrame.
df_list = df["attributes"].tolist()
df_final = pd.DataFrame(df_list)
# NOTE: the previous `df_final.set_index("OBJECTID")` call discarded its result
# (set_index returns a new frame), so it never changed the index. It is removed
# here; the default RangeIndex is kept, as before.
# Keep only the columns used by the analysis; .copy() makes the subset an
# independent frame rather than a selection of the wider one.
df_final = df_final[
    [
        "Country_Region",
        "Province_State",
        "Lat",
        "Long_",
        "Confirmed",
        "Deaths",
        "Recovered",
        "Last_Update",
    ]
].copy()
df_final.head()

Unnamed: 0,Country_Region,Province_State,Lat,Long_,Confirmed,Deaths,Recovered,Last_Update
0,Italy,Abruzzo,42.351222,13.398438,3279,456,2312,1592210000000.0
1,Brazil,Acre,-9.0238,-70.812,9642,259,5108,1592210000000.0
2,Russia,Adygea Republic,44.693901,40.152042,1340,12,796,1592210000000.0
3,Mexico,Aguascalientes,21.8853,-102.2916,1431,70,1008,1592210000000.0
4,Japan,Aichi,35.035551,137.211621,515,34,467,1592210000000.0


In [4]:
# Summarize column dtypes and non-null counts to spot missing values.
df_final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country_Region  731 non-null    object 
 1   Province_State  562 non-null    object 
 2   Lat             718 non-null    float64
 3   Long_           718 non-null    float64
 4   Confirmed       731 non-null    int64  
 5   Deaths          731 non-null    int64  
 6   Recovered       731 non-null    int64  
 7   Last_Update     729 non-null    float64
dtypes: float64(3), int64(3), object(2)
memory usage: 45.8+ KB


In [5]:
# Helper for turning the Last_Update values into proper timestamps.
def convertTime(t):
    """Return the datetime for a POSIX timestamp given in seconds.

    The value is truncated to whole seconds with ``int`` before conversion.
    ``datetime.fromtimestamp`` is called without a ``tz`` argument, so the
    result is a naive datetime expressed in the machine's local timezone.
    """
    return datetime.fromtimestamp(int(t))

# Rows without a Last_Update cannot be converted, so drop them. The .copy()
# gives an independent frame so the column assignments below do not write
# to a slice of the previous frame (avoids SettingWithCopyWarning).
df_final = df_final.dropna(subset=["Last_Update"]).copy()
# Assign the filled column back instead of calling fillna(inplace=True) on the
# selected column: that chained in-place form is deprecated in pandas 2.x and
# does not modify df_final when Copy-on-Write is enabled.
df_final["Province_State"] = df_final["Province_State"].fillna("")

# Last_Update is in epoch milliseconds; convertTime expects seconds.
df_final["Last_Update"] = (df_final["Last_Update"] / 1000).apply(convertTime)

df_final.head()

Unnamed: 0,Country_Region,Province_State,Lat,Long_,Confirmed,Deaths,Recovered,Last_Update
0,Italy,Abruzzo,42.351222,13.398438,3279,456,2312,2020-06-15 11:33:20
1,Brazil,Acre,-9.0238,-70.812,9642,259,5108,2020-06-15 11:33:20
2,Russia,Adygea Republic,44.693901,40.152042,1340,12,796,2020-06-15 11:33:20
3,Mexico,Aguascalientes,21.8853,-102.2916,1431,70,1008,2020-06-15 11:33:20
4,Japan,Aichi,35.035551,137.211621,515,34,467,2020-06-15 11:33:20


In [6]:
# Look at Qatar (my home country) first; the analysis is centered around it.
df_final[df_final["Country_Region"].eq("Qatar")]

Unnamed: 0,Country_Region,Province_State,Lat,Long_,Confirmed,Deaths,Recovered,Last_Update
627,Qatar,,25.3548,51.1839,79602,73,56898,2020-06-15 11:33:20


In [7]:
# Worldwide totals: sum each count column over every row of df_final.
total_confirmed, total_recovered, total_deaths = (
    df_final[column].sum() for column in ("Confirmed", "Recovered", "Deaths")
)

In [8]:
# Total confirmed cases (sum of the "Confirmed" column).
print(total_confirmed) 

7925237


In [9]:
# Total recovered cases (sum of the "Recovered" column).
print(total_recovered)

3783781


In [10]:
# Total deaths (sum of the "Deaths" column).
print(total_deaths)

433648


In [11]:
# Aggregate the counts per country: rows are per Province_State, so a country
# may appear on several rows and its figures have to be summed.
df_total = df_final.groupby("Country_Region", as_index=False).agg(
    {
        "Confirmed": "sum",
        "Deaths": "sum",
        "Recovered": "sum",
    }
)
# Use the fully qualified option name. The abbreviated "max_rows" pattern is
# ambiguous on newer pandas (it also matches "styler.render.max_rows") and
# raises OptionError there; "display.max_rows" works on every version.
pd.set_option("display.max_rows", None)
print(df_total)

                       Country_Region  Confirmed  Deaths  Recovered
0                         Afghanistan      25527     476       5164
1                             Albania       1521      36       1044
2                             Algeria      10919     767       7606
3                             Andorra        853      51        781
4                              Angola        140       6         61
5                 Antigua and Barbuda         26       3         20
6                           Argentina      31577     833       9564
7                             Armenia      17064     285       6276
8                           Australia       7335     102       6851
9                             Austria      17109     677      16059
10                         Azerbaijan       9957     119       5583
11                            Bahamas        103      11         68
12                            Bahrain      18544      45      13197
13                         Bangladesh      87520

In [12]:
# Top 10 countries by confirmed cases, as parallel lists of names and counts.
df_top10 = df_total.nlargest(n=10, columns="Confirmed")
top10_countries_1, top10_confirmed = (
    df_top10[column].tolist() for column in ("Country_Region", "Confirmed")
)

In [14]:
# Top 10 countries by recovered cases, as parallel lists of names and counts.
df_top10 = df_total.nlargest(n=10, columns="Recovered")
top10_countries_2, top10_recovered = (
    df_top10[column].tolist() for column in ("Country_Region", "Recovered")
)

In [15]:
# Top 10 countries by deaths, as parallel lists of names and counts.
df_top10 = df_total.nlargest(n=10, columns="Deaths")
top10_countries_3, top10_deaths = (
    df_top10[column].tolist() for column in ("Country_Region", "Deaths")
)