# COVID-19 Dashboard data exploration and GeoJSON creator

In [53]:
import sys
assert sys.version_info >= (3, 5)
# Python ≥3.5 is required
import numpy as np
import pandas as pd
import json
from pathlib import Path

## Loading the data 

In [54]:
data = pd.read_csv("03-09-2023.csv")

In [55]:
data

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
0,,,,Afghanistan,2023-03-10 04:21:03,33.939110,67.709953,209451,7896,,,Afghanistan,538.042451,3.769855
1,,,,Albania,2023-03-10 04:21:03,41.153300,20.168300,334457,3598,,,Albania,11621.968170,1.075774
2,,,,Algeria,2023-03-10 04:21:03,28.033900,1.659600,271496,6881,,,Algeria,619.132366,2.534476
3,,,,Andorra,2023-03-10 04:21:03,42.506300,1.521800,47890,165,,,Andorra,61981.492267,0.344540
4,,,,Angola,2023-03-10 04:21:03,-11.202700,17.873900,105288,1933,,,Angola,320.352770,1.835917
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,,,,West Bank and Gaza,2023-03-10 04:21:03,31.952200,35.233200,703228,5708,,,West Bank and Gaza,13784.956961,0.811686
4012,,,,Winter Olympics 2022,2023-03-10 04:21:03,39.904200,116.407400,535,0,,,Winter Olympics 2022,,0.000000
4013,,,,Yemen,2023-03-10 04:21:03,15.552727,48.516388,11945,2159,,,Yemen,40.048994,18.074508
4014,,,,Zambia,2023-03-10 04:21:03,-13.133897,27.849332,343135,4057,,,Zambia,1866.491630,1.182333


### GeoJSON structure

In [56]:
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}


### Data Manipulation

In [58]:
# incident rate = (Cases/population)*100,000
# population = (Cases*100,000)/incident rate
data["Population"] = (data["Confirmed"] * 100000)/data["Incident_Rate"]
data["Cases_per_1000"] = (data["Confirmed"]/data['Population'])*1000
#data["Incident_Rate"] = (data["Deaths"] / data["Population"])* 1000
data["Case_Fatality_Ratio"] = (data["Case_Fatality_Ratio"]*100).round()
data = data[data['Confirmed'] >= data['Deaths']]
data = data.sort_values(by='Incident_Rate', ascending=False)
data

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3495,48301.0,Loving,Texas,US,2023-03-10 04:21:03,31.849476,-103.581857,403,1,,,"Loving, Texas, US",238461.538462,2500.0,169.0,2384.615385
755,2068.0,Denali,Alaska,US,2023-03-10 04:21:03,63.672640,-150.007611,1803,2,,,"Denali, Alaska, US",85979.971388,1100.0,2097.0,859.799714
765,2180.0,Nome,Alaska,US,2023-03-10 04:21:03,64.903207,-164.035380,7617,7,,,"Nome, Alaska, US",76139.544182,900.0,10004.0,761.395442
767,2188.0,Northwest Arctic,Alaska,US,2023-03-10 04:21:03,67.049192,-159.750395,5794,22,,,"Northwest Arctic, Alaska, US",76026.768141,3800.0,7621.0,760.267681
3468,48247.0,Jim Hogg,Texas,US,2023-03-10 04:21:03,27.044539,-98.696819,3805,23,,,"Jim Hogg, Texas, US",73173.076923,6000.0,5200.0,731.730769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3852,90054.0,Unassigned,West Virginia,US,2023-03-10 04:21:03,,,904,9,,,"Unassigned, West Virginia, US",,10000.0,,
3922,90055.0,Unassigned,Wisconsin,US,2023-03-10 04:21:03,,,0,0,,,"Unassigned, Wisconsin, US",,,,
3954,90056.0,Unassigned,Wyoming,US,2023-01-08 23:21:00,,,0,0,,,"Unassigned, Wyoming, US",,,,
3980,,,Unknown,Ukraine,2023-03-10 04:21:03,,,589607,5793,,,"Unknown, Ukraine",,9800.0,,


#### UAE's Data within the dataframe

In [8]:
data.loc[data['Country_Region']=='United Arab Emirates']

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3986,,,,United Arab Emirates,2023-03-10 04:21:03,23.424076,53.847818,1053213,2349,,,United Arab Emirates,10648.841301,22.0,9890400.0,106.488413


### Summary statistics for the data dataframe

The initial idea behind these statistics were to get a rough idea on how 
many coordinate points would need to plotted on the map

In [9]:
data["Deaths"].sum()

6873116

In [10]:
data["Case_Fatality_Ratio"].sum()

568969.0

In [11]:
data["Incident_Rate"].sum()

111930162.34864745

### US Data cleaning

The US has reported COVID-19 to the county level. Which results in clutter all over the map. For the sake of simplicity, we are summarizing the US data by States

In [12]:
us_data = data.loc[data['Country_Region']=='US']
data = data[data['Country_Region']!='US']

In [13]:
us_data

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3495,48301.0,Loving,Texas,US,2023-03-10 04:21:03,31.849476,-103.581857,403,1,,,"Loving, Texas, US",238461.538462,25.0,169.0,2384.615385
755,2068.0,Denali,Alaska,US,2023-03-10 04:21:03,63.672640,-150.007611,1803,2,,,"Denali, Alaska, US",85979.971388,11.0,2097.0,859.799714
765,2180.0,Nome,Alaska,US,2023-03-10 04:21:03,64.903207,-164.035380,7617,7,,,"Nome, Alaska, US",76139.544182,9.0,10004.0,761.395442
767,2188.0,Northwest Arctic,Alaska,US,2023-03-10 04:21:03,67.049192,-159.750395,5794,22,,,"Northwest Arctic, Alaska, US",76026.768141,38.0,7621.0,760.267681
3468,48247.0,Jim Hogg,Texas,US,2023-03-10 04:21:03,27.044539,-98.696819,3805,23,,,"Jim Hogg, Texas, US",73173.076923,60.0,5200.0,731.730769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3753,90051.0,Unassigned,Virginia,US,2020-12-21 13:27:30,,,0,0,,,"Unassigned, Virginia, US",,,,
3798,90053.0,Unassigned,Washington,US,2023-03-10 04:21:03,,,3297,6,,,"Unassigned, Washington, US",,18.0,,
3852,90054.0,Unassigned,West Virginia,US,2023-03-10 04:21:03,,,904,9,,,"Unassigned, West Virginia, US",,100.0,,
3922,90055.0,Unassigned,Wisconsin,US,2023-03-10 04:21:03,,,0,0,,,"Unassigned, Wisconsin, US",,,,


In [14]:
#data struct to store states
# for every state in us_data: 
# get average of Case_Fatality_Ratio, median of lat and long, country
# insert row into data

states =  us_data.Province_State.values.tolist()
states = list(dict.fromkeys(states))
tempStateDf = pd.DataFrame()

for state in states:
    tempDf = us_data.loc[us_data['Province_State']==state]
    stateDf = tempDf.groupby('Province_State', as_index=False).Case_Fatality_Ratio.mean()
    lat = tempDf.groupby('Province_State', as_index=False).Lat.median().iloc[0]['Lat']
    long = tempDf.groupby('Province_State', as_index=False).Long_.median().iloc[0]['Long_']
    cases_per_1000 = tempDf.groupby('Province_State', as_index=False).Cases_per_1000.mean().iloc[0]['Cases_per_1000']
    deaths = tempDf.groupby('Province_State', as_index=False).Deaths.sum().iloc[0]['Deaths']
    confirmed = tempDf.groupby('Province_State', as_index=False).Confirmed.sum().iloc[0]['Confirmed']
    population = tempDf.groupby('Province_State', as_index=False).Population.sum().iloc[0]['Population']
    #  country = "US"
    stateDf["Lat"] = lat
    stateDf["Long_"] = long
    stateDf["Cases_per_1000"] = cases_per_1000
    stateDf["Country_Region"] = 'US'
    stateDf["Deaths"] = deaths
    stateDf["Confirmed"] = confirmed
    stateDf["Population"] = population
    stateDf
    tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
data = data.append(tempStateDf, ignore_index=True)

  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_inde

  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  tempStateDf = tempStateDf.append(stateDf, ignore_index=True)
  data = data.append(tempStateDf, ignore_index=True)


In [15]:
tempStateDf

Unnamed: 0,Province_State,Case_Fatality_Ratio,Lat,Long_,Cases_per_1000,Country_Region,Deaths,Confirmed,Population
0,Texas,186.419608,31.783209,-98.543035,288.203523,US,93390,8466220,28995881.0
1,Alaska,39.586207,60.067639,-149.953805,457.93632,US,1486,307655,731793.0
2,Georgia,198.863354,32.749765,-83.68845,277.904769,US,42489,3068208,10617423.0
3,Kentucky,126.487603,37.579299,-84.869131,391.886589,US,18130,1718471,4467673.0
4,Colorado,124.4,38.934415,-105.451273,293.375151,US,14181,1764401,5758736.0
5,South Dakota,165.287879,44.184784,-99.042527,293.472903,US,3190,279130,884659.0
6,Florida,168.073529,29.459336,-82.31831,320.072004,US,86850,7574590,21477737.0
7,North Dakota,114.339623,47.453678,-99.964078,322.700339,US,2232,286950,762062.0
8,Tennessee,141.206186,35.929299,-86.05739,374.344324,US,29263,2515130,6829174.0
9,Wisconsin,99.333333,44.434222,-89.668273,332.086432,US,16375,2006582,5822434.0


In [16]:
data.loc[data['Country_Region']=='US']

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
733,,,Texas,US,,31.783209,-98.543035,8466220,93390,,,,,186.419608,28995881.0,288.203523
734,,,Alaska,US,,60.067639,-149.953805,307655,1486,,,,,39.586207,731793.0,457.93632
735,,,Georgia,US,,32.749765,-83.68845,3068208,42489,,,,,198.863354,10617423.0,277.904769
736,,,Kentucky,US,,37.579299,-84.869131,1718471,18130,,,,,126.487603,4467673.0,391.886589
737,,,Colorado,US,,38.934415,-105.451273,1764401,14181,,,,,124.4,5758736.0,293.375151
738,,,South Dakota,US,,44.184784,-99.042527,279130,3190,,,,,165.287879,884659.0,293.472903
739,,,Florida,US,,29.459336,-82.31831,7574590,86850,,,,,168.073529,21477737.0,320.072004
740,,,North Dakota,US,,47.453678,-99.964078,286950,2232,,,,,114.339623,762062.0,322.700339
741,,,Tennessee,US,,35.929299,-86.05739,2515130,29263,,,,,141.206186,6829174.0,374.344324
742,,,Wisconsin,US,,44.434222,-89.668273,2006582,16375,,,,,99.333333,5822434.0,332.086432


### Country wise data for data exploration

In [17]:
country_wise_data = pd.DataFrame()

In [60]:
#data struct to store countries
# for every country in data: 
# get average of Case_Fatality_Ratio, median of lat and long, country
# insert row into data

countries =  data.Country_Region.values.tolist()
countries = list(dict.fromkeys(countries))

for country in countries:
    tempDf = data.loc[data['Country_Region']==country]
    countryDf = tempDf.groupby('Country_Region', as_index=False).Deaths.sum()
    case_fatality = tempDf.groupby('Country_Region', as_index=False).Case_Fatality_Ratio.median().iloc[0]['Case_Fatality_Ratio']
    lat = tempDf.groupby('Country_Region', as_index=False).Lat.median().iloc[0]['Lat']
    long = tempDf.groupby('Country_Region', as_index=False).Long_.median().iloc[0]['Long_']
  #  country = "US"
    countryDf["Lat"] = lat
    countryDf["Long_"] = long
    countryDf["Case_Fatality_Ratio"] = case_fatality
   # stateDf["Country_Region"] = country
    countryDf
    country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

In [61]:
country_wise_data_scaled = country_wise_data
country_wise_data.sort_values(by='Case_Fatality_Ratio', ascending=False).head(100)

Unnamed: 0,Country_Region,Deaths,Lat,Long_,Case_Fatality_Ratio
597,MS Zaandam,2.0,,,222200.0
397,MS Zaandam,2.0,,,222200.0
593,Yemen,2159.0,15.552727,48.516388,180700.0
393,Yemen,2159.0,15.552727,48.516388,180700.0
586,Sudan,5017.0,12.862800,30.217600,78600.0
...,...,...,...,...,...
519,Eswatini,1425.0,-26.522500,31.465900,19200.0
305,Iran,144933.0,32.427908,53.688046,19100.0
505,Iran,144933.0,32.427908,53.688046,19100.0
299,Bolivia,22365.0,-16.290200,-63.588700,18700.0


In [20]:
country_wise_data.quantile(0.25)

Deaths                 384.000000
Lat                      3.990062
Long_                   -9.128249
Case_Fatality_Ratio     47.750000
Name: 0.25, dtype: float64

In [21]:
country_wise_data.median()

  country_wise_data.median()


Deaths                 3187.500000
Lat                      17.125350
Long_                    20.921188
Case_Fatality_Ratio     103.000000
dtype: float64

In [22]:
country_wise_data.quantile(0.75).round()

Deaths                 19176.0
Lat                       39.0
Long_                     47.0
Case_Fatality_Ratio      186.0
Name: 0.75, dtype: float64

In [23]:
country_wise_data_scaled["Case_Fatality_Ratio"] = country_wise_data_scaled["Case_Fatality_Ratio"]/2.28
country_wise_data_scaled["Deaths"] = country_wise_data_scaled["Deaths"]/2.28
country_wise_data.sort_values(by='Deaths', ascending=False).tail(100)

Unnamed: 0,Country_Region,Deaths,Lat,Long_,Case_Fatality_Ratio
171,Syria,1387.719298,34.802075,38.996815,241.666667
186,Nigeria,1383.771930,9.082000,8.675300,51.754386
151,Cambodia,1340.350877,11.550000,104.916700,96.491228
131,Kyrgyzstan,1311.842105,41.204380,74.766098,63.596491
27,Estonia,1292.105263,58.595300,25.013600,21.052632
...,...,...,...,...,...
128,Holy See,0.000000,41.902900,12.453400,0.000000
60,Tuvalu,0.000000,-7.109500,177.649300,0.000000
194,Antarctica,0.000000,-71.949900,23.347000,0.000000
197,Summer Olympics 2020,0.000000,35.649100,139.773700,0.000000


In [24]:
country_wise_data_scaled.quantile(0.25)

Deaths                 168.421053
Lat                      3.990062
Long_                   -9.128249
Case_Fatality_Ratio     20.942982
Name: 0.25, dtype: float64

In [25]:
country_wise_data_scaled.median()

  country_wise_data_scaled.median()


Deaths                 1398.026316
Lat                      17.125350
Long_                    20.921188
Case_Fatality_Ratio      45.175439
dtype: float64

In [26]:
country_wise_data_scaled.quantile(0.75).round()

Deaths                 8410.0
Lat                      39.0
Long_                    47.0
Case_Fatality_Ratio      81.0
Name: 0.75, dtype: float64

In [27]:
clean_data_case_fatality = data.drop(['FIPS', 'Admin2','Province_State','Country_Region','Last_Update','Incident_Rate','Recovered','Active','Combined_Key','Deaths','Confirmed'], axis=1)
clean_data_case_fatality = clean_data_case_fatality.dropna()
clean_data_case_fatality.loc[~(clean_data_case_fatality==0.0).all(axis=1)]
clean_data_case_fatality = clean_data_case_fatality.sort_values(by='Case_Fatality_Ratio', ascending=False)
clean_data_case_fatality

Unnamed: 0,Lat,Long_,Case_Fatality_Ratio,Population,Cases_per_1000
688,15.552727,48.516388,1807.0,29825968.0,0.400490
675,12.862800,30.217600,786.0,43849269.0,1.455646
604,19.566500,-101.706800,780.0,4825401.0,24.012927
588,19.041400,-98.206300,757.0,6604451.0,33.504526
464,-6.353049,-79.824113,730.0,1310800.0,99.094446
...,...,...,...,...,...
700,35.745200,95.995600,0.0,6030000.0,0.129685
708,32.971100,119.455000,0.0,80510000.0,0.063036
84,51.036200,3.737300,0.0,1515064.0,425.947683
50,-19.054400,-169.867200,0.0,1650.0,480.000000


In [28]:
clean_data_deaths = data.drop(['FIPS', 'Admin2','Province_State','Country_Region','Last_Update','Incident_Rate','Recovered','Active','Combined_Key','Case_Fatality_Ratio','Confirmed'], axis=1)
clean_data_deaths = clean_data_deaths.dropna()
clean_data_deaths.loc[~(clean_data_deaths==0.0).all(axis=1)]
clean_data_deaths = clean_data_deaths.sort_values(by='Deaths', ascending=False)
clean_data_deaths

Unnamed: 0,Lat,Long_,Deaths,Population,Cases_per_1000
118,52.355500,-1.174300,186138,55977200.0,369.010544
373,-23.550500,-46.633300,179039,45919049.0,140.887979
12,46.227600,2.213700,161512,65273512.0,591.641354
603,-0.789300,113.921300,160941,273523621.0,24.634893
526,19.449759,76.108221,148424,123144223.0,66.086161
...,...,...,...,...,...
581,41.902900,12.453400,0,809.0,35.846724
684,31.692700,88.092400,0,3440000.0,0.478779
697,37.269200,106.165500,0,6880000.0,0.185465
708,32.971100,119.455000,0,80510000.0,0.063036


In [29]:
clean_data_cases_per_1000 = clean_data_deaths.drop(['Deaths','Population'], axis=1)
clean_data_cases_per_1000

Unnamed: 0,Lat,Long_,Cases_per_1000
118,52.355500,-1.174300,369.010544
373,-23.550500,-46.633300,140.887979
12,46.227600,2.213700,591.641354
603,-0.789300,113.921300,24.634893
526,19.449759,76.108221,66.086161
...,...,...,...
581,41.902900,12.453400,35.846724
684,31.692700,88.092400,0.478779
697,37.269200,106.165500,0.185465
708,32.971100,119.455000,0.063036


## The algorithm that creates our GeoJSON files

In [30]:
for i in range(len(clean_data_case_fatality)):
    
    longitude = clean_data_case_fatality.iloc[i]['Long_']
    latitude = clean_data_case_fatality.iloc[i]['Lat']
    data_points = int(clean_data_case_fatality.iloc[i]['Case_Fatality_Ratio'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../GeoJSON/case_fatality_ratio.json", "w") as outfile:
    outfile.write(json_object)

    
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}
    
    
    

for i in range(len(clean_data_deaths)):
    
    longitude = clean_data_deaths.iloc[i]['Long_']
    latitude = clean_data_deaths.iloc[i]['Lat']
    data_points = int(clean_data_deaths.iloc[i]['Deaths'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../GeoJSON/deaths.json", "w") as outfile:
    outfile.write(json_object)
    
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}
    
    
for i in range(len(clean_data_cases_per_1000)):
    
    longitude = clean_data_case_fatality.iloc[i]['Long_']
    latitude = clean_data_case_fatality.iloc[i]['Lat']
    data_points = int(clean_data_case_fatality.iloc[i]['Cases_per_1000'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../GeoJSON/cases_per_1000.json", "w") as outfile:
    outfile.write(json_object)
    

## Creating the graphs

In [31]:
vaccination_data = pd.read_csv("vaccinations-by-manufacturer.csv")

In [32]:
vaccination_data

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Argentina,2020-12-29,Oxford/AstraZeneca,1
1,Argentina,2020-12-29,Sinopharm/Beijing,1
2,Argentina,2020-12-29,Sputnik V,20491
3,Argentina,2020-12-30,Sputnik V,40592
4,Argentina,2020-12-31,Sputnik V,43398
...,...,...,...,...
57665,European Union,2023-03-21,Sanofi/GSK,4532
57666,European Union,2023-03-21,Sinopharm/Beijing,2319921
57667,European Union,2023-03-21,Sinovac,8891
57668,European Union,2023-03-21,Sputnik V,1845386


In [33]:
vaccination_data.dropna()

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Argentina,2020-12-29,Oxford/AstraZeneca,1
1,Argentina,2020-12-29,Sinopharm/Beijing,1
2,Argentina,2020-12-29,Sputnik V,20491
3,Argentina,2020-12-30,Sputnik V,40592
4,Argentina,2020-12-31,Sputnik V,43398
...,...,...,...,...
57665,European Union,2023-03-21,Sanofi/GSK,4532
57666,European Union,2023-03-21,Sinopharm/Beijing,2319921
57667,European Union,2023-03-21,Sinovac,8891
57668,European Union,2023-03-21,Sputnik V,1845386


In [34]:
vaccination_data.drop(['location','date'],axis=1)

Unnamed: 0,vaccine,total_vaccinations
0,Oxford/AstraZeneca,1
1,Sinopharm/Beijing,1
2,Sputnik V,20491
3,Sputnik V,40592
4,Sputnik V,43398
...,...,...
57665,Sanofi/GSK,4532
57666,Sinopharm/Beijing,2319921
57667,Sinovac,8891
57668,Sputnik V,1845386


In [35]:
vaccines_list = vaccination_data.vaccine.unique().tolist()
vaccine_dict = {}
for i in range(len(vaccines_list)):
    vaccine_dict[vaccines_list[i]] = 0
vaccine_dict

{'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Sputnik V': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'CanSino': 0,
 'Sputnik Light': 0,
 'Johnson&Johnson': 0,
 'Novavax': 0,
 'Sanofi/GSK': 0,
 'Valneva': 0,
 'Medicago': 0,
 'Sinovac': 0,
 'Covaxin': 0,
 'SKYCovione': 0}

In [36]:
vaccine_dict = {'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'Johnson&Johnson': 0,
 'Others':0}
vaccine_dict

{'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'Johnson&Johnson': 0,
 'Others': 0}

In [37]:
for i in range(len(vaccination_data)):
    currVaccine = vaccination_data.iloc[i]['vaccine']
    currCount = vaccination_data.iloc[i]['total_vaccinations']
    
    if currVaccine == 'Oxford/AstraZeneca' or currVaccine == 'Sinopharm/Beijing' or currVaccine == 'Pfizer/BioNTech' or currVaccine == 'Moderna' or currVaccine == 'Johnson&Johnson' :
        dictCount = vaccine_dict[currVaccine]
        vaccine_dict[currVaccine] = int(currCount + dictCount)
    else:
        othersCount = vaccine_dict['Others']
        vaccine_dict['Others'] = int(currCount + dictCount)

In [38]:
vaccine_dict

{'Oxford/AstraZeneca': 82093563042,
 'Sinopharm/Beijing': 31021031484,
 'Pfizer/BioNTech': 939904167926,
 'Moderna': 263357674637,
 'Johnson&Johnson': 26013600382,
 'Others': 31018721338}

In [39]:
json_object = json.dumps(vaccine_dict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/vaccines.json", "w") as outfile:
    outfile.write(json_object)

In [40]:
from datetime import date

totalCases = int(data["Confirmed"].sum())
totalDeaths = int(data["Deaths"].sum())
statsDict ={
    'totalCases': f"{totalCases:,}",
    'totalDeaths':f"{totalDeaths:,}",
    'cases_per_1000':int((totalCases/data["Population"].sum())*1000),
    'lastUpdated': date.today().strftime("%d/%m/%Y")
}
statsDict

{'totalCases': '676,569,930',
 'totalDeaths': '6,873,116',
 'cases_per_1000': 87,
 'lastUpdated': '20/04/2023'}

In [41]:
json_object = json.dumps(statsDict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/stats.json", "w") as outfile:
    outfile.write(json_object)

In [42]:
data['Cases_per_1000'].sum()

153675.8368342452

In [43]:
clean_data_cases_per_1000.quantile(0.25)

Lat                8.778100
Long_            -68.641025
Cases_per_1000    68.682005
Name: 0.25, dtype: float64

In [44]:
clean_data_cases_per_1000.median()

Lat                33.712259
Long_              14.662780
Cases_per_1000    164.105972
dtype: float64

In [45]:
clean_data_cases_per_1000.quantile(0.75).round()

Lat                47.0
Long_              73.0
Cases_per_1000    298.0
Name: 0.75, dtype: float64

In [46]:
clean_data_case_fatality.quantile(0.25)

Lat                         8.778100
Long_                     -68.641025
Case_Fatality_Ratio        47.000000
Population             786207.500000
Cases_per_1000             68.682005
Name: 0.25, dtype: float64

In [47]:
clean_data_case_fatality.median().round()

Lat                         34.0
Long_                       15.0
Case_Fatality_Ratio        106.0
Population             1944794.0
Cases_per_1000             164.0
dtype: float64

In [48]:
clean_data_case_fatality.quantile(0.75).round()

Lat                         47.0
Long_                       73.0
Case_Fatality_Ratio        192.0
Population             6839702.0
Cases_per_1000             298.0
Name: 0.75, dtype: float64

In [49]:
clean_data_deaths.quantile(0.25).round()

Lat                    9.0
Long_                -69.0
Deaths               630.0
Population        786208.0
Cases_per_1000        69.0
Name: 0.25, dtype: float64

In [50]:
clean_data_deaths.median().round()

Lat                    34.0
Long_                  15.0
Deaths               2706.0
Population        1944794.0
Cases_per_1000        164.0
dtype: float64

In [51]:
clean_data_deaths.quantile(0.75).round()

Lat                    47.0
Long_                  73.0
Deaths               8387.0
Population        6839702.0
Cases_per_1000        298.0
Name: 0.75, dtype: float64