In [47]:
import sys
assert sys.version_info >= (3, 5)
# Python ≥3.5 is required
import sklearn
assert sklearn.__version__ >= "0.20"
import numpy as np
import os
import tarfile
import urllib
import pandas as pd
import urllib.request
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve
from sklearn import preprocessing
from sklearn import metrics
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import json
from pathlib import Path

In [48]:
data = pd.read_csv("02-19-2023.csv")

In [49]:
data

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio
0,,,,Afghanistan,2023-02-20 04:21:05,33.939110,67.709953,209107,7896,,,Afghanistan,537.158776,3.776057
1,,,,Albania,2023-02-20 04:21:05,41.153300,20.168300,334314,3596,,,Albania,11616.999097,1.075635
2,,,,Algeria,2023-02-20 04:21:05,28.033900,1.659600,271425,6881,,,Algeria,618.970454,2.535139
3,,,,Andorra,2023-02-20 04:21:05,42.506300,1.521800,47866,165,,,Andorra,61950.430337,0.344712
4,,,,Angola,2023-02-20 04:21:05,-11.202700,17.873900,105184,1931,,,Angola,320.036336,1.835831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,,,,West Bank and Gaza,2023-02-20 04:21:05,31.952200,35.233200,703228,5708,,,West Bank and Gaza,13784.956961,0.811686
4012,,,,Winter Olympics 2022,2023-02-20 04:21:05,39.904200,116.407400,535,0,,,Winter Olympics 2022,,0.000000
4013,,,,Yemen,2023-02-20 04:21:05,15.552727,48.516388,11945,2159,,,Yemen,40.048994,18.074508
4014,,,,Zambia,2023-02-20 04:21:05,-13.133897,27.849332,342317,4051,,,Zambia,1862.042098,1.183406


In [50]:
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}


In [51]:
# incident rate = (Cases/population)*100,000
# population = (Cases*100,000)/incident rate
data["Population"] = (data["Confirmed"] * 100000)/data["Incident_Rate"]
data["Cases_per_1000"] = (data["Confirmed"]/data['Population'])*1000
#data["Incident_Rate"] = (data["Deaths"] / data["Population"])* 1000
data["Case_Fatality_Ratio"] = (data["Case_Fatality_Ratio"]*100).round()
data = data[data['Confirmed'] >= data['Deaths']]
data = data.sort_values(by='Incident_Rate', ascending=False)
data.head(50)

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3495,48301.0,Loving,Texas,US,2023-02-20 04:21:05,31.849476,-103.581857,396,1,,,"Loving, Texas, US",234319.526627,25.0,169.0,2343.195266
755,2068.0,Denali,Alaska,US,2023-02-20 04:21:05,63.67264,-150.007611,1801,2,,,"Denali, Alaska, US",85884.597043,11.0,2097.0,858.84597
765,2180.0,Nome,Alaska,US,2023-02-20 04:21:05,64.903207,-164.03538,7547,7,,,"Nome, Alaska, US",75439.82407,9.0,10004.0,754.398241
767,2188.0,Northwest Arctic,Alaska,US,2023-02-20 04:21:05,67.049192,-159.750395,5747,22,,,"Northwest Arctic, Alaska, US",75410.051174,38.0,7621.0,754.100512
3468,48247.0,Jim Hogg,Texas,US,2023-02-20 04:21:05,27.044539,-98.696819,3792,23,,,"Jim Hogg, Texas, US",72923.076923,61.0,5200.0,729.230769
1106,13053.0,Chattahoochee,Georgia,US,2023-02-20 04:21:05,32.343412,-84.788092,7886,24,,,"Chattahoochee, Georgia, US",72302.191253,30.0,10907.0,723.021913
189,,,Faroe Islands,Denmark,2023-02-20 04:21:05,61.8926,-6.9118,34658,28,,,"Faroe Islands, Denmark",70926.020669,8.0,48865.0,709.260207
750,2050.0,Bethel,Alaska,US,2023-02-20 04:21:05,60.909805,-159.856183,12837,47,,,"Bethel, Alaska, US",69819.427826,37.0,18386.0,698.194278
606,,,,San Marino,2023-02-20 04:21:05,43.9424,12.4578,23521,122,,,San Marino,69305.792916,52.0,33938.0,693.057929
17,,,,Austria,2023-02-20 04:21:05,47.5162,14.5501,5863481,21820,,,Austria,65103.493072,37.0,9006400.0,651.034931


In [52]:
data.loc[data['Country_Region']=='United Arab Emirates']

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3986,,,,United Arab Emirates,2023-02-20 04:21:05,23.424076,53.847818,1051079,2349,,,United Arab Emirates,10627.264822,22.0,9890400.0,106.272648


In [53]:
data["Deaths"].sum()

6854432

In [54]:
data["Case_Fatality_Ratio"].sum()

574322.0

In [55]:
data["Incident_Rate"].sum()

111222738.00460437

In [56]:
us_data = data.loc[data['Country_Region']=='US']
data = data[data['Country_Region']!='US']

In [57]:
us_data

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key,Incident_Rate,Case_Fatality_Ratio,Population,Cases_per_1000
3495,48301.0,Loving,Texas,US,2023-02-20 04:21:05,31.849476,-103.581857,396,1,,,"Loving, Texas, US",234319.526627,25.0,169.0,2343.195266
755,2068.0,Denali,Alaska,US,2023-02-20 04:21:05,63.672640,-150.007611,1801,2,,,"Denali, Alaska, US",85884.597043,11.0,2097.0,858.845970
765,2180.0,Nome,Alaska,US,2023-02-20 04:21:05,64.903207,-164.035380,7547,7,,,"Nome, Alaska, US",75439.824070,9.0,10004.0,754.398241
767,2188.0,Northwest Arctic,Alaska,US,2023-02-20 04:21:05,67.049192,-159.750395,5747,22,,,"Northwest Arctic, Alaska, US",75410.051174,38.0,7621.0,754.100512
3468,48247.0,Jim Hogg,Texas,US,2023-02-20 04:21:05,27.044539,-98.696819,3792,23,,,"Jim Hogg, Texas, US",72923.076923,61.0,5200.0,729.230769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3753,90051.0,Unassigned,Virginia,US,2020-12-21 13:27:30,,,0,0,,,"Unassigned, Virginia, US",,,,
3798,90053.0,Unassigned,Washington,US,2023-02-20 04:21:05,,,3303,6,,,"Unassigned, Washington, US",,18.0,,
3852,90054.0,Unassigned,West Virginia,US,2021-07-31 23:21:38,,,0,0,,,"Unassigned, West Virginia, US",,,,
3922,90055.0,Unassigned,Wisconsin,US,2023-02-20 04:21:05,,,0,0,,,"Unassigned, Wisconsin, US",,,,


In [58]:
country_wise_data = pd.DataFrame()

In [59]:
#data struct to store states
# for every state in us_data: 
# get average of Case_Fatality_Ratio, median of lat and long, country
# insert row into data

countries =  data.Country_Region.values.tolist()
countries = list(dict.fromkeys(countries))

for country in countries:
    tempDf = data.loc[data['Country_Region']==country]
    countryDf = tempDf.groupby('Country_Region', as_index=False).Deaths.sum()
    case_fatality = tempDf.groupby('Country_Region', as_index=False).Case_Fatality_Ratio.median().iloc[0]['Case_Fatality_Ratio']
    lat = tempDf.groupby('Country_Region', as_index=False).Lat.median().iloc[0]['Lat']
    long = tempDf.groupby('Country_Region', as_index=False).Long_.median().iloc[0]['Long_']
  #  country = "US"
    countryDf["Lat"] = lat
    countryDf["Long_"] = long
    countryDf["Case_Fatality_Ratio"] = case_fatality
   # stateDf["Country_Region"] = country
    countryDf
    country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)

  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)
  country_wise_data = country_wise_data.append(countryDf, ignore_index=True)


In [60]:
country_wise_data_scaled = country_wise_data
country_wise_data.sort_values(by='Case_Fatality_Ratio', ascending=False).head(100)

Unnamed: 0,Country_Region,Deaths,Lat,Long_,Case_Fatality_Ratio
196,MS Zaandam,2,,,2222.0
192,Yemen,2159,15.552727,48.516388,1807.0
185,Sudan,5011,12.862800,30.217600,786.0
171,Syria,3164,34.802075,38.996815,551.0
181,Somalia,1361,5.152149,46.199616,498.0
...,...,...,...,...,...
70,India,530761,23.780432,78.942140,107.0
139,Venezuela,5852,6.423800,-66.589700,106.0
147,Equatorial Guinea,183,1.650800,10.267900,106.0
35,Chile,64085,-35.046900,-71.475800,104.0


In [61]:
country_wise_data.quantile(0.25)

Deaths                 380.0000
Lat                      3.9193
Long_                   -8.2245
Case_Fatality_Ratio     47.5000
Name: 0.25, dtype: float64

In [62]:
country_wise_data.median()

  country_wise_data.median()


Deaths                 3164.0000
Lat                      17.0608
Long_                    20.9394
Case_Fatality_Ratio     103.0000
dtype: float64

In [63]:
country_wise_data.quantile(0.75).round()

Deaths                 18506.0
Lat                       39.0
Long_                     47.0
Case_Fatality_Ratio      187.0
Name: 0.75, dtype: float64

In [64]:
country_wise_data_scaled["Case_Fatality_Ratio"] = country_wise_data_scaled["Case_Fatality_Ratio"]/2.28
country_wise_data_scaled["Deaths"] = country_wise_data_scaled["Deaths"]/2.28
country_wise_data.sort_values(by='Deaths', ascending=False).tail(100)

Unnamed: 0,Country_Region,Deaths,Lat,Long_,Case_Fatality_Ratio
171,Syria,1387.719298,34.802075,38.996815,241.666667
186,Nigeria,1383.771930,9.082000,8.675300,51.754386
151,Cambodia,1340.350877,11.550000,104.916700,96.491228
131,Kyrgyzstan,1311.842105,41.204380,74.766098,63.596491
27,Estonia,1283.333333,58.595300,25.013600,21.052632
...,...,...,...,...,...
60,Tuvalu,0.000000,-7.109500,177.649300,0.000000
194,Antarctica,0.000000,-71.949900,23.347000,0.000000
128,Holy See,0.000000,41.902900,12.453400,0.000000
197,Summer Olympics 2020,0.000000,35.649100,139.773700,0.000000


In [65]:
country_wise_data_scaled.quantile(0.25)

Deaths                 166.666667
Lat                      3.919300
Long_                   -8.224500
Case_Fatality_Ratio     20.833333
Name: 0.25, dtype: float64

In [66]:
country_wise_data_scaled.median()

  country_wise_data_scaled.median()


Deaths                 1387.719298
Lat                      17.060800
Long_                    20.939400
Case_Fatality_Ratio      45.175439
dtype: float64

In [67]:
country_wise_data_scaled.quantile(0.75).round()

Deaths                 8117.0
Lat                      39.0
Long_                    47.0
Case_Fatality_Ratio      82.0
Name: 0.75, dtype: float64

In [68]:
#data struct to store states
# for every state in us_data: 
# get average of Case_Fatality_Ratio, median of lat and long, country
# insert row into data

states =  us_data.Province_State.values.tolist()
states = list(dict.fromkeys(states))

for state in states:
    tempDf = us_data.loc[us_data['Province_State']==state]
    stateDf = tempDf.groupby('Province_State', as_index=False).Case_Fatality_Ratio.mean()
    lat = tempDf.groupby('Province_State', as_index=False).Lat.median().iloc[0]['Lat']
    long = tempDf.groupby('Province_State', as_index=False).Long_.median().iloc[0]['Long_']
  #  country = "US"
    stateDf["Lat"] = lat
    stateDf["Long_"] = long
   # stateDf["Country_Region"] = country
    stateDf
    data = data.append(stateDf, ignore_index=True)

  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append

  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append(stateDf, ignore_index=True)
  data = data.append

In [69]:
clean_data_case_fatality = data.drop(['FIPS', 'Admin2','Province_State','Country_Region','Last_Update','Incident_Rate','Recovered','Active','Combined_Key','Deaths','Confirmed'], axis=1)
clean_data_case_fatality = clean_data_case_fatality.dropna()
clean_data_case_fatality.loc[~(clean_data_case_fatality==0.0).all(axis=1)]
clean_data_case_fatality = clean_data_case_fatality.sort_values(by='Case_Fatality_Ratio', ascending=False)
clean_data_case_fatality.head(50)

Unnamed: 0,Lat,Long_,Case_Fatality_Ratio,Population,Cases_per_1000
688,15.552727,48.516388,1807.0,29825968.0,0.40049
605,19.5665,-101.7068,789.0,4825401.0,23.700828
675,12.8628,30.2176,786.0,43849269.0,1.454414
588,19.0414,-98.2063,764.0,6604451.0,33.159001
599,19.1738,-96.1342,731.0,8539862.0,27.453722
464,-6.353049,-79.824113,730.0,1310800.0,99.076137
485,-5.133361,-80.335861,729.0,2048000.0,88.52832
505,-9.621718,-73.444929,720.0,589100.0,77.786454
558,30.8406,-115.2838,719.0,3634868.0,47.841352
392,-14.235097,-75.574821,700.0,975200.0,133.483388


In [70]:
clean_data_deaths = data.drop(['FIPS', 'Admin2','Province_State','Country_Region','Last_Update','Incident_Rate','Recovered','Active','Combined_Key','Case_Fatality_Ratio','Confirmed'], axis=1)
clean_data_deaths = clean_data_deaths.dropna()
clean_data_deaths.loc[~(clean_data_deaths==0.0).all(axis=1)]
clean_data_deaths = clean_data_deaths.sort_values(by='Deaths', ascending=False)
clean_data_deaths.head(50)

Unnamed: 0,Lat,Long_,Deaths,Population,Cases_per_1000
118,52.3555,-1.1743,184819.0,55977200.0,367.636377
373,-23.5505,-46.6333,178840.0,45919049.0,140.343804
12,46.2276,2.2137,161090.0,65273512.0,590.42881
603,-0.7893,113.9213,160886.0,273523621.0,24.620232
525,19.449759,76.108221,148421.0,123144223.0,66.080225
479,32.427908,53.688046,144804.0,83992953.0,90.084581
262,-38.4161,-63.6167,130448.0,45195777.0,222.19191
321,51.9194,19.1451,118826.0,37846605.0,169.112421
521,-30.5595,22.9375,102595.0,59308690.0,68.461283
284,38.9637,35.2433,101492.0,84339067.0,202.073874


In [71]:
clean_data_cases_per_1000 = clean_data_deaths.drop(['Deaths','Population'], axis=1)
clean_data_cases_per_1000

Unnamed: 0,Lat,Long_,Cases_per_1000
118,52.355500,-1.174300,367.636377
373,-23.550500,-46.633300,140.343804
12,46.227600,2.213700,590.428810
603,-0.789300,113.921300,24.620232
525,19.449759,76.108221,66.080225
...,...,...,...
87,50.850300,4.351700,417.173752
56,-19.054400,-169.867200,467.272727
581,41.902900,12.453400,35.846724
80,50.400000,4.350000,429.190855


In [72]:
#The mistake made here is Im rendering EACH AND EVERY covid case 
#Instead of that, create an object with long, lat and total case count.
#Hence we should only have approx 4000 json objects instead of 600 million+
#TODO: Figure out how mapbox is gonna render it



for i in range(len(clean_data_case_fatality)):
    
    longitude = clean_data_case_fatality.iloc[i]['Long_']
    latitude = clean_data_case_fatality.iloc[i]['Lat']
    data_points = int(clean_data_case_fatality.iloc[i]['Case_Fatality_Ratio'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/case_fatality_ratio.json", "w") as outfile:
    outfile.write(json_object)

    
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}
    
    
    

for i in range(len(clean_data_deaths)):
    
    longitude = clean_data_deaths.iloc[i]['Long_']
    latitude = clean_data_deaths.iloc[i]['Lat']
    data_points = int(clean_data_deaths.iloc[i]['Deaths'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/deaths.json", "w") as outfile:
    outfile.write(json_object)
    
geoJSONDict = {

  "type": "FeatureCollection",
  "crs": {
    "type": "name",
    "properties": {
      "name": "urn:ogc:def:crs:OGC:1.3:CRS84"
    }
  },
  "features": []
}
    
    
for i in range(len(clean_data_cases_per_1000)):
    
    longitude = clean_data_case_fatality.iloc[i]['Long_']
    latitude = clean_data_case_fatality.iloc[i]['Lat']
    data_points = int(clean_data_case_fatality.iloc[i]['Cases_per_1000'])
    
    
    for x in range(data_points):
        geometry = {
        "geometry": {
          "coordinates": [
            longitude,
            latitude
          ]
        }
      }
        geoJSONDict["features"].append(geometry)
    

json_object = json.dumps(geoJSONDict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/cases_per_1000.json", "w") as outfile:
    outfile.write(json_object)
    

## Creating the graphs

In [73]:
vaccination_data = pd.read_csv("vaccinations-by-manufacturer.csv")

In [74]:
vaccination_data

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Argentina,2020-12-29,Oxford/AstraZeneca,1
1,Argentina,2020-12-29,Sinopharm/Beijing,1
2,Argentina,2020-12-29,Sputnik V,20491
3,Argentina,2020-12-30,Sputnik V,40592
4,Argentina,2020-12-31,Sputnik V,43398
...,...,...,...,...
57665,European Union,2023-03-21,Sanofi/GSK,4532
57666,European Union,2023-03-21,Sinopharm/Beijing,2319921
57667,European Union,2023-03-21,Sinovac,8891
57668,European Union,2023-03-21,Sputnik V,1845386


In [75]:
vaccination_data.dropna()

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Argentina,2020-12-29,Oxford/AstraZeneca,1
1,Argentina,2020-12-29,Sinopharm/Beijing,1
2,Argentina,2020-12-29,Sputnik V,20491
3,Argentina,2020-12-30,Sputnik V,40592
4,Argentina,2020-12-31,Sputnik V,43398
...,...,...,...,...
57665,European Union,2023-03-21,Sanofi/GSK,4532
57666,European Union,2023-03-21,Sinopharm/Beijing,2319921
57667,European Union,2023-03-21,Sinovac,8891
57668,European Union,2023-03-21,Sputnik V,1845386


In [76]:
vaccination_data.drop(['location','date'],axis=1)

Unnamed: 0,vaccine,total_vaccinations
0,Oxford/AstraZeneca,1
1,Sinopharm/Beijing,1
2,Sputnik V,20491
3,Sputnik V,40592
4,Sputnik V,43398
...,...,...
57665,Sanofi/GSK,4532
57666,Sinopharm/Beijing,2319921
57667,Sinovac,8891
57668,Sputnik V,1845386


In [77]:
vaccines_list = vaccination_data.vaccine.unique().tolist()
vaccine_dict = {}
for i in range(len(vaccines_list)):
    vaccine_dict[vaccines_list[i]] = 0
vaccine_dict

{'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Sputnik V': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'CanSino': 0,
 'Sputnik Light': 0,
 'Johnson&Johnson': 0,
 'Novavax': 0,
 'Sanofi/GSK': 0,
 'Valneva': 0,
 'Medicago': 0,
 'Sinovac': 0,
 'Covaxin': 0,
 'SKYCovione': 0}

In [78]:
vaccine_dict = {'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'Johnson&Johnson': 0,
 'Others':0}
vaccine_dict

{'Oxford/AstraZeneca': 0,
 'Sinopharm/Beijing': 0,
 'Pfizer/BioNTech': 0,
 'Moderna': 0,
 'Johnson&Johnson': 0,
 'Others': 0}

In [79]:
for i in range(len(vaccination_data)):
    currVaccine = vaccination_data.iloc[i]['vaccine']
    currCount = vaccination_data.iloc[i]['total_vaccinations']
    
    if currVaccine == 'Oxford/AstraZeneca' or currVaccine == 'Sinopharm/Beijing' or currVaccine == 'Pfizer/BioNTech' or currVaccine == 'Moderna' or currVaccine == 'Johnson&Johnson' :
        dictCount = vaccine_dict[currVaccine]
        vaccine_dict[currVaccine] = int(currCount + dictCount)
    else:
        othersCount = vaccine_dict['Others']
        vaccine_dict['Others'] = int(currCount + dictCount)

In [80]:
vaccine_dict

{'Oxford/AstraZeneca': 82093563042,
 'Sinopharm/Beijing': 31021031484,
 'Pfizer/BioNTech': 939904167926,
 'Moderna': 263357674637,
 'Johnson&Johnson': 26013600382,
 'Others': 31018721338}

In [81]:
json_object = json.dumps(vaccine_dict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/vaccines.json", "w") as outfile:
    outfile.write(json_object)

In [82]:
from datetime import date

totalCases = int(data["Confirmed"].sum())
totalDeaths = int(data["Deaths"].sum())
statsDict ={
    'totalCases': f"{totalCases:,}",
    'totalDeaths':f"{totalDeaths:,}",
    'cases_per_1000':int((totalCases/data["Population"].sum())*1000),
    'lastUpdated': date.today().strftime("%d/%m/%Y")
}
statsDict

{'totalCases': '570,920,151',
 'totalDeaths': '5,745,235',
 'cases_per_1000': 77,
 'lastUpdated': '25/03/2023'}

In [83]:
json_object = json.dumps(statsDict, separators=(',', ":"))
with open("../mapbox-protype2/src/data/stats.json", "w") as outfile:
    outfile.write(json_object)

In [84]:
data['Cases_per_1000'].sum()

136448.18359682785

In [85]:
clean_data_cases_per_1000.quantile(0.25)

Lat                7.117675
Long_            -53.446300
Cases_per_1000    63.181786
Name: 0.25, dtype: float64

In [86]:
clean_data_cases_per_1000.median()

Lat                31.876456
Long_              18.938650
Cases_per_1000    150.807089
dtype: float64

In [87]:
clean_data_cases_per_1000.quantile(0.75).round()

Lat                49.0
Long_              76.0
Cases_per_1000    280.0
Name: 0.75, dtype: float64

In [88]:
clean_data_case_fatality.quantile(0.25)

Lat                         7.117675
Long_                     -53.446300
Case_Fatality_Ratio        42.000000
Population             759621.500000
Cases_per_1000             63.181786
Name: 0.25, dtype: float64

In [96]:
clean_data_case_fatality.median().round()

Lat                         32.0
Long_                       19.0
Case_Fatality_Ratio        104.0
Population             1863491.0
Cases_per_1000             151.0
dtype: float64

In [92]:
clean_data_case_fatality.quantile(0.75).round()

Lat                         49.0
Long_                       76.0
Case_Fatality_Ratio        200.0
Population             6687597.0
Cases_per_1000             280.0
Name: 0.75, dtype: float64

In [97]:
clean_data_deaths.quantile(0.25).round()

Lat                    7.0
Long_                -53.0
Deaths               568.0
Population        759622.0
Cases_per_1000        63.0
Name: 0.25, dtype: float64

In [98]:
clean_data_deaths.median().round()

Lat                    32.0
Long_                  19.0
Deaths               2418.0
Population        1863491.0
Cases_per_1000        151.0
dtype: float64

In [99]:
clean_data_deaths.quantile(0.75).round()

Lat                    49.0
Long_                  76.0
Deaths               7309.0
Population        6687597.0
Cases_per_1000        280.0
Name: 0.75, dtype: float64