In [1]:
!wget https://covid.ourworldindata.org/data/owid-covid-data.csv

--2022-02-04 19:24:06--  https://covid.ourworldindata.org/data/owid-covid-data.csv
Resolving covid.ourworldindata.org (covid.ourworldindata.org)... 188.114.96.12, 188.114.97.12, 2a06:98c1:3121::c, ...
Connecting to covid.ourworldindata.org (covid.ourworldindata.org)|188.114.96.12|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/csv]
Saving to: ‘owid-covid-data.csv’

owid-covid-data.csv     [              <=>   ]  43.20M  16.2MB/s    in 2.7s    

2022-02-04 19:24:09 (16.2 MB/s) - ‘owid-covid-data.csv’ saved [45303410]



In [6]:
import pandas as pd
import numpy as np
import geopandas as gpd
import plotly.express as px
import streamlit as st

In [7]:
gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,920938,Oceania,Fiji,FJI,8374.0,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,53950935,Africa,Tanzania,TZA,150600.0,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,603253,Africa,W. Sahara,ESH,906.5,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,35623680,North America,Canada,CAN,1674000.0,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,326625791,North America,United States of America,USA,18560000.0,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
...,...,...,...,...,...,...
172,7111024,Europe,Serbia,SRB,101800.0,"POLYGON ((18.82982 45.90887, 18.82984 45.90888..."
173,642550,Europe,Montenegro,MNE,10610.0,"POLYGON ((20.07070 42.58863, 19.80161 42.50009..."
174,1895250,Europe,Kosovo,-99,18490.0,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."
175,1218208,North America,Trinidad and Tobago,TTO,43570.0,"POLYGON ((-61.68000 10.76000, -61.10500 10.890..."


In [30]:
def read_data():
    df = pd.read_csv("owid-covid-data.csv")
    # Access built-in Natural Earth data via GeoPandas
    world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

    # Get a list (dataframe) of country centroids
    centroids = world.centroid
    centroid_list = pd.concat([world.name, world.iso_a3, centroids], axis=1)
    centroid_list['lon'] = centroid_list[0].apply(lambda point: point.x)
    centroid_list['lat'] = centroid_list[0].apply(lambda point: point.y)
    centroid_list = centroid_list.rename(columns={"iso_a3":"iso_code"})
    df = pd.merge(df, centroid_list[["iso_code", "lon", "lat"]], on="iso_code", how="inner")
    df = df.fillna(0)
    df["new_deaths"] = df["new_deaths"].clip(lower=0)
    df = df.sort_values("date")
    df['formatted_date'] = pd.to_datetime(df['date'])
    df['year'] = df['formatted_date'].dt.isocalendar().year.astype(str)
    df['week'] = df['formatted_date'].dt.isocalendar().week.astype(str)
    df['week'] = df['week'].apply(lambda x: x.zfill(2))
    df["yw"] = df['year'] + df['week']
    df.reset_index(inplace=True, drop=True)
    return df

In [61]:
def transform_data(df, kpis_agg):
    df_yw = (df.groupby(["yw", "iso_code", "continent", "location", "lon", "lat"])
                       .agg(kpis_agg).reset_index())
    df_yw[list(kpis_agg.keys())] = df_yw[list(kpis_agg.keys())].clip(0)
    df_yw[list(kpis_agg.keys())] = df_yw[list(kpis_agg.keys())].round(2)
    return df_yw

In [52]:
kpis_agg = {}
kpis_agg["total_cases"] = "max"
kpis_agg["new_cases"] = "sum"
kpis_agg["new_cases_smoothed"] = "sum"
kpis_agg["total_deaths"] = "max"
kpis_agg["new_deaths"] = "sum"
kpis_agg["new_deaths_smoothed"] = "sum"
kpis_agg["total_cases_per_million"] = "max"
kpis_agg["new_cases_per_million"] = "max"
kpis_agg["new_cases_smoothed_per_million"] = "max"
kpis_agg["total_deaths_per_million"] = "max"
kpis_agg["new_deaths_per_million"] = "max"
kpis_agg["new_deaths_smoothed_per_million"] = "max"
kpis_agg["hosp_patients"] = "sum"
kpis_agg["hosp_patients_per_million"] = "max"
kpis_agg["weekly_icu_admissions"] = "max"
kpis_agg["weekly_icu_admissions_per_million"] = "max"
kpis_agg["new_tests"] = "sum"
kpis_agg["total_tests"] = "max"
kpis_agg["total_tests_per_thousand"] = "max"
kpis_agg["new_tests_per_thousand"] = "max"
kpis_agg["positive_rate"] = "max"
kpis_agg["total_vaccinations"] = "max"
kpis_agg["people_vaccinated"] = "max"
kpis_agg["people_fully_vaccinated"] = "max"
kpis_agg["new_vaccinations"] = "sum"
kpis_agg["total_vaccinations_per_hundred"] = "max"
kpis_agg["people_vaccinated_per_hundred"] = "max"
kpis_agg["people_fully_vaccinated_per_hundred"] = "max"
kpis_agg["stringency_index"] = "max"
kpis_agg["population"] = "max"
kpis_agg["population_density"] = "max"

In [31]:
df = read_data()


Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [62]:
df_yw = transform_data(df, kpis_agg)

In [63]:
df_yw.head()

Unnamed: 0,yw,iso_code,continent,location,lon,lat,total_cases,new_cases,new_cases_smoothed,total_deaths,...,total_vaccinations,people_vaccinated,people_fully_vaccinated,new_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,stringency_index,population,population_density
0,202001,ARG,South America,Argentina,-65.175361,-35.446821,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45605823.0,16.18
1,202001,MEX,North America,Mexico,-102.57635,23.935372,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,130262220.0,66.44
2,202001,PER,South America,Peru,-74.391806,-9.191563,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,33359415.0,25.13
3,202001,THA,Asia,Thailand,101.006134,15.016975,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,69950844.0,135.13
4,202002,ARG,South America,Argentina,-65.175361,-35.446821,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45605823.0,16.18


In [33]:
df.to_csv("owid-covid-data-gps.csv", index=False)

In [64]:
df_yw.to_csv("owid-covid-data-gps-weekly.csv", index=False)