In [20]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Let long text cells render (up to 800 characters) instead of being cut off.
# The fully qualified key is used because abbreviated option names are
# resolved by pattern matching and can become ambiguous in later pandas
# releases.
pd.set_option('display.max_colwidth', 800)

In [21]:
# Data source: https://github.com/owid/covid-19-data/tree/master/public/data/
# Only the five columns used in this notebook are read from the CSV.
df = pd.read_csv(
    '../data/owid-covid-data.csv',
    usecols=[
        'location',
        'date',
        'new_cases_smoothed',
        'new_tests_smoothed',
        'positive_rate',
    ],
)

In [22]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,location,date,new_cases_smoothed,new_tests_smoothed,positive_rate
0,Afghanistan,2020-02-24,,,
1,Afghanistan,2020-02-25,,,
2,Afghanistan,2020-02-26,,,
3,Afghanistan,2020-02-27,,,
4,Afghanistan,2020-02-28,,,


In [23]:
# (rows, columns) of the frame as loaded, before any country filtering.
df.shape

(59130, 5)

In [24]:
# List the distinct values found in the location column.
df['location'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia',
       'Cameroon', 'Canada', 'Cape Verde', 'Central African Republic',
       'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus',
       'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada',
       'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana',

In [25]:
# Keep only the rows for the three countries compared below.
# Note: this rebinds `df`, so the unfiltered frame is gone after this cell.
df = df.loc[
    df['location'].isin(['Estonia', 'Lithuania', 'Finland'])
]

In [26]:
# (rows, columns) of the current frame.
df.shape

(985, 5)

In [27]:
# Rows whose location is Estonia.
et_data = df.loc[df['location'].eq('Estonia')]

In [28]:
# Display the Estonia subset (the rendered output elides the middle rows).
et_data

Unnamed: 0,location,date,new_cases_smoothed,new_tests_smoothed,positive_rate
17168,Estonia,2020-02-26,,,
17169,Estonia,2020-02-27,,,
17170,Estonia,2020-02-28,,,
17171,Estonia,2020-02-29,,,
17172,Estonia,2020-03-01,,,
...,...,...,...,...,...
17483,Estonia,2021-01-06,487.286,4121.0,0.118
17484,Estonia,2021-01-07,537.286,4421.0,0.122
17485,Estonia,2021-01-08,590.143,,
17486,Estonia,2021-01-09,614.286,,


In [29]:
# Rows whose location is Lithuania.
lt_data = df.loc[df['location'].eq('Lithuania')]

In [30]:
# Display the Lithuania subset (the rendered output elides the middle rows).
lt_data

Unnamed: 0,location,date,new_cases_smoothed,new_tests_smoothed,positive_rate
31119,Lithuania,2020-02-23,,,
31120,Lithuania,2020-02-28,,,
31121,Lithuania,2020-02-29,,,
31122,Lithuania,2020-03-01,,,
31123,Lithuania,2020-03-02,,,
...,...,...,...,...,...
31432,Lithuania,2021-01-05,2173.143,10389.0,0.209
31433,Lithuania,2021-01-06,1999.429,10557.0,0.189
31434,Lithuania,2021-01-07,1995.857,10590.0,0.188
31435,Lithuania,2021-01-08,1805.143,,


In [31]:
# Rows whose location is Finland.
fi_data = df.loc[df['location'].eq('Finland')]

In [32]:
# Display the Finland subset (the rendered output elides the middle rows).
fi_data

Unnamed: 0,location,date,new_cases_smoothed,new_tests_smoothed,positive_rate
18439,Finland,2020-01-29,,,
18440,Finland,2020-01-30,,,
18441,Finland,2020-01-31,,,
18442,Finland,2020-02-01,,,
18443,Finland,2020-02-02,,,
...,...,...,...,...,...
18781,Finland,2021-01-05,256.857,9935.0,0.026
18782,Finland,2021-01-06,241.571,9289.0,0.026
18783,Finland,2021-01-07,237.857,,
18784,Finland,2021-01-08,237.857,,


In [33]:
def check_na(df, columns=('new_cases_smoothed', 'new_tests_smoothed',
                          'positive_rate')):
    """Print, per column, the shape of the rows where that column is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It must contain every name listed in ``columns``.
    columns : str or iterable of str, optional
        Column name(s) to check. Defaults to the three metric columns, so
        ``check_na(frame)`` prints exactly three lines in that order.

    Returns
    -------
    None
        For each column one ``(rows_with_nan, total_columns)`` tuple is
        printed, in the order the columns were given.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = (columns,)

    for column in columns:
        # Masking the whole frame (not just the column) keeps the second
        # element of the printed shape equal to the frame's column count.
        print(df[df[column].isna()].shape)

In [34]:
# Shapes of the Estonia rows with missing cases / tests / positive rate.
check_na(et_data)

(7, 5)
(10, 5)
(10, 5)


In [35]:
# Shapes of the Lithuania rows with missing cases / tests / positive rate.
check_na(lt_data)

(6, 5)
(30, 5)
(36, 5)


In [36]:
# Shapes of the Finland rows with missing cases / tests / positive rate.
check_na(fi_data)

(5, 5)
(39, 5)
(39, 5)


In [40]:
# Draw one line chart of positive_rate over date for each country frame.
for data in (et_data, lt_data, fi_data):
    # Each frame was filtered to a single location, so the first row's
    # location value serves as the chart title.
    country = data['location'].iat[0]
    fig = px.line(
        data,
        x="date",
        y="positive_rate",
        title=country,
    )
    fig.show()