In [28]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from math import pi
import math
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats  as stats
from scipy.stats import pearsonr
from sklearn import preprocessing
from datetime import datetime
import calendar
import os
from os import listdir
from os.path import isfile, join
import plotly.express as px

# Notebook-wide pandas display settings: generous row/column limits and a
# wide console so frames are rarely truncated when rendered.
pd.options.display.max_rows = 6000
pd.options.display.max_columns = 100
pd.options.display.width = 1000

# Render floats with thousands separators and two decimal places.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [17]:
# Per-country centrality scores; the key column is renamed to 'Country' so it
# matches the naming used by the other frames in this notebook.
df_centr = (
    pd.read_csv('eigenvector_centrality.csv')
    .rename(columns={'country': 'Country'})
)

# Display countries from lowest to highest centrality.
df_centr.sort_values(by='centrality')

Unnamed: 0,Country,centrality
116,Anguilla,0.0
141,Namibia,0.0
128,Iraq,0.0
45,Montenegro,0.0
52,Western Sahara,0.0
87,Belize,0.0
41,Laos,0.0
66,Mozambique,0.0
32,Tajikistan,0.0
5,Palestine,0.0


In [14]:
# Daily case counts per country: keep only the three columns used below and
# rename 'Name' to 'Country' so it lines up with the other frames.
df_cases = (
    pd.read_csv('total_cases_countries_normalized.csv')
    .loc[:, ['Name', 'Day', 'DailyCases']]
    .rename(columns={'Name': 'Country'})
)

df_cases

Unnamed: 0,Country,Day,DailyCases
0,Afghanistan,0,0.00
1,Afghanistan,1,0.00
2,Afghanistan,2,0.00
3,Afghanistan,3,0.00
4,Afghanistan,4,0.00
...,...,...,...
17422,Zimbabwe,135,0.00
17423,Zimbabwe,136,0.00
17424,Zimbabwe,137,5.00
17425,Zimbabwe,138,2.00


In [15]:
# ActiveCases = trailing 14-row sum of DailyCases, computed within each country.
#
# * transform() returns a Series indexed exactly like df_cases, so every value
#   is written back to the row it was computed from, independent of how the
#   frame happens to be ordered by Country.
# * min_periods=1 makes the first 13 rows of each country use the partial
#   window available so far, so early reported cases are counted instead of
#   being forced to 0 until a full 14-row window exists.
# * fillna(0) only covers windows in which every DailyCases value is missing.
#
# NOTE(review): assumes rows are in ascending Day order within each country
# and that there is one row per day — confirm against the source CSV.
df_cases['ActiveCases'] = (
    df_cases.groupby('Country')['DailyCases']
    .transform(lambda daily: daily.rolling(14, min_periods=1).sum())
    .fillna(0)
)

df_cases

Unnamed: 0,Country,Day,DailyCases,ActiveCases
0,Afghanistan,0,0.00,0.00
1,Afghanistan,1,0.00,0.00
2,Afghanistan,2,0.00,0.00
3,Afghanistan,3,0.00,0.00
4,Afghanistan,4,0.00,0.00
...,...,...,...,...
17422,Zimbabwe,135,0.00,3.00
17423,Zimbabwe,136,0.00,3.00
17424,Zimbabwe,137,5.00,8.00
17425,Zimbabwe,138,2.00,10.00


In [35]:
# Attach each country's centrality to its daily case rows. The join key is
# spelled out so that any other column the two frames might come to share is
# never silently picked up as an additional key.
# NOTE(review): this is an inner join, so countries whose names do not appear
# in both frames are dropped without warning — confirm that is intended.
df_potential = df_cases.merge(df_centr, on='Country')

# Raw viral potential: active cases weighted by the country's centrality.
df_potential['ViralPotential'] = df_potential['ActiveCases'] * df_potential['centrality']

# Rescale by the global maximum so the largest country-day equals 1.
df_potential['ViralPotential'] = df_potential['ViralPotential'] / df_potential['ViralPotential'].max()

# Show the ten country-days with the highest viral potential.
df_potential.sort_values('ViralPotential', ascending=False).head(10)

Unnamed: 0,Country,Day,DailyCases,ActiveCases,centrality,ViralPotential
12522,United States,113,37289.0,426232.0,0.15,1.0
12517,United States,108,31667.0,425791.0,0.15,1.0
12518,United States,109,30833.0,424199.0,0.15,1.0
12516,United States,107,30148.0,422943.0,0.15,0.99
12519,United States,110,32922.0,422849.0,0.15,0.99
12520,United States,111,24601.0,422052.0,0.15,0.99
12515,United States,106,26922.0,419898.0,0.15,0.99
12521,United States,112,28065.0,419556.0,0.15,0.98
12514,United States,105,25023.0,417974.0,0.15,0.98
12513,United States,104,27620.0,414546.0,0.15,0.97


In [36]:
# Flight-loss data per country and week/day. 'pais' is renamed to 'Country',
# and a running 'Day' number is derived from the week number and the
# day-of-week offset: (week - 1) * 7 + day_of_week.
df_pct_loss = (
    pd.read_csv('data/flights_pct_loss_week5-22.csv')
    .rename(columns={'pais': 'Country'})
    .assign(Day=lambda flights: ((flights['valor_semana'] - 1) * 7) + flights['dia_semana'])
)

df_pct_loss

Unnamed: 0,Country,valor_semana,dia_semana,voos_soma,jan_mean,pct_loss,continent,Day
0,Algeria,5,0,117.00,130.08,0.10,Africa,28
1,Algeria,5,1,174.00,130.08,-0.34,Africa,29
2,Algeria,5,2,174.00,130.08,-0.34,Africa,30
3,Algeria,5,3,177.00,130.08,-0.36,Africa,31
4,Algeria,5,4,158.00,130.08,-0.21,Africa,32
...,...,...,...,...,...,...,...,...
8540,Malta,14,5,1.00,109.77,0.99,Europe,96
8541,Morocco,22,0,2.00,110.65,0.98,Africa,147
8542,Singapore,19,6,6.00,1038.46,0.99,Asia,132
8543,Slovakia,11,6,2.00,49.38,0.96,Europe,76


In [37]:
# Keep only the join keys, the continent label and the loss figure.
df_pct_loss = df_pct_loss.loc[:, ['Country', 'Day', 'continent', 'pct_loss']]

df_pct_loss

Unnamed: 0,Country,Day,continent,pct_loss
0,Algeria,28,Africa,0.10
1,Algeria,29,Africa,-0.34
2,Algeria,30,Africa,-0.34
3,Algeria,31,Africa,-0.36
4,Algeria,32,Africa,-0.21
...,...,...,...,...
8540,Malta,96,Europe,0.99
8541,Morocco,147,Africa,0.98
8542,Singapore,132,Asia,0.99
8543,Slovakia,76,Europe,0.96


In [38]:
# Combine viral potential with flight loss for the same country and day.
# The join keys are explicit so the merge cannot silently change meaning if
# either frame later gains another column with a shared name.
# NOTE(review): this is an inner join; country-days present in only one of the
# two frames are dropped (the result has fewer rows than df_pct_loss) —
# presumably due to missing days or differing country spellings; verify.
df = df_potential[['Country', 'Day', 'ViralPotential']].merge(
    df_pct_loss,
    on=['Country', 'Day'],
)

df

Unnamed: 0,Country,Day,ViralPotential,continent,pct_loss
0,Algeria,28,0.00,Africa,0.10
1,Algeria,29,0.00,Africa,-0.34
2,Algeria,30,0.00,Africa,-0.34
3,Algeria,31,0.00,Africa,-0.36
4,Algeria,32,0.00,Africa,-0.21
...,...,...,...,...,...
6575,United States,135,0.82,North America,0.45
6576,United States,136,0.82,North America,0.44
6577,United States,137,0.80,North America,0.43
6578,United States,138,0.79,North America,0.48


In [39]:
# Pairwise correlation of the numeric columns only (Day, ViralPotential,
# pct_loss). 'Country' and 'continent' are strings; selecting numeric columns
# explicitly keeps this working on pandas versions where DataFrame.corr() no
# longer drops non-numeric columns on its own.
df.select_dtypes(include='number').corr()

Unnamed: 0,Day,ViralPotential,pct_loss
Day,1.0,0.17,0.76
ViralPotential,0.17,1.0,0.12
pct_loss,0.76,0.12,1.0


In [42]:
# Order rows by day (then country) before building the animation, whose
# frames are keyed on 'Day'.
df = df.sort_values(['Day', 'Country'])

# Animated scatter: one frame per Day, one point per Country, coloured by
# continent. The x axis is the normalized ViralPotential (0..1), so the title
# and axis labels name that quantity.
fig = px.scatter(
    df,
    x="ViralPotential",
    y="pct_loss",
    animation_frame="Day",
    animation_group="Country",
    color="continent",
    hover_name="Country",
    title='Reduction in mobility vs. Viral potential',
    labels={
        "ViralPotential": "Viral potential (normalized)",
        "pct_loss": "Flight reduction (pct_loss)",
    },
    # Fixed axis ranges keep the view stable from frame to frame.
    range_x=[0, 1],
    range_y=[-1, 1],
)


fig.show()