In [2]:
import pandas as pd
import numpy as np
import networkx as nx
import random
from scipy.stats import pearsonr
from scipy.spatial import distance
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize']=20,20

Carrega a base de voos

In [3]:
# Load the flight graph and keep only the origin country, destination country
# and flight-count columns, indexed by the (origin, destination) pair.
international_flights = (
    pd.read_csv('international_graph.csv')
    .loc[:, ['pais_origem', 'pais_destino', 'qtde_voos']]
    .set_index(['pais_origem', 'pais_destino'])
)

Descomentar para testes:

# Optional test subset: keep only the routes whose origin AND destination are
# both in countries_loc, so the rest of the notebook runs on a small graph.
countries_loc = ['Brazil', 'Italy', 'China', 'United States', 'France', 'Germany', 'Iran', 'United Kingdom']

# Boolean masks over the two index levels replace the row-by-row
# DataFrame.append loop (append was removed in pandas 2.0).
origin_in_subset = international_flights.index.get_level_values('pais_origem').isin(countries_loc)
destination_in_subset = international_flights.index.get_level_values('pais_destino').isin(countries_loc)

# Flat frame with the columns pais_origem, pais_destino, qtde_voos.
international_flights_loc = (
    international_flights.loc[origin_in_subset & destination_in_subset, ['qtde_voos']]
    .astype(float)
    .reset_index()
)

international_flights = international_flights_loc.set_index(['pais_origem', 'pais_destino'])

In [4]:
# Preview the first rows of the flights table (indexed by origin/destination country).
international_flights.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,qtde_voos
pais_origem,pais_destino,Unnamed: 2_level_1
Algeria,Argentina,0.00137
Algeria,Austria,0.173973
Algeria,Belgium,0.253425
Algeria,Brazil,0.021918
Algeria,Canada,0.272603


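Constrói a lista de arestas simétrica: para cada par de países usa a média do número de voos nos dois sentidos (ou o valor existente, quando só há voos em um sentido)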

In [5]:
# Build a symmetric edge list (source, target, value).
# For every (origin, destination) pair in the flights table:
#   * if the reverse route also exists, both directions get the mean of the two
#     flight counts;
#   * otherwise both directions get the single available flight count.
# Each pair is emitted once in each direction, in the order the pairs first
# appear in the flights index.
indexes = international_flights.index

# (origin, destination) -> flight count, for constant-time lookups.
flight_counts = international_flights['qtde_voos'].to_dict()

emitted_pairs = set()   # directed pairs already written to the edge list
edge_records = []       # rows are collected here and turned into a frame once

for origin, destination in indexes:
    if (origin, destination) in emitted_pairs:
        continue

    forward_pair = (origin, destination)
    reverse_pair = (destination, origin)

    if reverse_pair in flight_counts:
        edge_value = (flight_counts[forward_pair] + flight_counts[reverse_pair]) / 2
    else:
        edge_value = flight_counts[forward_pair]

    edge_records.append({'source': origin, 'target': destination, 'value': edge_value})
    edge_records.append({'source': destination, 'target': origin, 'value': edge_value})
    emitted_pairs.add(forward_pair)
    emitted_pairs.add(reverse_pair)

international_flights_avg = pd.DataFrame(edge_records, columns=['source', 'target', 'value'])

# Kept for compatibility with any later cell that reads the list of emitted pairs.
international_flights_avg_list = international_flights_avg[['source', 'target']].values.tolist()
        

In [13]:
# Persist the averaged edge list (source, target, value) without the row index,
# then preview its first rows.
international_flights_avg.to_csv('transition_matrix_edge_list.csv', index=False)
international_flights_avg.head()

Unnamed: 0,source,target,value
0,Algeria,Argentina,0.005479
1,Argentina,Algeria,0.005479
2,Algeria,Austria,0.141781
3,Austria,Algeria,0.141781
4,Algeria,Belgium,0.307534


Constrói o grafo com a probabilidade de voos entre países

In [14]:
# Turn the averaged flight counts into transition probabilities: each edge value
# is divided by the total value leaving its source country.
flight_values = international_flights_avg['value'].astype(float)

# Total per source country. Only the numeric 'value' column is summed: summing
# the whole frame would also aggregate the string 'target' column on recent
# pandas versions.
country_international_flights = (
    flight_values.groupby(international_flights_avg['source']).sum().to_frame('value')
)

transition_matrix = international_flights_avg[['source', 'target']].copy()

# Vectorised normalisation: map every row's source to its total and divide.
source_totals = international_flights_avg['source'].map(country_international_flights['value'])
transition_matrix['prob'] = flight_values / source_totals


In [15]:

# Preview the per-edge transition probabilities (source, target, prob).
transition_matrix.head()

Unnamed: 0,source,target,prob
0,Algeria,Argentina,0.0003
1,Argentina,Algeria,0.000166
2,Algeria,Austria,0.007769
3,Austria,Algeria,0.00059
4,Algeria,Belgium,0.016851


In [16]:
# Pivot the edge list into a matrix: rows are target countries, columns are
# source countries, cells hold the transition probability source -> target.
# The string alias 'sum' is used instead of the builtin `sum`, which recent
# pandas versions flag as deprecated for aggfunc.
transition_matrix_crosstab = pd.crosstab(
    index=transition_matrix['target'],
    columns=transition_matrix['source'],
    values=transition_matrix['prob'],
    aggfunc='sum',
)
# Country pairs with no edge have no entry in the crosstab; treat them as probability 0.
transition_matrix_crosstab = transition_matrix_crosstab.fillna(0)

In [17]:
# Display the transition matrix (rows: target country, columns: source country).
transition_matrix_crosstab

source,Algeria,Anguilla,Argentina,Armenia,Aruba,Australia,Austria,Azerbaijan,Bahamas,Bahrain,...,United Kingdom,United States,Uruguay,Uzbekistan,Venezuela,Vietnam,Virgin Islands,West Bank,Western Sahara,Zambia
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Algeria,0.000000,0.0,0.000166,0.000000,0.000000,0.000000,0.000590,0.000000,0.000000,0.000128,...,0.000451,0.000012,0.000307,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Anguilla,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Argentina,0.000300,0.0,0.000000,0.000000,0.000466,0.000675,0.000006,0.000000,0.000000,0.000000,...,0.000894,0.008633,0.302989,0.000000,0.084052,0.000000,0.000000,0.000000,0.0,0.000000
Armenia,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000097,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Aruba,0.000000,0.0,0.000042,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000008,0.001917,0.000613,0.000000,0.159483,0.000000,0.000000,0.000000,0.0,0.000000
Australia,0.000000,0.0,0.002537,0.000000,0.000000,0.000000,0.000211,0.000000,0.000000,0.000043,...,0.001893,0.022735,0.000000,0.000000,0.004310,0.000000,0.000000,0.000000,0.0,0.016336
Austria,0.007769,0.0,0.000042,0.021533,0.000000,0.000409,0.000000,0.009050,0.000000,0.000321,...,0.015732,0.004552,0.000000,0.000557,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Azerbaijan,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000006,0.000000,0.000000,0.000000,...,0.000005,0.000003,0.000000,0.000557,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Bahamas,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.001948,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000
Bahrain,0.000225,0.0,0.000000,0.000000,0.000000,0.000011,0.000043,0.000000,0.000000,0.000000,...,0.001231,0.000103,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000


Carrega o arquivo com o total diário de casos e pega o dia específico - 77

In [18]:
# Day whose observed case counts are compared against the simulation.
TARGET_DAY = 77

# Daily cases per (country name, day).
total_cases = pd.read_csv('total_cases_countries_normalized.csv')
total_cases = total_cases[['Name', 'Day', 'DailyCases']].set_index(['Name', 'Day'])

# Countries in the same order as the columns of the transition matrix.
countries = transition_matrix_crosstab.columns.to_list()

# Kept for compatibility: list of the (Name, Day) tuples present in the data.
indexes = total_cases.index.to_list()

# Select the rows of the target day and index them by country name only.
is_target_day = total_cases.index.get_level_values('Day') == TARGET_DAY
cases_on_target_day = total_cases.loc[is_target_day, 'DailyCases'].droplevel('Day')

# One row per country of the transition matrix; countries with no record on the
# target day get 0 cases. The result has a plain (single-level) country index
# and a float 'n_cases' column.
daily_cases = (
    cases_on_target_day
    .reindex(countries, fill_value=0.)
    .astype(float)
    .rename_axis(None)
    .to_frame('n_cases')
)

In [19]:
# Display the observed number of cases per country on the selected day.
daily_cases

Unnamed: 0,n_cases
Algeria,10
Anguilla,0
Argentina,9
Armenia,22
Aruba,0
Australia,77
Austria,156
Azerbaijan,0
Bahamas,0
Bahrain,7


Define a função que cria o vetor de estado inicial do random walk (todos os casos iniciais ficam na China)

In [20]:
def new_state_df(initial_number, transition_matrix_crosstab, seed_country='China'):
    """Build the initial state vector for the random walk.

    Parameters
    ----------
    initial_number : int or float
        Number of cases placed in the seed country.
    transition_matrix_crosstab : pandas.DataFrame
        Transition matrix; only its column labels are used, as the index of
        the returned frame.
    seed_country : str, default 'China'
        Country that receives ``initial_number``. If it is not among the
        columns, every entry of the returned frame is 0.

    Returns
    -------
    pandas.DataFrame
        Single float column ``'n_cases'`` indexed by the matrix columns, with
        ``float(initial_number)`` at ``seed_country`` and 0.0 elsewhere.
    """
    # Allocate the whole column as float zeros at once so the frame has a
    # numeric dtype instead of being filled cell by cell.
    state_df = pd.DataFrame(0., index=transition_matrix_crosstab.columns, columns=['n_cases'])

    if seed_country in state_df.index:
        state_df.loc[seed_country, 'n_cases'] = float(initial_number)

    return state_df


Cria o dataframe a ser utilizado e roda o gridsearch

In [None]:
# Grid search over the initial number of cases (V) and the contamination rate (R).
# For every (V, R) pair the state vector is propagated through the transition
# matrix N_STEPS times; the first step is not scaled, each later step is
# multiplied by R. The final state is compared with the observed daily cases
# using the Euclidean distance, and the pair with the smallest distance is kept.
N_STEPS = 78

contamination_rate = [float(x)/10.0 for x in range(10,31,1)]
initial_number = [x for x in range(10,510,10)]

# Observed cases as a flat float vector: distance.euclidean is documented for
# 1-D inputs, while daily_cases is a single-column frame.
observed_cases = np.asarray(daily_cases, dtype=float).ravel()

grid_records = []           # one dict per (V, R); turned into a frame once at the end
min_dist = float('inf')     # any computed distance replaces this initial value
v_min_dist = None
r_min_dist = None

for v in initial_number:
    for r in contamination_rate:
        # First step: plain propagation of the initial state.
        state_df = transition_matrix_crosstab.dot(new_state_df(v, transition_matrix_crosstab))
        # Remaining steps: propagate, then scale by the contamination rate.
        for _ in range(N_STEPS - 1):
            state_df = transition_matrix_crosstab.dot(state_df)
            state_df = state_df*r

        simulated_cases = np.asarray(state_df, dtype=float).ravel()
        dist_euclidean = distance.euclidean(observed_cases, simulated_cases)

        grid_records.append({'V': v, 'R': r, 'Euclidean Distance': dist_euclidean})

        # Strict comparison keeps the first (V, R) pair in case of ties.
        if dist_euclidean < min_dist:
            min_dist = dist_euclidean
            v_min_dist = v
            r_min_dist = r

correlation_df = pd.DataFrame(grid_records, columns=['V', 'R', 'Euclidean Distance'])

print('Menor distancia Euclideana: ' + str(min_dist))
print('V: ' + str(v_min_dist))
print('R: ' + str(r_min_dist))

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [None]:
# Reshape the grid-search results into a V (rows) x R (columns) table of
# Euclidean distances, then display it.
correlation_pivot = correlation_df.pivot(index='V', columns='R', values='Euclidean Distance')
correlation_pivot

In [None]:
# Heatmap of the Euclidean distance for every (V, R) pair, with each cell annotated by its value.
heatmap = sns.heatmap(correlation_pivot, annot=True)

In [None]:
# Run one simulation with 10 initial cases and a contamination rate of 1.1, and
# save the resulting state vector.
# NOTE(review): 10 and 1.1 are hard-coded here; presumably they are the best
# (V, R) found by the grid search - confirm against v_min_dist / r_min_dist.

# First of the 78 steps: plain propagation, without scaling.
state_df = transition_matrix_crosstab.dot(new_state_df(10, transition_matrix_crosstab))

# Remaining 77 steps: propagate, then multiply by the contamination rate.
for _ in range(77):
    state_df = transition_matrix_crosstab.dot(state_df) * 1.1

state_df.to_csv('state_vector.csv')

In [None]:
# Preview the first rows of the simulated state vector.
state_df.head()