In [1]:
import pandas as pd
from scipy.sparse import *
from pandas.api.types import CategoricalDtype
import numpy as np
import networkx as nx

In [2]:
def compute_probable_connectivity(input_model_name, out_folder):
    """
    Apply the interlocking world city network model to simulated data to obtain a possible world city network and
    compute node degree centrality for each simulated network
    """
    ordered_log_model = pd.read_csv('../Data/ProcessingData/{out_folder}/{input_model_name}'.format(out_folder=out_folder, input_model_name=input_model_name))
    
    original_connectivity_df = pd.read_csv('../Data/ProcessingData/{out_folder}/diad_connectivity.csv'.format(out_folder=out_folder))
    original_centrality_df = pd.read_csv('../Data/ProcessingData/{out_folder}/node_degree_centrality.csv'.format(out_folder=out_folder))
    
    
    #create interlocking world city networks for simulated data
    City_c = CategoricalDtype(sorted(ordered_log_model.City.unique()), ordered=True)
    Firm_c = CategoricalDtype(sorted(ordered_log_model.Firm.unique()), ordered=True)

    row = ordered_log_model.City.astype(City_c).cat.codes
    col = ordered_log_model.Firm.astype(Firm_c).cat.codes
    
    connectivity_dfs = []
    connectivity_alpha = []
    
    node_degree_centrality_dfs = []
    node_degree_cenrality_alpha = []
    
    for i in range(17, ordered_log_model.shape[1]):
        sparse_matrix = csr_matrix((ordered_log_model.iloc[:,i], (row, col)), shape=(City_c.categories.size, Firm_c.categories.size))
        connectivity_matrix = sparse_matrix * sparse_matrix.transpose()
        connectivity_matrix = tril(connectivity_matrix, k=-1)
        
        co_connectivity_matrix = connectivity_matrix.tocoo()
        connectivity_df = pd.DataFrame({'x_City':co_connectivity_matrix.row, 'y_City':co_connectivity_matrix.col, 'connectivity':co_connectivity_matrix.data})

        city_df = pd.DataFrame({'id':range(City_c.categories.size), 'City':City_c.categories})
        connectivity_df = connectivity_df.merge(city_df, how='left', left_on='x_City', right_on='id')
        connectivity_df = connectivity_df.merge(city_df, how='left', left_on='y_City', right_on='id')

        connectivity_df = connectivity_df.iloc[:,[2,4,6]]
        
        connectivity_df.columns = ['connectivity'+str(i), 'City_x', "City_y"]
        
        connectivity_dfs.append(connectivity_df)
        
        
        #calc node degree centrality
        G = nx.from_pandas_edgelist(connectivity_df, 'City_x', 'City_y', ['connectivity' + str(i)])

        node_degree_centrality = G.degree(weight='connectivity' + str(i))

        #save degree centrality
        node_degree_centrality_dict = dict(node_degree_centrality)
        node_degree_centrality_df = pd.DataFrame({'city':list(node_degree_centrality_dict.keys()), 'centrality':list(node_degree_centrality_dict.values())})

        node_degree_centrality_dfs.append(node_degree_centrality_df)
    
    #join all connectivyt_df with original connectivity
    result_connectivity_df = original_connectivity_df
    for c in connectivity_dfs:
        result_connectivity_df = result_connectivity_df.merge(c, on=['City_x','City_y'], how='left').fillna(0)
    
    connectivity_alpha = [0] * result_connectivity_df.shape[0]
    
    for i in range(4, result_connectivity_df.shape[1]):
        connectivity_alpha = connectivity_alpha + (result_connectivity_df.iloc[:,1].apply(int) > result_connectivity_df.iloc[:,i].apply(int)).apply(int)
    
    result_connectivity_df['alpha'] = connectivity_alpha
    
    result_connectivity_df.to_excel('../Data/CleanData/{out_folder}/result_diad_connectivity.xlsx'.format(out_folder=out_folder))
    
    #join all node degree centrality
    result_node_degree_centrality_df = original_centrality_df
    for d in node_degree_centrality_dfs:
        result_node_degree_centrality_df = result_node_degree_centrality_df.merge(d, on=['city'], how='left').fillna(0)
    
    alpha = [0] * result_node_degree_centrality_df.shape[0]
    for i in range(4, result_node_degree_centrality_df.shape[1]):
        alpha = alpha + (result_node_degree_centrality_df.iloc[:,2].apply(int) > result_node_degree_centrality_df.iloc[:,i].apply(int)).apply(int)
    
    result_node_degree_centrality_df['alpha'] = alpha
    
    result_node_degree_centrality_df.to_excel('../Data/CleanData/{out_folder}/result_node_degree_centrality.xlsx'.format(out_folder=out_folder))
    
    return result_node_degree_centrality_df

In [None]:
node_degree_dfs_2018 = compute_probable_connectivity('ordered_logistic_regression_modeling_result.csv', 'data_2018')

In [88]:
node_degree_dfs_2015 = compute_probable_connectivity('ordered_logistic_regression_modeling_result.csv', 'data_2015')

In [59]:
result_node_degree_centrality_df = pd.read_excel('../Data/CleanData/data_2018/result_node_degree_centrality.xlsx')

In [78]:
result_connectivity_df = pd.read_excel('../Data/CleanData/data_2018/result_diad_connectivity.xlsx')

In [19]:
node_degree_dfs_2018.head()

Unnamed: 0.1,Unnamed: 0,city,centrality_x,weighted_centrality,centrality_y,centrality_x.1,centrality_y.1,centrality_x.2,centrality_y.2,centrality_x.3,...,centrality_x.4,centrality_y.3,centrality_x.5,centrality_y.4,centrality_x.6,centrality_y.5,centrality_x.7,centrality_y.6,centrality,alpha
0,0,Aktobe,434,0.002263,460,479.0,70.0,412.0,1040.0,602.0,...,279.0,795.0,242,720.0,773.0,690.0,395.0,511.0,271.0,319
1,1,Aktau,290,0.001512,774,741.0,598.0,466.0,77.0,352.0,...,6.0,487.0,342,454.0,663.0,336.0,1081.0,30.0,284.0,236
2,2,Almaty,2542,0.013256,3528,3304.0,3220.0,3120.0,3706.0,3615.0,...,3294.0,1908.0,3107,3136.0,2663.0,3324.0,2934.0,3193.0,3225.0,25
3,3,Alytus,85,0.000443,1009,1138.0,762.0,195.0,540.0,578.0,...,594.0,0.0,52,463.0,488.0,56.0,328.0,814.0,159.0,92
4,4,Ashgabat,352,0.001836,832,704.0,677.0,804.0,724.0,493.0,...,530.0,672.0,609,410.0,599.0,657.0,266.0,895.0,916.0,263
