In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import diplomatic_exchanges as de

In [2]:
# Open the SQLite database file used throughout this notebook.
conn = sqlite3.connect('diplomatic.db')
# Fetch the frame produced by the project helper for 2005
# (presumably the second argument is the year; the rows displayed further
# down all carry year == 2005).
data = de.get_data_for_regression(conn, 2005)

In [3]:
# Show the column labels of the loaded frame.
# NOTE(review): the listing contains repeated labels ('index', 'year',
# 'ccode'), so selecting one of those by name yields several columns.
data.columns

Index(['index', 'pagerank', 'betweenness', 'closeness', 'degree', 'in_degree',
       'out_degreee', 'node_id', 'year', 'index', 'destination country',
       'destination city', 'description', 'time', 'year', 'year_aggregate',
       'ccode', 'index', 'countrycode', 'country', 'currency_unit', 'year',
       'rgdpe', 'rgdpo', 'pop', 'emp', 'avh', 'hc', 'ccon', 'cda', 'cgdpe',
       'cgdpo', 'cn', 'ck', 'ctfp', 'cwtfp', 'rgdpna', 'rconna', 'rdana',
       'rnna', 'rkna', 'rtfpna', 'rwtfpna', 'labsh', 'irr', 'delta', 'xr',
       'pl_con', 'pl_da', 'pl_gdpo', 'i_cig', 'i_xm', 'i_xr', 'i_outlier',
       'i_irr', 'cor_exp', 'statcap', 'csh_c', 'csh_i', 'csh_g', 'csh_x',
       'csh_m', 'csh_r', 'pl_c', 'pl_i', 'pl_g', 'pl_x', 'pl_m', 'pl_n',
       'pl_k', 'ccode', 'index', 'stateabb', 'ccode', 'year', 'milex',
       'milper', 'irst', 'pec', 'tpop', 'upop', 'cinc', 'version'],
      dtype='object')

In [4]:
# Binary indicator: 1 when 'destination country' holds a value, 0 when it is
# missing. notna() treats both None and NaN as missing; the previous
# `x is None` test would have coded a NaN-missing entry as 1.
data['president_visit_binary'] = data['destination country'].notna().astype(int)

In [5]:
# Display the frame to eyeball the president_visit_binary column.
data

Unnamed: 0,index,pagerank,betweenness,closeness,degree,in_degree,out_degreee,node_id,year,index.1,...,year.1,milex,milper,irst,pec,tpop,upop,cinc,version,president_visit_binary
0,24,1.000000,7.301305e-04,1.0,1.000000,0.948718,1.000000,2,2005,,...,2005,495326000,1474,94897,3267376,298166.0,164533.0,1.563021e-01,2021,0
1,48,0.521256,3.509150e-02,1.0,0.600454,0.621795,0.701754,20,2005,506.0,...,2005,13224000,62,15327,370302,32253.0,18647.0,1.091951e-02,2021,1
2,73,0.014564,9.309024e-06,1.0,0.014756,0.025641,0.017544,31,2005,,...,2005,36000,1,0,266,329.0,0.0,2.476260e-05,2021,0
3,1615,0.407570,2.397368e-01,1.0,0.457435,0.564103,0.415205,40,2005,,...,2005,1444000,49,245,12063,11292.0,2927.0,1.351150e-03,2021,0
4,104,0.046998,0.000000e+00,1.0,0.063564,0.070513,0.087719,41,2005,,...,2005,-9,0,0,3954,9261.0,2171.0,5.836711e-04,2021,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,1666,0.000000,2.754619e-08,1.0,0.000000,0.006410,0.005848,970,2005,,...,2005,0,0,0,4,10.0,0.0,2.949441e-07,2021,0
188,1601,0.021359,0.000000e+00,1.0,0.020431,0.032051,0.017544,983,2005,,...,2005,0,0,0,12,52.0,0.0,1.450065e-06,2021,0
189,1604,0.023034,0.000000e+00,0.0,0.021566,0.025641,0.017544,986,2005,,...,2005,0,0,0,15,20.0,0.0,6.564231e-07,2021,0
190,1607,0.007736,1.725024e-05,1.0,0.007946,0.019231,0.017544,987,2005,,...,2005,0,0,0,10,502.0,0.0,1.299264e-05,2021,0


In [6]:
# Network-centrality columns that are reduced with PCA.
# 'out_degreee' (three e's) matches the label in the frame's column listing.
columns_for_PCA = [
    'pagerank',
    'betweenness',
    'closeness',
    'degree',
    'in_degree',
    'out_degreee',
]

In [7]:
# Pull the selected centrality columns out as a plain NumPy array.
X = data[columns_for_PCA].to_numpy()

In [8]:
# X^T X of the raw (uncentred) columns, followed by its eigen-decomposition.
# np.linalg.eig does not sort its eigenvalues (the recorded output is not in
# strictly descending order).
square = X.T @ X
np.linalg.eig(square)

(array([2.35360109e+02, 2.67356003e+01, 1.36278176e+00, 4.09527704e-01,
        2.02222300e-02, 1.13066428e-01]),
 array([[-0.1980397 ,  0.42202528, -0.08549149,  0.23361905,  0.38960363,
          0.7543191 ],
        [-0.02917674,  0.03548439,  0.95822251, -0.19480931, -0.08549253,
          0.18557898],
        [-0.88740224, -0.46001544, -0.01313597,  0.01046749, -0.00777522,
          0.02367447],
        [-0.22775924,  0.46218364, -0.07634449,  0.16197447, -0.83633031,
          0.05476681],
        [-0.2562887 ,  0.46066302,  0.2113814 ,  0.42606517,  0.34748556,
         -0.61249151],
        [-0.23428208,  0.42801539, -0.15432983, -0.83641775,  0.1436836 ,
         -0.13363163]]))

In [9]:
from sklearn.decomposition import PCA

In [10]:
# PCA estimator that keeps two components.
pca = PCA(n_components=2)

In [11]:
# Fit the two-component PCA on the centrality matrix X.
pca.fit(X)

In [12]:
# Project X onto the fitted components (one row per row of X, two columns).
X_pca = pca.transform(X)

In [13]:
# Wrap the component scores in a frame that shares data's row labels.
# X_pca rows are in the same order as data's rows, so reusing data.index keeps
# any later label-aligned assignment (data['PC1'] = pca_df['PC1']) correct
# even when data does not carry a plain 0..n-1 index.
pca_df = pd.DataFrame(data=X_pca, columns=['PC1','PC2'], index=data.index)
pca_df

Unnamed: 0,PC1,PC2
0,1.524585,-0.129111
1,0.779701,-0.046965
2,-0.407457,-0.012093
3,0.492171,0.201756
4,-0.309330,-0.019993
...,...,...
187,-0.437679,-0.011962
188,-0.397993,-0.011777
189,-0.412477,-0.050110
190,-0.417521,-0.012320


In [14]:
# Copy both principal-component score columns onto the main frame
# (pandas aligns the assignment on the row index).
for component in ('PC1', 'PC2'):
    data[component] = pca_df[component]

In [15]:
# Economic control: the 'rgdpna' column divided element-wise by 'pop'.
# Rows where either input is missing come out as NaN.
data['per_capita'] = data['rgdpna'].div(data['pop'])

In [16]:
# Inspect the derived column; NaN marks rows lacking rgdpna and/or pop.
data['per_capita']

0      54449.451941
1      45565.692084
2      35576.570576
3               NaN
4       1574.206085
           ...     
187             NaN
188             NaN
189             NaN
190             NaN
191             NaN
Name: per_capita, Length: 192, dtype: float64

In [17]:
# Number of rows whose per_capita value is missing.
data["per_capita"].isna().sum()

28

In [18]:
# Keep only the rows that have a per_capita value; the regressions below
# are fitted on this reduced frame.
data = data.dropna(subset=["per_capita"])
data

Unnamed: 0,index,pagerank,betweenness,closeness,degree,in_degree,out_degreee,node_id,year,index.1,...,irst,pec,tpop,upop,cinc,version,president_visit_binary,PC1,PC2,per_capita
0,24,1.000000,0.000730,1.0,1.000000,0.948718,1.000000,2,2005,,...,94897,3267376,298166.0,164533.0,0.156302,2021,0,1.524585,-0.129111,54449.451941
1,48,0.521256,0.035092,1.0,0.600454,0.621795,0.701754,20,2005,506.0,...,15327,370302,32253.0,18647.0,0.010920,2021,1,0.779701,-0.046965,45565.692084
2,73,0.014564,0.000009,1.0,0.014756,0.025641,0.017544,31,2005,,...,0,266,329.0,0.0,0.000025,2021,0,-0.407457,-0.012093,35576.570576
4,104,0.046998,0.000000,1.0,0.063564,0.070513,0.087719,41,2005,,...,0,3954,9261.0,2171.0,0.000584,2021,0,-0.309330,-0.019993,1574.206085
5,147,0.136362,0.002928,1.0,0.161180,0.185897,0.157895,42,2005,,...,0,6039,9343.0,2819.0,0.000820,2021,0,-0.122271,-0.018905,9945.891176
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,1570,0.267671,0.010502,1.0,0.312145,0.352564,0.321637,840,2005,526.0,...,470,35953,85821.0,16203.0,0.005195,2021,1,0.184685,-0.024675,4950.987895
176,1575,0.378317,0.015015,1.0,0.451759,0.480769,0.444444,850,2005,529.0,...,3675,207738,224481.0,29974.0,0.014128,2021,1,0.435790,-0.032541,6518.595408
178,1583,0.390267,0.075917,1.0,0.423383,0.442308,0.479532,900,2005,530.0,...,7757,181092,20521.0,13667.0,0.007516,2021,1,0.426349,0.013414,46135.263007
180,1591,0.178444,0.019219,1.0,0.192963,0.250000,0.175439,920,2005,,...,889,20923,4134.0,1905.0,0.000915,2021,0,-0.043229,0.001428,33447.358963


In [39]:
import statsmodels.formula.api as smf

# Model 1: logit of the visit indicator on both principal components plus
# the cinc and per_capita controls.
log_reg_1 = smf.logit(
    "president_visit_binary ~ PC1 + PC2 + cinc + per_capita",
    data=data,
).fit()
print(log_reg_1.summary())

Optimization terminated successfully.
         Current function value: 0.443399
         Iterations 6
                             Logit Regression Results                             
Dep. Variable:     president_visit_binary   No. Observations:                  164
Model:                              Logit   Df Residuals:                      159
Method:                               MLE   Df Model:                            4
Date:                    Sat, 25 Feb 2023   Pseudo R-squ.:                  0.2598
Time:                            23:02:48   Log-Likelihood:                -72.718
converged:                           True   LL-Null:                       -98.247
Covariance Type:                nonrobust   LLR p-value:                 2.170e-10
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.2095      0.288     -4.198      0.000      -1.774      -0.6

In [52]:
# Model 2: same specification as model 1 but without PC2.
log_reg_2 = smf.logit(
    "president_visit_binary ~ PC1 + cinc + per_capita",
    data=data,
).fit()
print(log_reg_2.summary())
# Coefficients are printed; the standard errors are shown as the cell's value.
print(list(log_reg_2.params))
list(log_reg_2.bse)

Optimization terminated successfully.
         Current function value: 0.443660
         Iterations 6
                             Logit Regression Results                             
Dep. Variable:     president_visit_binary   No. Observations:                  164
Model:                              Logit   Df Residuals:                      160
Method:                               MLE   Df Model:                            3
Date:                    Sat, 25 Feb 2023   Pseudo R-squ.:                  0.2594
Time:                            23:25:34   Log-Likelihood:                -72.760
converged:                           True   LL-Null:                       -98.247
Covariance Type:                nonrobust   LLR p-value:                 4.957e-11
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.2044      0.288     -4.189      0.000      -1.768      -0.6

[0.28750837352983, 0.779474894498114, 11.64993172705481, 9.961531127302768e-06]

In [55]:
from sklearn.decomposition import PCA
import statsmodels.formula.api as smf
import csv

# Centrality columns fed into the PCA. 'out_degreee' (three e's) is the label
# that appears in the frame, so it is kept verbatim. Defined once: it does not
# change between years.
columns_for_PCA = ['pagerank', 'betweenness', 'closeness', 'degree', 'in_degree', 'out_degreee']
# Two-component PCA; the same estimator object is refit for every year.
pca = PCA(n_components=2)
# Result containers: one inner list per year, so csv.writer.writerows can
# dump them directly.
#   *_1 -> model with PC1 + PC2 + cinc + per_capita
#   *_2 -> model with PC1 + cinc + per_capita
List_Param_1 = []
List_Std_1 = []
List_Param_2 = []
List_Std_2 = []
# Create the connection.
conn = sqlite3.connect('diplomatic.db')
for i in range(1970, 2010, 5):
    # Retrieve the data for year i.
    data = de.get_data_for_regression(conn, i)
    # Visit dummy: 1 when a destination country is recorded, 0 when missing.
    # notna() treats both None and NaN as missing.
    data['president_visit_binary'] = data['destination country'].notna().astype(int)
    # Extract the centrality matrix and fit/apply the PCA.
    X = data[columns_for_PCA].values
    pca.fit(X)
    X_pca = pca.transform(X)
    # Reuse data's row labels so the label-aligned assignments below stay
    # correct whatever index the helper returns.
    pca_df = pd.DataFrame(data=X_pca, columns=['PC1','PC2'], index=data.index)
    data['PC1'] = pca_df['PC1']
    data['PC2'] = pca_df['PC2']
    # Generate the economic variable.
    data['per_capita'] = data['rgdpna']/data['pop']
    # Drop rows without a per_capita value.
    data = data[data["per_capita"].notna()]
    # Fit both logistic regressions.
    log_reg_1 = smf.logit("president_visit_binary ~ PC1 + PC2 + cinc + per_capita", data=data).fit()
    log_reg_2 = smf.logit("president_visit_binary ~ PC1 + cinc + per_capita", data=data).fit()
    # Store each model's results in its own lists. Previously List_Param_2
    # received model 1's params and List_Std_1 received model 2's std errors.
    List_Param_1.append(list(log_reg_1.params))
    List_Std_1.append(list(log_reg_1.bse))
    List_Param_2.append(list(log_reg_2.params))
    List_Std_2.append(list(log_reg_2.bse))
# TODO: turn the hard-coded "data/..." path into a parameter (FILENAME).
# The with-block closes the file on exit, so no explicit close() is needed.
with open("data/Std_Error_1", 'w', newline='') as f:
    write = csv.writer(f, delimiter=",")
    write.writerows(List_Std_1)

Optimization terminated successfully.
         Current function value: 0.445705
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.445711
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.332685
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.335600
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.305576
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.310457
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.290679
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.290820
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.341603
         Iterations 7
Optimization terminated successfully.
         Current function value: 0.341814
  

[[-1.2840518478681862,
  2.1520308105714663,
  0.054260811586177404,
  -10.69468669790581,
  -2.466613682699512e-05],
 [-2.148290670187983,
  2.8012839008947554,
  2.146437164763458,
  5.541199376307336,
  5.561931714199929e-06],
 [-2.3641322489501615,
  2.9790290722415556,
  -8.230731694018695,
  -12.21303389852714,
  3.092634148213096e-06],
 [-2.5029172247641074,
  3.3047174421037835,
  0.39677391589118466,
  -15.492157066925454,
  1.0580910644766968e-05],
 [-2.0608933661807134,
  4.003720060026095,
  -0.322724520636188,
  -21.877034434550918,
  1.0951580862709849e-05],
 [-2.031111953910869,
  4.849341570639142,
  -2.1067495943626398,
  -51.21970200209548,
  2.351139065035e-05],
 [-0.3616135079616139,
  5.197987983355619,
  4.8339171627545285,
  -39.86285762041579,
  -6.705943179467575e-06],
 [-1.2095407285442603,
  4.088601141742986,
  -1.1986759582902722,
  -30.212005691891225,
  4.067950822269846e-06]]

In [61]:
import csv

# Dump the collected standard errors, one CSV row per year.
# Leaving the with-block closes the file.
with open("data/Std_Error_1", 'w', newline='') as f:
    csv.writer(f, delimiter=",").writerows(List_Std_1)