In [12]:
import logistic_func
from pathlib import Path
import networkx as nx
from collections import defaultdict
from collections import Counter
from tqdm import tqdm
import pickle as pk
import numpy as np
import pandas as pd
from datetime import datetime as dt
from geopy.distance import great_circle
from random import choices
from random import choice
import importlib
import timeit
import importlib
import copy

import statsmodels.api as sm
import statsmodels.formula.api as smf

In [13]:
importlib.reload(logistic_func)

<module 'logistic_func' from '/home/quint/twitter_network/Soc science paper/logistic_func.py'>

In [46]:
G = pk.load(open("../Pickles/follower_graph.pk", "rb"))    

In [30]:
# remove nodes without latitude or longitude
lat = nx.get_node_attributes(G,"lat")
lon = nx.get_node_attributes(G,"lon")
nodes_rem = [key for key,val in lat.items() if pd.isna(val) or pd.isna(lon[key])]
G.remove_nodes_from(nodes_rem)

In [54]:
rand_pop = pk.load(open("../Pickles/radiation_pop_rand_labels_2.pk", "rb"))
radiation_pop = nx.get_edge_attributes(G, "radiation_pop")
radiation_pop.update(rand_pop)

In [57]:
out = logistic_func.get_dyads_rad(G, rand_pop)
neg_dyads = out[0]
pos_dyads = out[1]

In [58]:
len(neg_dyads)

17408294

In [59]:
len(pos_dyads)

17547086

In [65]:
X = logistic_func.get_features(neg_dyads, pos_dyads, G, radiation_pop, distance = True)

Initializing data
Getting edge features


 72%|█████████████████████████████████████████████████████████▍                      | 25111001/34955380 [09:34<02:44, 59915.83it/s]IOStream.flush timed out
 89%|███████████████████████████████████████████████████████████████████████         | 31029138/34955380 [12:05<00:57, 68252.04it/s]IOStream.flush timed out
100%|████████████████████████████████████████████████████████████████████████████████| 34955380/34955380 [12:59<00:00, 44814.92it/s]


Filtering NAs
Building dataframe
Adding edge population
Adding outcome and cleaning NA's


In [67]:
X = X.sample(frac=1)
X.loc[X.radiation_pop < 0.001, "radiation_pop"] = 1

In [68]:
X.loc[X.distance < 0.001, "distance"] = 1

In [69]:
X["gender_hom"] = X["gender_hom"].apply(lambda x: 1 if x!=0 else 0)

In [70]:
max(np.log10(X.radiation_pop))

8.494809935491762

In [71]:
pk.dump(X, open("../Pickles/features_soc_sci.pk", "wb"))

## RUN REGRESSIONS

In [14]:
X = pd.read_pickle("../Pickles/features_soc_sci.pk")


In [3]:
X.y.value_counts()

y
1    15795627
0    15396117
Name: count, dtype: int64

## Radiation pop vs distance models

In [4]:
start = timeit.default_timer()
mod = smf.logit(formula = "y ~ np.log10(radiation_pop)", data = X)
res = mod.fit(cov_type='HC3')
stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res.params), res.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.624753
         Iterations 6
Time:  38.496465353062376
                               0    1
Intercept                2661.60  0.0
np.log10(radiation_pop)     0.37  0.0


In [5]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ np.log10(distance)", data = X)
res = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res.params), res.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.631184
         Iterations 6
Time:  38.54440634907223
                        0    1
Intercept           21.90  0.0
np.log10(distance)   0.35  0.0


In [6]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ np.log10(distance) + np.log10(radiation_pop)", data = X)
res= mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res.params), res.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.624751
         Iterations 6
Time:  45.013772170990705
                               0    1
Intercept                2881.33  0.0
np.log10(distance)          1.02  0.0
np.log10(radiation_pop)     0.36  0.0


## Age models

In [7]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff", data = X)
res_age_diff = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age_diff.params), res_age_diff.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.673691
         Iterations 4
Time:  30.119741610018536
              0    1
Intercept  1.67  0.0
age_diff   0.96  0.0


In [8]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff + age_ego", data = X)
res_age_ego = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age_ego.params), res_age_ego.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.656346
         Iterations 5
Time:  38.647067387006246
              0    1
Intercept  0.54  0.0
age_diff   0.96  0.0
age_ego    1.03  0.0


In [9]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff + age_alter", data = X)
res_age_alt = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age_alt.params), res_age_alt.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.654048
         Iterations 5
Time:  38.66013568395283
              0    1
Intercept  0.49  0.0
age_diff   0.95  0.0
age_alter  1.03  0.0


In [10]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff + age_ego + age_alter", data = X)
res_age = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age.params), res_age.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.641675
         Iterations 5
Time:  40.9698366750963
              0    1
Intercept  0.21  0.0
age_diff   0.95  0.0
age_ego    1.03  0.0
age_alter  1.03  0.0


In [11]:
res_age.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191740.0
Method:,MLE,Df Model:,3.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.07415
Time:,11:51:46,Log-Likelihood:,-20015000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.5723,0.002,-946.027,0.000,-1.576,-1.569
age_diff,-0.0521,3.8e-05,-1372.370,0.000,-0.052,-0.052
age_ego,0.0254,2.98e-05,850.780,0.000,0.025,0.025
age_alter,0.0279,3.01e-05,926.566,0.000,0.028,0.028


In [12]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff_log", data = X)
res_age = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age.params), res_age.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.656577
         Iterations 5
Time:  33.937361738993786
                 0    1
Intercept     2.08  0.0
age_diff_log  0.21  0.0


In [13]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff_log + np.log2(age_ego)", data = X)
res_age = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age.params), res_age.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.644327
         Iterations 5
Time:  39.79756749898661
                     0    1
Intercept         0.04  0.0
age_diff_log      0.20  0.0
np.log2(age_ego)  2.05  0.0


In [14]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff_log + np.log2(age_alter)", data = X)
res_age = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age.params), res_age.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.641524
         Iterations 5
Time:  39.421345336013474
                       0    1
Intercept           0.03  0.0
age_diff_log        0.20  0.0
np.log2(age_alter)  2.24  0.0


In [15]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff_log + np.log2(age_ego) + np.log2(age_alter)", data = X)
res_age = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

df = pd.concat([np.exp(res_age.params), res_age.pvalues], axis = 1)
print(round(df,2))

Optimization terminated successfully.
         Current function value: 0.634789
         Iterations 5
Time:  42.51007950503845
                       0    1
Intercept           0.00  0.0
age_diff_log        0.20  0.0
np.log2(age_ego)    1.74  0.0
np.log2(age_alter)  1.95  0.0


In [16]:
res_age.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191740.0
Method:,MLE,Df Model:,3.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.08408
Time:,11:55:24,Log-Likelihood:,-19800000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-5.8010,0.006,-999.690,0.000,-5.812,-5.790
age_diff_log,-1.6027,0.001,-1434.767,0.000,-1.605,-1.601
np.log2(age_ego),0.5543,0.001,644.734,0.000,0.553,0.556
np.log2(age_alter),0.6668,0.001,769.078,0.000,0.665,0.668


## Multivariate Models

In [17]:
##### Just party

start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_hom) + C(party_alter) + C(party_ego)", data = X)
res_0 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

Optimization terminated successfully.
         Current function value: 0.675567
         Iterations 4
Time:  401.0656213160837


In [18]:
res_0.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191736.0
Method:,MLE,Df Model:,7.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.02525
Time:,12:03:07,Log-Likelihood:,-21072000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.2332,0.002,-127.111,0.000,-0.237,-0.230
C(party_hom)[T.Democrat],0.7231,0.002,374.030,0.000,0.719,0.727
C(party_hom)[T.Independent],-0.0116,0.004,-3.227,0.001,-0.019,-0.005
C(party_hom)[T.Republican],0.4782,0.002,226.900,0.000,0.474,0.482
C(party_alter)[T.Independent],-0.1258,0.002,-73.094,0.000,-0.129,-0.122
C(party_alter)[T.Republican],-0.2247,0.002,-123.687,0.000,-0.228,-0.221
C(party_ego)[T.Independent],0.0896,0.002,51.487,0.000,0.086,0.093
C(party_ego)[T.Republican],0.0301,0.002,16.531,0.000,0.027,0.034


In [19]:
##### Just demographics

start = timeit.default_timer()

mod = smf.logit(formula = "y ~ age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)", data = X)
res_01 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

Optimization terminated successfully.
         Current function value: 0.625977
         Iterations 6
Time:  846.7948780689621


In [20]:
res_01.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191724.0
Method:,MLE,Df Model:,19.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.0968
Time:,12:18:17,Log-Likelihood:,-19525000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-6.3562,0.008,-844.166,0.000,-6.371,-6.341
C(race_hom)[T.African-American],1.4074,0.006,221.608,0.000,1.395,1.420
C(race_hom)[T.Asian],1.1018,0.018,61.039,0.000,1.066,1.137
C(race_hom)[T.Caucasian],0.3656,0.005,79.366,0.000,0.357,0.375
C(race_hom)[T.Hispanic],1.4496,0.010,146.041,0.000,1.430,1.469
C(race_hom)[T.Other],1.6198,0.057,28.263,0.000,1.507,1.732
"C(race_alter, Treatment(reference='Caucasian'))[T.African-American]",-0.0474,0.005,-10.256,0.000,-0.056,-0.038
"C(race_alter, Treatment(reference='Caucasian'))[T.Asian]",0.4387,0.005,86.901,0.000,0.429,0.449
"C(race_alter, Treatment(reference='Caucasian'))[T.Hispanic]",-0.3512,0.005,-72.711,0.000,-0.361,-0.342


In [21]:
##### party + demographics

start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)", data = X)
res_011 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

Optimization terminated successfully.
         Current function value: 0.610806
         Iterations 6
Time:  1215.3684823469957


In [22]:
res_011.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191717.0
Method:,MLE,Df Model:,26.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.1187
Time:,12:39:35,Log-Likelihood:,-19052000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-6.3936,0.008,-803.283,0.000,-6.409,-6.378
C(party_hom)[T.Democrat],0.7151,0.002,346.719,0.000,0.711,0.719
C(party_hom)[T.Independent],-0.0282,0.004,-7.430,0.000,-0.036,-0.021
C(party_hom)[T.Republican],0.4571,0.002,202.148,0.000,0.453,0.462
C(party_alter)[T.Independent],-0.0791,0.002,-42.744,0.000,-0.083,-0.076
C(party_alter)[T.Republican],-0.2759,0.002,-141.782,0.000,-0.280,-0.272
C(party_ego)[T.Independent],0.1478,0.002,78.992,0.000,0.144,0.151
C(party_ego)[T.Republican],0.0006,0.002,0.330,0.741,-0.003,0.004
C(race_hom)[T.African-American],1.3384,0.006,207.774,0.000,1.326,1.351


In [23]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) + np.log10(radiation_pop)", data = X)
res_1 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

Optimization terminated successfully.
         Current function value: 0.546498
         Iterations 6
Time:  1227.997532219044


In [24]:
res_1.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191716.0
Method:,MLE,Df Model:,27.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.2115
Time:,13:01:05,Log-Likelihood:,-17046000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.3022,0.009,33.830,0.000,0.285,0.320
C(party_hom)[T.Democrat],0.5720,0.002,260.320,0.000,0.568,0.576
C(party_hom)[T.Independent],-0.1345,0.004,-33.116,0.000,-0.142,-0.127
C(party_hom)[T.Republican],0.3870,0.002,161.359,0.000,0.382,0.392
C(party_alter)[T.Independent],-0.2248,0.002,-114.292,0.000,-0.229,-0.221
C(party_alter)[T.Republican],-0.4308,0.002,-207.783,0.000,-0.435,-0.427
C(party_ego)[T.Independent],0.0799,0.002,40.215,0.000,0.076,0.084
C(party_ego)[T.Republican],-0.0759,0.002,-36.565,0.000,-0.080,-0.072
C(race_hom)[T.African-American],1.0967,0.007,159.170,0.000,1.083,1.110


In [25]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) + np.log10(radiation_pop) + C(same_state) ", data = X)
res_2 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start)  

Optimization terminated successfully.
         Current function value: 0.538019
         Iterations 6
Time:  1334.8349412879907


In [26]:
res_2.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191715.0
Method:,MLE,Df Model:,28.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.2237
Time:,13:24:23,Log-Likelihood:,-16782000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.8905,0.011,-360.283,0.000,-3.912,-3.869
C(party_hom)[T.Democrat],0.5690,0.002,257.106,0.000,0.565,0.573
C(party_hom)[T.Independent],-0.1524,0.004,-37.194,0.000,-0.160,-0.144
C(party_hom)[T.Republican],0.3908,0.002,161.893,0.000,0.386,0.396
C(party_alter)[T.Independent],-0.1967,0.002,-99.337,0.000,-0.201,-0.193
C(party_alter)[T.Republican],-0.4131,0.002,-197.975,0.000,-0.417,-0.409
C(party_ego)[T.Independent],0.0979,0.002,48.941,0.000,0.094,0.102
C(party_ego)[T.Republican],-0.0710,0.002,-33.953,0.000,-0.075,-0.067
C(race_hom)[T.African-American],1.1253,0.007,160.051,0.000,1.112,1.139


In [27]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_hom) + C(party_alter) + C(party_ego) + np.log10(radiation_pop) + C(same_state) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) + C(ruca_hom) + C(ruca_alter) + C(ruca_ego) + C(dens_diff)", data = X)
res_3 = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start) 

Optimization terminated successfully.
         Current function value: 0.527731
         Iterations 6
Time:  1855.1990886229323


In [28]:
res_3.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191703.0
Method:,MLE,Df Model:,40.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.2386
Time:,13:56:21,Log-Likelihood:,-16461000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.4991,0.013,-356.597,0.000,-4.524,-4.474
C(party_hom)[T.Democrat],0.5137,0.002,230.062,0.000,0.509,0.518
C(party_hom)[T.Independent],-0.1461,0.004,-35.454,0.000,-0.154,-0.138
C(party_hom)[T.Republican],0.3863,0.002,158.909,0.000,0.382,0.391
C(party_alter)[T.Independent],-0.0471,0.002,-23.461,0.000,-0.051,-0.043
C(party_alter)[T.Republican],-0.2510,0.002,-118.627,0.000,-0.255,-0.247
C(party_ego)[T.Independent],0.1188,0.002,58.905,0.000,0.115,0.123
C(party_ego)[T.Republican],-0.0415,0.002,-19.676,0.000,-0.046,-0.037
C(same_state)[T.1],1.3875,0.002,620.639,0.000,1.383,1.392


Party Reg Model

In [19]:
X_party_reg = X.loc[~(X.party_reg_hom == "NA")]
X_party_reg.y.value_counts()

y
1    7678914
0    5043124
Name: count, dtype: int64

In [20]:
X_party_reg.shape[0]

12722038

In [21]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego) + np.log10(radiation_pop) + C(same_state) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) + C(ruca_hom) + C(ruca_alter) + C(ruca_ego) + C(dens_diff)",
               data = X_party_reg)
res_party_reg = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start) 

Optimization terminated successfully.
         Current function value: 0.503388
         Iterations 7
Time:  773.7592902840115


In [22]:
res_party_reg.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,12722038.0
Model:,Logit,Df Residuals:,12721994.0
Method:,MLE,Df Model:,43.0
Date:,"Wed, 18 Sep 2024",Pseudo R-squ.:,0.2504
Time:,09:52:10,Log-Likelihood:,-6404100.0
converged:,True,LL-Null:,-8543200.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-3.2059,0.020,-161.821,0.000,-3.245,-3.167
C(party_reg_hom)[T.Democrat],0.4851,0.003,159.330,0.000,0.479,0.491
C(party_reg_hom)[T.Independent],-0.2211,0.004,-60.357,0.000,-0.228,-0.214
C(party_reg_hom)[T.Other],0.0784,0.029,2.668,0.008,0.021,0.136
C(party_reg_hom)[T.Republican],0.7710,0.004,203.984,0.000,0.764,0.778
C(party_reg_alter)[T.Independent],0.1648,0.003,62.667,0.000,0.160,0.170
C(party_reg_alter)[T.Other],-0.0127,0.005,-2.495,0.013,-0.023,-0.003
C(party_reg_alter)[T.Republican],-0.5155,0.003,-192.634,0.000,-0.521,-0.510
C(party_reg_ego)[T.Independent],0.1361,0.003,50.489,0.000,0.131,0.141


In [30]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)",
               data = X_party_reg)
res_party_reg_no_geo = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start) 

Optimization terminated successfully.
         Current function value: 0.594796
         Iterations 6
Time:  544.3868834780296


Party Diff Model

In [31]:
start = timeit.default_timer()

mod = smf.logit(formula = "y ~ party_diff + C(party_alter) + C(party_ego) + np.log10(radiation_pop) + C(same_state) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) + C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) + C(ruca_hom) + C(ruca_alter) + C(ruca_ego) + C(dens_diff)", data = X)
res_party_diff = mod.fit(cov_type='HC3')

stop = timeit.default_timer()
print('Time: ', stop - start) 

Optimization terminated successfully.
         Current function value: 0.527380
         Iterations 6
Time:  1829.5688805360114


In [32]:
res_party_diff.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,31191744.0
Model:,Logit,Df Residuals:,31191705.0
Method:,MLE,Df Model:,38.0
Date:,"Mon, 16 Sep 2024",Pseudo R-squ.:,0.2391
Time:,14:50:12,Log-Likelihood:,-16450000.0
converged:,True,LL-Null:,-21618000.0
Covariance Type:,HC3,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-4.0412,0.012,-324.381,0.000,-4.066,-4.017
C(party_alter)[T.Independent],-0.3442,0.001,-239.895,0.000,-0.347,-0.341
C(party_alter)[T.Republican],-0.3049,0.001,-296.660,0.000,-0.307,-0.303
C(party_ego)[T.Independent],-0.1786,0.001,-127.812,0.000,-0.181,-0.176
C(party_ego)[T.Republican],-0.0957,0.001,-95.440,0.000,-0.098,-0.094
C(same_state)[T.1],1.3885,0.002,621.054,0.000,1.384,1.393
C(race_hom)[T.African-American],1.1623,0.007,162.379,0.000,1.148,1.176
C(race_hom)[T.Asian],0.7531,0.021,35.297,0.000,0.711,0.795
C(race_hom)[T.Caucasian],0.1815,0.005,34.692,0.000,0.171,0.192


Coefficient tables

In [45]:
df = pd.concat([np.exp(res_0.params), res_0.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                0    1
Intercept                    0.79  0.0
C(party_hom)[T.Democrat]     2.06  0.0
C(party_hom)[T.Independent]  0.99  0.0
C(party_hom)[T.Republican]   1.61  0.0


In [46]:
df = pd.concat([np.exp(res_01.params), res_01.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        0.00  0.0
C(race_hom)[T.African-American]  4.09  0.0
C(race_hom)[T.Asian]             3.01  0.0
C(race_hom)[T.Caucasian]         1.44  0.0
C(race_hom)[T.Hispanic]          4.26  0.0
C(race_hom)[T.Other]             5.05  0.0
C(gender_hom)[T.1]               1.36  0.0
age_diff_log                     0.20  0.0


In [47]:
df = pd.concat([np.exp(res_011.params), res_011.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        0.00  0.0
C(party_hom)[T.Democrat]         2.04  0.0
C(party_hom)[T.Independent]      0.97  0.0
C(party_hom)[T.Republican]       1.58  0.0
C(race_hom)[T.African-American]  3.81  0.0
C(race_hom)[T.Asian]             3.08  0.0
C(race_hom)[T.Caucasian]         1.35  0.0
C(race_hom)[T.Hispanic]          4.52  0.0
C(race_hom)[T.Other]             5.10  0.0
C(gender_hom)[T.1]               1.36  0.0
age_diff_log                     0.21  0.0


In [48]:
df = pd.concat([np.exp(res_1.params), res_1.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        1.35  0.0
C(party_hom)[T.Democrat]         1.77  0.0
C(party_hom)[T.Independent]      0.87  0.0
C(party_hom)[T.Republican]       1.47  0.0
C(race_hom)[T.African-American]  2.99  0.0
C(race_hom)[T.Asian]             2.06  0.0
C(race_hom)[T.Caucasian]         1.21  0.0
C(race_hom)[T.Hispanic]          2.80  0.0
C(race_hom)[T.Other]             2.02  0.0
C(gender_hom)[T.1]               1.37  0.0
age_diff_log                     0.24  0.0
np.log10(radiation_pop)          0.34  0.0


In [49]:
df = pd.concat([np.exp(res_2.params), res_2.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        0.02  0.0
C(party_hom)[T.Democrat]         1.77  0.0
C(party_hom)[T.Independent]      0.86  0.0
C(party_hom)[T.Republican]       1.48  0.0
C(race_hom)[T.African-American]  3.08  0.0
C(race_hom)[T.Asian]             2.02  0.0
C(race_hom)[T.Caucasian]         1.21  0.0
C(race_hom)[T.Hispanic]          2.70  0.0
C(race_hom)[T.Other]             1.91  0.0
C(gender_hom)[T.1]               1.37  0.0
C(same_state)[T.1]               4.27  0.0
age_diff_log                     0.24  0.0
np.log10(radiation_pop)          0.56  0.0


In [50]:
df = pd.concat([np.exp(res_3.params), res_3.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        0.01  0.0
C(party_hom)[T.Democrat]         1.67  0.0
C(party_hom)[T.Independent]      0.86  0.0
C(party_hom)[T.Republican]       1.47  0.0
C(same_state)[T.1]               4.00  0.0
C(race_hom)[T.African-American]  3.26  0.0
C(race_hom)[T.Asian]             2.11  0.0
C(race_hom)[T.Caucasian]         1.21  0.0
C(race_hom)[T.Hispanic]          2.77  0.0
C(race_hom)[T.Other]             1.91  0.0
C(gender_hom)[T.1]               1.38  0.0
C(ruca_hom)[T.metropolitan]      1.74  0.0
C(ruca_hom)[T.micropolitan]      1.19  0.0
C(ruca_hom)[T.small_town/rural]  1.36  0.0
C(dens_diff)[T.1]                3.81  0.0
C(dens_diff)[T.2]                1.27  0.0
C(dens_diff)[T.3]                1.95  0.0
np.log10(radiation_pop)          0.55  0.0
age_diff_log                     0.24  0.0


In [51]:
df = pd.concat([np.exp(res_party_reg.params), res_party_reg.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0     1
Intercept                        0.04  0.00
C(party_reg_hom)[T.Democrat]     1.62  0.00
C(party_reg_hom)[T.Independent]  0.80  0.00
C(party_reg_hom)[T.Other]        1.08  0.01
C(party_reg_hom)[T.Republican]   2.16  0.00
C(same_state)[T.1]               2.64  0.00
C(race_hom)[T.African-American]  2.75  0.00
C(race_hom)[T.Asian]             1.84  0.00
C(race_hom)[T.Caucasian]         1.26  0.00
C(race_hom)[T.Hispanic]          2.90  0.00
C(race_hom)[T.Other]             2.21  0.00
C(gender_hom)[T.1]               1.35  0.00
C(ruca_hom)[T.metropolitan]      2.22  0.00
C(ruca_hom)[T.micropolitan]      1.21  0.00
C(ruca_hom)[T.small_town/rural]  1.62  0.00
C(dens_diff)[T.1]                3.66  0.00
C(dens_diff)[T.2]                1.36  0.00
C(dens_diff)[T.3]                1.89  0.00
np.log10(radiation_pop)          0.51  0.00
age_diff_log                     0.24  0.00


In [52]:
df = pd.concat([np.exp(res_party_reg_no_geo.params), res_party_reg_no_geo.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,2))

                                    0    1
Intercept                        0.01  0.0
C(party_reg_hom)[T.Democrat]     1.76  0.0
C(party_reg_hom)[T.Independent]  0.82  0.0
C(party_reg_hom)[T.Other]        1.34  0.0
C(party_reg_hom)[T.Republican]   2.55  0.0
C(race_hom)[T.African-American]  3.42  0.0
C(race_hom)[T.Asian]             2.27  0.0
C(race_hom)[T.Caucasian]         1.47  0.0
C(race_hom)[T.Hispanic]          4.32  0.0
C(race_hom)[T.Other]             5.97  0.0
C(gender_hom)[T.1]               1.33  0.0
age_diff_log                     0.20  0.0


In [53]:
df = pd.concat([np.exp(res_party_diff.params), res_party_diff.pvalues], axis = 1)
df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x)))]
print(round(df,4))


                                      0    1
Intercept                        0.0176  0.0
C(same_state)[T.1]               4.0087  0.0
C(race_hom)[T.African-American]  3.1971  0.0
C(race_hom)[T.Asian]             2.1235  0.0
C(race_hom)[T.Caucasian]         1.1990  0.0
C(race_hom)[T.Hispanic]          2.7892  0.0
C(race_hom)[T.Other]             1.9222  0.0
C(gender_hom)[T.1]               1.3772  0.0
C(ruca_hom)[T.metropolitan]      1.7321  0.0
C(ruca_hom)[T.micropolitan]      1.1938  0.0
C(ruca_hom)[T.small_town/rural]  1.3580  0.0
C(dens_diff)[T.1]                3.7806  0.0
C(dens_diff)[T.2]                1.2677  0.0
C(dens_diff)[T.3]                1.9560  0.0
party_diff                       0.9937  0.0
np.log10(radiation_pop)          0.5525  0.0
age_diff_log                     0.2425  0.0


## KHB Method

In [42]:
X["latent_3"] = res_3.fittedvalues.values
X["latent_2"] = res_2.fittedvalues.values
X["latent_1"] = res_1.fittedvalues.values
X["latent_011"] = res_011.fittedvalues.values
X["latent_01"] = res_01.fittedvalues.values
X["latent_0"] = res_0.fittedvalues.values

In [43]:
X["latent_party_reg"] = None
X["latent_party_reg_no_geo"] = None
X.loc[~(X.party_reg_hom == "NA"), "latent_party_reg"] = res_party_reg.fittedvalues.values
X.loc[~(X.party_reg_hom == "NA"), "latent_party_reg_no_geo"] = res_party_reg_no_geo.fittedvalues.values


In [44]:
X.to_pickle("../Pickles/features_soc_sci.pk")

In [3]:
X = pd.read_pickle("../Pickles/features_soc_sci.pk")

In [28]:
def fit_khb_model(fmla, df = X):
    mod_khb= smf.ols(formula = fmla, data = df)
    res_khb= mod_khb.fit()
    return(res_khb)

In [6]:
def return_khb_comp(res_red, res_full):
    df = pd.concat([res_red.params, res_full.params], axis = 1)
    df["perc"] = 100*(df[1] - df[0])/df[0]
    df = df.loc[(df.index.map(lambda x: not ("ego" in x) and not ("alter" in x) and not ("Other" in x) and not ("Native" in x)))]
    print(round(df,2))

Model 2 to model 3 difference

In [46]:
fmla_khb_3_3 = """latent_3 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop) + C(same_state) + C(ruca_hom) + C(ruca_alter) + C(ruca_ego) + C(dens_diff)"""
res_khb_3_3 = fit_khb_model(fmla_khb_3_3, X)
res_khb_3_3.save("../Pickles/res_khb_3_3.pickle")

In [47]:
fmla_khb_2_3 = """latent_3 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop) + C(same_state)"""
res_khb_2_3 = fit_khb_model(fmla_khb_2_3, X)
res_khb_2_3.save("../Pickles/res_khb_2_3.pickle")

In [None]:
res_khb_3_3 = sm.load_pickle("../Pickles/res_khb_3_3.pickle")
res_khb_2_3 = sm.load_pickle("../Pickles/res_khb_2_3.pickle")

In [14]:
return_khb_comp(res_khb_2_3, res_khb_3_3)

                                    0     1   perc
Intercept                       -3.86 -4.50  16.55
C(party_hom)[T.Democrat]         0.61  0.51 -15.60
C(party_hom)[T.Independent]     -0.16 -0.15  -6.91
C(party_hom)[T.Republican]       0.39  0.39  -0.95
C(race_hom)[T.African-American]  1.12  1.18   5.21
C(race_hom)[T.Asian]             0.65  0.75  15.19
C(race_hom)[T.Caucasian]         0.18  0.19   2.99
C(race_hom)[T.Hispanic]          1.00  1.02   1.52
C(gender_hom)[T.1]               0.32  0.32  -1.22
C(same_state)[T.1]               1.50  1.39  -7.38
age_diff_log                    -1.47 -1.42  -3.12
np.log10(radiation_pop)         -0.60 -0.60   0.18
C(ruca_hom)[T.metropolitan]       NaN  0.55    NaN
C(ruca_hom)[T.micropolitan]       NaN  0.18    NaN
C(ruca_hom)[T.small_town/rural]   NaN  0.30    NaN
C(dens_diff)[T.1]                 NaN  1.34    NaN
C(dens_diff)[T.2]                 NaN  0.24    NaN
C(dens_diff)[T.3]                 NaN  0.67    NaN


Model 011 to model 3 difference

In [49]:
fmla_khb_011_3 = """latent_3 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) """
res_khb_011_3 = fit_khb_model(fmla_khb_011_3, X)
res_khb_011_3.save("../Pickles/res_khb_011_3.pickle")

In [None]:
res_khb_011_3 = sm.load_pickle("../Pickles/res_khb_011_3.pickle")

In [15]:
return_khb_comp(res_khb_011_3, res_khb_3_3)

                                    0     1    perc
Intercept                       -7.26 -4.50  -37.99
C(party_hom)[T.Democrat]         0.90  0.51  -43.23
C(party_hom)[T.Independent]     -0.03 -0.15  338.62
C(party_hom)[T.Republican]       0.55  0.39  -29.53
C(race_hom)[T.African-American]  1.67  1.18  -29.23
C(race_hom)[T.Asian]             1.54  0.75  -51.51
C(race_hom)[T.Caucasian]         0.34  0.19  -44.46
C(race_hom)[T.Hispanic]          1.88  1.02  -45.84
C(gender_hom)[T.1]               0.36  0.32  -12.06
age_diff_log                    -1.85 -1.42  -23.37
C(same_state)[T.1]                NaN  1.39     NaN
C(ruca_hom)[T.metropolitan]       NaN  0.55     NaN
C(ruca_hom)[T.micropolitan]       NaN  0.18     NaN
C(ruca_hom)[T.small_town/rural]   NaN  0.30     NaN
C(dens_diff)[T.1]                 NaN  1.34     NaN
C(dens_diff)[T.2]                 NaN  0.24     NaN
C(dens_diff)[T.3]                 NaN  0.67     NaN
np.log10(radiation_pop)           NaN -0.60     NaN


Model 1 to model 2 difference

In [51]:
fmla_khb_2_2 = """latent_2 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop) + C(same_state)"""
res_khb_2_2 = fit_khb_model(fmla_khb_2_2, X)
res_khb_2_2.save("../Pickles/res_khb_2_2.pickle")

In [52]:
fmla_khb_1_2 = """latent_2 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop)"""
res_khb_1_2 = fit_khb_model(fmla_khb_1_2, X)
res_khb_1_2.save("../Pickles/res_khb_1_2.pickle")

In [None]:
res_khb_2_2 = sm.load_pickle("../Pickles/res_khb_2_2.pickle")
res_khb_1_2 = sm.load_pickle("../Pickles/res_khb_1_2.pickle")

In [16]:
return_khb_comp(res_khb_1_2, res_khb_2_2)

                                    0     1      perc
Intercept                       -0.01 -3.89  41729.25
C(party_hom)[T.Democrat]         0.59  0.57     -3.33
C(party_hom)[T.Independent]     -0.14 -0.15      7.20
C(party_hom)[T.Republican]       0.39  0.39      0.43
C(race_hom)[T.African-American]  1.14  1.13     -0.88
C(race_hom)[T.Asian]             0.72  0.70     -2.71
C(race_hom)[T.Caucasian]         0.19  0.19     -1.44
C(race_hom)[T.Hispanic]          1.06  0.99     -6.37
C(gender_hom)[T.1]               0.33  0.32     -2.20
age_diff_log                    -1.47 -1.44     -2.08
np.log10(radiation_pop)         -1.06 -0.57    -45.91
C(same_state)[T.1]                NaN  1.45       NaN


Model 011 to model 1 difference

In [54]:
fmla_khb_1_1 = """latent_1 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop) """
res_khb_1_1 = fit_khb_model(fmla_khb_1_1, X)
res_khb_1_1.save("../Pickles/res_khb_1_1.pickle")

In [55]:
fmla_khb_011_1 = """latent_1 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_011_1 = fit_khb_model(fmla_khb_011_1, X)
res_khb_011_1.save("../Pickles/res_khb_011_1.pickle")

In [24]:
res_khb_1_1 = sm.load_pickle("../Pickles/res_khb_1_1.pickle")
res_khb_011_1 = sm.load_pickle("../Pickles/res_khb_011_1.pickle")

In [25]:
return_khb_comp(res_khb_011_1, res_khb_1_1)

                                    0     1    perc
Intercept                       -6.95  0.30 -104.35
C(party_hom)[T.Democrat]         0.84  0.57  -32.11
C(party_hom)[T.Independent]     -0.02 -0.13  468.24
C(party_hom)[T.Republican]       0.54  0.39  -28.82
C(race_hom)[T.African-American]  1.62  1.10  -32.40
C(race_hom)[T.Asian]             1.58  0.72  -54.29
C(race_hom)[T.Caucasian]         0.34  0.19  -43.86
C(race_hom)[T.Hispanic]          1.82  1.03  -43.45
C(gender_hom)[T.1]               0.35  0.32   -9.18
age_diff_log                    -1.79 -1.45  -19.42
np.log10(radiation_pop)           NaN -1.07     NaN


Model 01 to 011 difference

In [17]:
fmla_khb_011_011 = """latent_011 ~ C(party_hom) + C(party_alter) + C(party_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_011_011 = fit_khb_model(fmla_khb_011_011 , X)
res_khb_011_011.save("../Pickles/res_khb_011_011.pickle")

In [None]:
fmla_khb_01_011 = """latent_011 ~ age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_01_011 = fit_khb_model(fmla_khb_01_011 , X)
res_khb_01_011.save("../Pickles/res_khb_01_011.pickle")

In [7]:
res_khb_011_011 = sm.load_pickle("../Pickles/res_khb_011_011.pickle")
res_khb_01_011 = sm.load_pickle("../Pickles/res_khb_01_011.pickle")

In [8]:
return_khb_comp(res_khb_01_011, res_khb_011_011)

                                    0     1   perc
Intercept                       -6.54 -6.39  -2.25
C(race_hom)[T.African-American]  1.43  1.34  -6.60
C(race_hom)[T.Asian]             1.12  1.13   0.59
C(race_hom)[T.Caucasian]         0.38  0.30 -21.84
C(race_hom)[T.Hispanic]          1.48  1.51   1.63
C(gender_hom)[T.1]               0.31  0.31  -2.57
age_diff_log                    -1.65 -1.58  -4.49
C(party_hom)[T.Democrat]          NaN  0.72    NaN
C(party_hom)[T.Independent]       NaN -0.03    NaN
C(party_hom)[T.Republican]        NaN  0.46    NaN


Model 0 to 011 difference

In [None]:
fmla_khb_1_011 = """latent_011 ~ C(party_hom) + C(party_alter) + C(party_ego)"""
res_khb_1_011 = fit_khb_model(fmla_khb_1_011 , X)
res_khb_1_011.save("../Pickles/res_khb_1_011.pickle")

In [9]:
res_khb_1_011 = sm.load_pickle("../Pickles/res_khb_1_011.pickle")

In [10]:
return_khb_comp(res_khb_1_011, res_khb_011_011)

                                    0     1     perc
Intercept                       -0.28 -6.39  2189.83
C(party_hom)[T.Democrat]         0.82  0.72   -13.21
C(party_hom)[T.Independent]     -0.01 -0.03   144.05
C(party_hom)[T.Republican]       0.55  0.46   -16.74
C(race_hom)[T.African-American]   NaN  1.34      NaN
C(race_hom)[T.Asian]              NaN  1.13      NaN
C(race_hom)[T.Caucasian]          NaN  0.30      NaN
C(race_hom)[T.Hispanic]           NaN  1.51      NaN
C(gender_hom)[T.1]                NaN  0.31      NaN
age_diff_log                      NaN -1.58      NaN


Party reg comparisons

In [26]:
X_party_reg = X.loc[~(X.party_reg_hom == "NA")]
X_party_reg["latent_party_reg"] = pd.to_numeric(X_party_reg["latent_party_reg"])
X_party_reg["latent_party_reg_no_geo"] = pd.to_numeric(X_party_reg["latent_party_reg_no_geo"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_party_reg["latent_party_reg"] = pd.to_numeric(X_party_reg["latent_party_reg"])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_party_reg["latent_party_reg_no_geo"] = pd.to_numeric(X_party_reg["latent_party_reg_no_geo"])


In [None]:
fmla_khb_party_reg = """latent_party_reg ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego) +
np.log10(radiation_pop) + C(same_state) + C(ruca_hom) + C(ruca_alter) + C(ruca_ego) + C(dens_diff)"""
res_khb_party_reg = fit_khb_model(fmla_khb_party_reg, X_party_reg)

In [29]:
fmla_khb_party_reg_no_geo = """latent_party_reg ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_party_reg_no_geo = fit_khb_model(fmla_khb_party_reg_no_geo, X_party_reg)

In [34]:
return_khb_comp(res_khb_party_reg_no_geo, res_khb_party_reg)

                                    0     1   perc
Intercept                       -5.60 -3.21 -42.74
C(party_reg_hom)[T.Democrat]     0.71  0.49 -31.52
C(party_reg_hom)[T.Independent] -0.24 -0.22  -6.27
C(party_reg_hom)[T.Republican]   1.17  0.77 -34.22
C(race_hom)[T.African-American]  1.56  1.01 -35.20
C(race_hom)[T.Asian]             1.04  0.61 -41.58
C(race_hom)[T.Caucasian]         0.45  0.23 -48.65
C(race_hom)[T.Hispanic]          1.84  1.06 -42.25
C(gender_hom)[T.1]               0.35  0.30 -12.70
age_diff_log                    -1.95 -1.42 -26.94
C(same_state)[T.1]                NaN  0.97    NaN
C(ruca_hom)[T.metropolitan]       NaN  0.80    NaN
C(ruca_hom)[T.micropolitan]       NaN  0.19    NaN
C(ruca_hom)[T.small_town/rural]   NaN  0.48    NaN
C(dens_diff)[T.1]                 NaN  1.30    NaN
C(dens_diff)[T.2]                 NaN  0.31    NaN
C(dens_diff)[T.3]                 NaN  0.64    NaN
np.log10(radiation_pop)           NaN -0.67    NaN


In [30]:
fmla_khb_party_reg_no_dems = """latent_party_reg_no_geo ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego) + age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_party_reg_no_dems = fit_khb_model(fmla_khb_party_reg_no_dems, X_party_reg)

In [31]:
fmla_khb_party_reg_only = """latent_party_reg_no_geo ~ C(party_reg_hom) + C(party_reg_alter) + C(party_reg_ego)"""
res_khb_party_reg_only = fit_khb_model(fmla_khb_party_reg_only, X_party_reg)

In [35]:
return_khb_comp(res_khb_party_reg_only, res_khb_party_reg_no_dems)

                                    0     1     perc
Intercept                        0.42 -5.18 -1330.62
C(party_reg_hom)[T.Democrat]     0.64  0.56   -11.68
C(party_reg_hom)[T.Independent] -0.21 -0.20    -5.49
C(party_reg_hom)[T.Republican]   1.08  0.94   -13.07
C(race_hom)[T.African-American]   NaN  1.23      NaN
C(race_hom)[T.Asian]              NaN  0.82      NaN
C(race_hom)[T.Caucasian]          NaN  0.39      NaN
C(race_hom)[T.Hispanic]           NaN  1.46      NaN
C(gender_hom)[T.1]                NaN  0.29      NaN
age_diff_log                      NaN -1.61      NaN


In [33]:
fmla_khb_party_reg_dems_only = """latent_party_reg_no_geo ~ age_diff_log + np.log2(age_ego) + np.log2(age_alter) +
C(race_hom) + C(race_alter, Treatment(reference = 'Caucasian')) + C(race_ego, Treatment(reference = 'Caucasian')) + C(gender_hom) + C(gender_alter) + C(gender_ego)"""
res_khb_party_reg_dems_only = fit_khb_model(fmla_khb_party_reg_dems_only, X_party_reg)

In [36]:
return_khb_comp(res_khb_party_reg_dems_only, res_khb_party_reg_no_dems)

                                    0     1  perc
Intercept                       -5.40 -5.18 -4.04
C(race_hom)[T.African-American]  1.29  1.23 -4.78
C(race_hom)[T.Asian]             0.80  0.82  2.31
C(race_hom)[T.Caucasian]         0.41  0.39 -6.76
C(race_hom)[T.Hispanic]          1.44  1.46  1.95
C(gender_hom)[T.1]               0.30  0.29 -2.87
age_diff_log                    -1.68 -1.61 -4.26
C(party_reg_hom)[T.Democrat]      NaN  0.56   NaN
C(party_reg_hom)[T.Independent]   NaN -0.20   NaN
C(party_reg_hom)[T.Republican]    NaN  0.94   NaN


In [None]:
res_khb_party_reg.save("../Pickles/res_khb_party_reg.pickle")
res_khb_party_reg_no_geo.save("../Pickles/res_khb_party_reg_no_geo.pickle")
res_khb_party_reg_no_dems.save("../Pickles/res_khb_party_reg_no_dems.pickle")
fmla_khb_party_reg_only.save("../Pickles/res_khb_party_reg_only.pickle")
fmla_khb_party_reg_dems_only.save("../Pickles/res_khb_party_reg_dems_only.pickle")