In [1]:
from correlate_tweets import data_for_model

import warnings
warnings.simplefilter('ignore')

import matplotlib.pyplot as plt
import numpy as np
# Global matplotlib styling: every text-size setting shares the same
# 'x-large' value, and figures default to a wide 15x5 canvas.
params = dict.fromkeys(
    ('legend.fontsize',
     'axes.labelsize',
     'axes.titlesize',
     'xtick.labelsize',
     'ytick.labelsize'),
    'x-large',
)
params['figure.figsize'] = (15, 5)
plt.rcParams.update(params)

In [2]:
# Build the modelling table via the project helper imported from
# correlate_tweets; later cells read it through .columns / .head() and pass
# it as `data=` to the statsmodels formula API.
ts_all = data_for_model()

In [3]:
# Show the available columns, then the tweet counts summed over all rows
# for each candidate.
print(ts_all.columns)
print('total clinton tweets:', ts_all.clinton.sum())
print('total trump tweets:', ts_all.trump.sum())

Index(['days_from_debate', 'trump', 'clinton', 'metvi_all', 'metvi_trump_subj',
       'metvi_trump_obj', 'metvi_clinton_subj', 'metvi_clinton_obj',
       'metvi_msnbc', 'metvi_cnn', 'metvi_foxnews'],
      dtype='object')
total clinton tweets: 2084
total trump tweets: 1017


In [4]:
# Plain-text preview of the first rows of the modelling table.
print(ts_all.head())

            days_from_debate  trump  clinton  metvi_all  metvi_trump_subj  \
2016-09-01                25     12       20   0.833333               1.0   
2016-09-02                24      5        9   1.800000               1.0   
2016-09-03                23      5        7   1.500000               1.0   
2016-09-04                22     14        6        NaN               0.0   
2016-09-05                21      6       12   1.000000               1.0   

            metvi_trump_obj  metvi_clinton_subj  metvi_clinton_obj  \
2016-09-01              1.0                 1.0                1.0   
2016-09-02              1.0                 0.0                1.5   
2016-09-03              0.0                 0.0                2.0   
2016-09-04              0.0                 0.0                0.0   
2016-09-05              0.0                 0.0                0.0   

            metvi_msnbc  metvi_cnn  metvi_foxnews  
2016-09-01          1.5        0.0            1.0  
2016-09-02  

In [20]:
import statsmodels.formula.api as smf

# Model 1: metvi_all regressed on both candidates' tweet counts plus the
# days_from_debate covariate.
mod1 = smf.ols(formula='metvi_all ~ trump + clinton + days_from_debate',
               data=ts_all)
res1 = mod1.fit()

# Model 2: the same regression without days_from_debate.
mod2 = smf.ols(formula='metvi_all ~ trump + clinton', data=ts_all)
res2 = mod2.fit()

# Compare the two fits by AIC (lower is better).
print(res1.aic)
print(res2.aic)
# exp(-|delta AIC| / 2): relative likelihood of the higher-AIC model with
# respect to the lower-AIC one. Requires numpy (`np`) to be imported.
print(np.exp(-abs(res1.aic - res2.aic)/2.0))

259.65573810434466
267.3218091620158
0.021643815385390522


In [23]:
# Full regression table for res1, the fit that includes days_from_debate.
print(res1.summary())

                            OLS Regression Results                            
Dep. Variable:              metvi_all   R-squared:                       0.441
Model:                            OLS   Adj. R-squared:                  0.417
Method:                 Least Squares   F-statistic:                     18.14
Date:                Sun, 16 Sep 2018   Prob (F-statistic):           8.80e-09
Time:                        23:49:30   Log-Likelihood:                -125.83
No. Observations:                  73   AIC:                             259.7
Df Residuals:                      69   BIC:                             268.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            2.7121      0.462  

In [26]:
# Coefficients, R-squared and p-values of the model with days_from_debate,
# one per line.
print(res1.params, res1.rsquared, res1.pvalues, sep='\n')

Intercept           2.712059
trump               0.054217
clinton             0.004831
days_from_debate   -0.057709
dtype: float64
0.44089221934300193
Intercept           1.368239e-07
trump               2.069181e-03
clinton             7.023570e-01
days_from_debate    2.596681e-03
dtype: float64


In [27]:
# Coefficients, R-squared and p-values of the model without
# days_from_debate, one per line.
print(res2.params, res2.rsquared, res2.pvalues, sep='\n')

Intercept    1.531391
trump        0.052981
clinton      0.022549
dtype: float64
0.3617345921354125
Intercept    7.630842e-07
trump        4.319648e-03
clinton      6.275690e-02
dtype: float64


In [31]:
import statsmodels.formula.api as smf

# Every per-network model shares the same right-hand side; only the
# response column (metvi_<network>) changes.
rhs = 'trump + clinton + days_from_debate'

msnbc = smf.ols(formula='metvi_msnbc ~ ' + rhs, data=ts_all)
cnn = smf.ols(formula='metvi_cnn ~ ' + rhs, data=ts_all)
foxnews = smf.ols(formula='metvi_foxnews ~ ' + rhs, data=ts_all)

nets = ['msnbc', 'cnn', 'foxnews']

# Fit each model and report coefficients, R-squared and p-values under an
# upper-cased network heading.
for net, mod in zip(nets, (msnbc, cnn, foxnews)):
    res = mod.fit()
    print(net.upper())
    print(res.params, '\n')
    print(res.rsquared, '\n')
    print('Pvalues:\n', res.pvalues)
    print()

MSNBC
Intercept           1.488377
trump               0.028003
clinton             0.009853
days_from_debate   -0.017647
dtype: float64 

0.11385061239319261 

Intercept           0.006126
trump               0.373796
clinton             0.432930
days_from_debate    0.323905
dtype: float64

CNN
Intercept           2.233348
trump               0.051464
clinton             0.019136
days_from_debate   -0.034463
dtype: float64 

0.1608032942683042 

Intercept           0.013463
trump               0.306520
clinton             0.355842
days_from_debate    0.239220
dtype: float64

FOXNEWS
Intercept           3.817247
trump               0.041602
clinton             0.002816
days_from_debate   -0.097321
dtype: float64 

0.32108489986791244 

Intercept           7.438514e-07
trump               1.073845e-01
clinton             8.830670e-01
days_from_debate    8.875436e-04
dtype: float64



In [41]:
# Single-predictor fits for the MSNBC series. The first two summaries are
# printed as text; the last one is left as the cell's final expression so
# the notebook renders it as rich output.
for predictor in ('trump', 'clinton'):
    msnbc = smf.ols(formula='metvi_msnbc ~ ' + predictor, data=ts_all)
    mres = msnbc.fit()
    print(mres.summary())

msnbc = smf.ols(formula='metvi_msnbc ~ days_from_debate', data=ts_all)
mres = msnbc.fit()
mres.summary()

                            OLS Regression Results                            
Dep. Variable:            metvi_msnbc   R-squared:                       0.070
Model:                            OLS   Adj. R-squared:                  0.055
Method:                 Least Squares   F-statistic:                     4.615
Date:                Mon, 17 Sep 2018   Prob (F-statistic):             0.0357
Time:                        00:04:05   Log-Likelihood:                -102.56
No. Observations:                  63   AIC:                             209.1
Df Residuals:                      61   BIC:                             213.4
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.2251      0.289      4.235      0.0

0,1,2,3
Dep. Variable:,metvi_msnbc,R-squared:,0.078
Model:,OLS,Adj. R-squared:,0.063
Method:,Least Squares,F-statistic:,5.153
Date:,"Mon, 17 Sep 2018",Prob (F-statistic):,0.0267
Time:,00:04:05,Log-Likelihood:,-102.3
No. Observations:,63,AIC:,208.6
Df Residuals:,61,BIC:,212.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.1915,0.251,8.717,0.000,1.689,2.694
days_from_debate,-0.0328,0.014,-2.270,0.027,-0.062,-0.004

0,1,2,3
Omnibus:,6.508,Durbin-Watson:,1.304
Prob(Omnibus):,0.039,Jarque-Bera (JB):,5.804
Skew:,0.722,Prob(JB):,0.0549
Kurtosis:,3.351,Cond. No.,27.9


In [42]:
# First rows of the modelling table, shown through the notebook's rich
# display because it is the cell's last expression.
ts_all.head()

Unnamed: 0,days_from_debate,trump,clinton,metvi_all,metvi_trump_subj,metvi_trump_obj,metvi_clinton_subj,metvi_clinton_obj,metvi_msnbc,metvi_cnn,metvi_foxnews
2016-09-01,25,12,20,0.833333,1.0,1.0,1.0,1.0,1.5,0.0,1.0
2016-09-02,24,5,9,1.8,1.0,1.0,0.0,1.5,3.0,2.0,1.0
2016-09-03,23,5,7,1.5,1.0,0.0,0.0,2.0,3.0,,0.0
2016-09-04,22,14,6,,0.0,0.0,0.0,0.0,,,
2016-09-05,21,6,12,1.0,1.0,0.0,0.0,0.0,,2.0,0.0


In [43]:
# Same three-predictor regression for the two *_subj response columns,
# Clinton first and then Trump; `mod` / `res` end up holding the Trump fit.
for response in ('metvi_clinton_subj', 'metvi_trump_subj'):
    mod = smf.ols(formula=response + ' ~ trump + clinton + days_from_debate',
                  data=ts_all)
    res = mod.fit()
    print(res.summary())

                            OLS Regression Results                            
Dep. Variable:     metvi_clinton_subj   R-squared:                       0.272
Model:                            OLS   Adj. R-squared:                  0.247
Method:                 Least Squares   F-statistic:                     10.81
Date:                Mon, 17 Sep 2018   Prob (F-statistic):           4.14e-06
Time:                        00:08:56   Log-Likelihood:                -102.61
No. Observations:                  91   AIC:                             213.2
Df Residuals:                      87   BIC:                             223.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            0.5284      0.219  

In [44]:
# Same three-predictor regression for the two *_obj response columns,
# Clinton first and then Trump; `mod` / `res` end up holding the Trump fit.
for response in ('metvi_clinton_obj', 'metvi_trump_obj'):
    mod = smf.ols(formula=response + ' ~ trump + clinton + days_from_debate',
                  data=ts_all)
    res = mod.fit()
    print(res.summary())

                            OLS Regression Results                            
Dep. Variable:      metvi_clinton_obj   R-squared:                       0.255
Model:                            OLS   Adj. R-squared:                  0.229
Method:                 Least Squares   F-statistic:                     9.924
Date:                Mon, 17 Sep 2018   Prob (F-statistic):           1.08e-05
Time:                        00:09:33   Log-Likelihood:                -89.994
No. Observations:                  91   AIC:                             188.0
Df Residuals:                      87   BIC:                             198.0
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
Intercept            0.5568      0.191  

# Scratch work

In [None]:
# Small 4-row by 3-column integer array used to experiment with axis-wise
# reductions; the last expression displays its shape.
x = np.array([[2, 3, 5], [10, 2, 3], [11, 16, 8], [0, 6, 15]])
x.shape

In [None]:
# Minimum along axis 1, i.e. one value per row of x.
x.min(axis=1)

In [None]:
from datetime import datetime

# Work at whole-day resolution throughout.
day_dtype = 'datetime64[D]'

# Three debate dates and a consecutive run of calendar days
# (the end date of the range is exclusive).
debate_dates = np.array(['2016-09-27', '2016-10-02', '2016-10-06'],
                        dtype=day_dtype)
dates = np.arange('2016-09-26', '2016-10-08', dtype=day_dtype)

# Gap between the first debate and the first day of the range.
diff = debate_dates[0] - dates[0]

# Displayed as a plain integer number of days.
diff.astype(int)

In [None]:
# Whole-day difference between the first entry of debate_dates and every
# entry of dates, converted to integers.
(debate_dates[0] - dates).astype(int)

In [None]:
# `dates` and `debate_dates` are 1-D arrays of different lengths, so a plain
# `dates - debate_dates` cannot broadcast and raises ValueError. Adding a
# trailing axis to `dates` yields the full pairwise table instead: one row
# per date, one column per debate.
dates[:, None] - debate_dates

In [None]:
# Row i holds dates[i] minus each debate date (one column per debate),
# obtained by broadcasting rather than an explicit per-date loop.
days_from_debate = dates[:, np.newaxis] - debate_dates

print(days_from_debate)
# Smallest absolute gap in each row, i.e. across the debate columns.
print(np.absolute(days_from_debate).min(axis=1))

In [None]:
# NOTE(review): `d` is not assigned in any cell visible here, so this line
# raises NameError on a fresh kernel — presumably `diff` (or a similar
# timedelta value) was meant; confirm before keeping this cell.
d.astype(float)