In [257]:
import pandas as pd
import statsmodels.api as sm
import datetime as datetime
import numpy as np

def zscore(df, col):
    col_mean = df[col].rolling(window=10).mean()
    col_std = df[col].rolling(window=10).std()
    return (df[col] - col_mean)/col_std

df = pd.read_csv('./data/11-26/daily_plutchik_threshold_5.csv', parse_dates=['month/day'])
approval_rating_df = pd.read_csv('./data/approval_polllist.csv', parse_dates=["enddate"])

df["Anticipation Z-Score"] = zscore(df, "Anticipation")
df["Trust Z-Score"] = zscore(df, "Trust")
df["Surprise Z-Score"] = zscore(df, "Surprise")
df["Sadness Z-Score"] = zscore(df, "Sadness")
df["Joy Z-Score"] = zscore(df, "Joy")
df["Fear Z-Score"] = zscore(df, "Fear")
df["Disgust Z-Score"] = zscore(df, "Disgust")
df["Anger Z-Score"] = zscore(df, "Anger")

start_date = datetime.datetime(2017, 7, 12, 0, 0)
end_date = datetime.datetime(2017, 12, 31, 0, 0)

truncated_df = df[(df["month/day"] > start_date) & (df["month/day"] < end_date)]
truncated_df.head()
original_emotions = truncated_df[['Anger', 'Disgust', 'Fear', 'Joy', 'Sadness', 'Surprise', 'Trust', 'Anticipation']]
original_emotions.head()

Unnamed: 0,Anger,Disgust,Fear,Joy,Sadness,Surprise,Trust,Anticipation
66,0.025688,0.022018,0.036697,0.122936,0.027523,0.13211,0.620183,0.012844
67,0.015248,0.017789,0.054638,0.121982,0.034307,0.113088,0.635324,0.007624
68,0.013717,0.019204,0.046639,0.130316,0.038409,0.116598,0.626886,0.00823
69,0.022744,0.030814,0.038151,0.118855,0.031548,0.131328,0.62069,0.005869
70,0.022989,0.007663,0.034483,0.099617,0.034483,0.061303,0.735632,0.003831


In [258]:
unnormalized_df = truncated_df[['Anticipation', 'Trust', 'Sadness', 'Joy', 'Fear', 'Disgust', 'Anger']]
unnormalized_extra_df = truncated_df[['Anticipation', 'Trust', 'Surprise', 'Sadness', 'Joy', 'Fear', 'Disgust', 'Anger']]
truncated_df = truncated_df[['Anticipation Z-Score', 'Trust Z-Score', 'Surprise Z-Score', 
                             'Sadness Z-Score', 'Joy Z-Score', 'Disgust Z-Score', 'Anger Z-Score']]
truncated_df.head()

Unnamed: 0,Anticipation Z-Score,Trust Z-Score,Surprise Z-Score,Sadness Z-Score,Joy Z-Score,Disgust Z-Score,Anger Z-Score
66,2.042746,-0.365817,0.746002,-0.439052,0.149226,-0.375811,0.840355
67,0.43993,-0.282787,-0.206227,0.724617,1.160845,-0.887824,-0.794612
68,0.569221,-0.479068,-0.084044,1.187529,1.493276,-0.772361,-0.864009
69,-0.159566,-0.980581,0.810779,-0.120901,0.546484,1.081614,0.366512
70,-0.702904,2.429096,-2.111836,0.366478,-1.014103,-2.029214,0.673508


In [259]:
truncated_approvals = approval_rating_df.loc[approval_rating_df['pollster'] == 'Ipsos']
truncated_approvals = truncated_approvals.loc[truncated_approvals['subgroup'] == 'All polls']
truncated_approvals = truncated_approvals[(truncated_approvals['enddate'] > start_date) & (truncated_approvals['enddate'] < end_date)]
non_adjusted_approvals = truncated_approvals['approve']
truncated_approvals = truncated_approvals['adjusted_approve']
truncated_approvals.head()
non_adjusted_approvals.head()

655    40.6
658    41.5
662    40.2
665    40.2
670    37.7
Name: approve, dtype: float64

In [260]:
truncated_approvals = truncated_approvals.reset_index(drop=True)
non_adjusted_approvals = non_adjusted_approvals.reset_index(drop=True)
truncated_df = truncated_df.reset_index(drop=True)
unnormalized_df = unnormalized_df.reset_index(drop=True)
unnormalized_extra_df = unnormalized_extra_df.reset_index(drop=True)
original_emotions = original_emotions.reset_index(drop=True)

training_approvals = truncated_approvals[:154]
test_approvals = truncated_approvals[154:]
training_data = truncated_df[:154]
test_data = truncated_df[154:]
non_adjusted_approvals.head()
print(len(non_adjusted_approvals), len(training_approvals), len(training_data))

171 154 154


In [261]:
trained_model = sm.OLS(training_approvals, training_data).fit()
trained_model.summary()

0,1,2,3
Dep. Variable:,adjusted_approve,R-squared (uncentered):,0.026
Model:,OLS,Adj. R-squared (uncentered):,-0.021
Method:,Least Squares,F-statistic:,0.5533
Date:,"Mon, 02 Dec 2019",Prob (F-statistic):,0.793
Time:,13:02:11,Log-Likelihood:,-775.41
No. Observations:,154,AIC:,1565.0
Df Residuals:,147,BIC:,1586.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Anticipation Z-Score,2.3444,3.227,0.727,0.469,-4.032,8.721
Trust Z-Score,6.5734,5.125,1.283,0.202,-3.555,16.702
Surprise Z-Score,1.2037,4.239,0.284,0.777,-7.173,9.580
Sadness Z-Score,2.7671,3.560,0.777,0.438,-4.268,9.803
Joy Z-Score,7.3837,4.105,1.799,0.074,-0.728,15.496
Disgust Z-Score,4.8174,4.055,1.188,0.237,-3.197,12.831
Anger Z-Score,2.8784,3.740,0.770,0.443,-4.513,10.270

0,1,2,3
Omnibus:,2.421,Durbin-Watson:,0.038
Prob(Omnibus):,0.298,Jarque-Bera (JB):,2.184
Skew:,0.106,Prob(JB):,0.335
Kurtosis:,3.543,Cond. No.,3.49


In [262]:
predictions = trained_model.predict(test_data)
## The line / model
import matplotlib.pyplot as plt
plt.scatter(test_approvals, predictions)
plt.xlabel("True Values")
plt.ylabel("Predictions")
# print("Score:", trained_model.score(test_data, test_approvals))

Text(0,0.5,'Predictions')

In [263]:
model = sm.OLS(truncated_approvals, truncated_df).fit()

In [264]:
predictions = model.predict(truncated_df)

model.summary()

0,1,2,3
Dep. Variable:,adjusted_approve,R-squared (uncentered):,0.03
Model:,OLS,Adj. R-squared (uncentered):,-0.012
Method:,Least Squares,F-statistic:,0.7168
Date:,"Mon, 02 Dec 2019",Prob (F-statistic):,0.658
Time:,13:02:14,Log-Likelihood:,-860.39
No. Observations:,171,AIC:,1735.0
Df Residuals:,164,BIC:,1757.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Anticipation Z-Score,1.2409,2.945,0.421,0.674,-4.575,7.056
Trust Z-Score,5.0968,4.864,1.048,0.296,-4.507,14.701
Surprise Z-Score,1.5615,3.970,0.393,0.695,-6.278,9.401
Sadness Z-Score,1.3518,3.333,0.406,0.686,-5.230,7.933
Joy Z-Score,8.0051,3.947,2.028,0.044,0.212,15.799
Disgust Z-Score,3.2651,3.877,0.842,0.401,-4.389,10.920
Anger Z-Score,4.5966,3.462,1.328,0.186,-2.238,11.432

0,1,2,3
Omnibus:,2.585,Durbin-Watson:,0.046
Prob(Omnibus):,0.275,Jarque-Bera (JB):,2.543
Skew:,-0.062,Prob(JB):,0.28
Kurtosis:,3.585,Cond. No.,3.56


In [265]:
model = sm.OLS(truncated_approvals, unnormalized_extra_df).fit()
model.summary()

0,1,2,3
Dep. Variable:,adjusted_approve,R-squared:,0.084
Model:,OLS,Adj. R-squared:,0.045
Method:,Least Squares,F-statistic:,2.134
Date:,"Mon, 02 Dec 2019",Prob (F-statistic):,0.0428
Time:,13:02:15,Log-Likelihood:,-303.29
No. Observations:,171,AIC:,622.6
Df Residuals:,163,BIC:,647.7
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Anticipation,46.8945,26.920,1.742,0.083,-6.263,100.052
Trust,39.7977,1.054,37.760,0.000,37.716,41.879
Surprise,31.2031,5.646,5.526,0.000,20.054,42.352
Sadness,36.2345,12.394,2.924,0.004,11.762,60.707
Joy,32.5687,3.388,9.612,0.000,25.878,39.260
Fear,33.3753,5.959,5.601,0.000,21.608,45.143
Disgust,13.6908,10.785,1.269,0.206,-7.607,34.988
Anger,63.1816,11.217,5.633,0.000,41.033,85.330

0,1,2,3
Omnibus:,11.07,Durbin-Watson:,0.471
Prob(Omnibus):,0.004,Jarque-Bera (JB):,11.299
Skew:,0.586,Prob(JB):,0.00352
Kurtosis:,3.461,Cond. No.,158.0


# Topic Analysis

In [266]:
topic_df = pd.read_excel('./data/TopicScoresPerDay_Final.xlsx')
dated_topic_df = topic_df[(topic_df['Filename'] > 65) & (topic_df['Filename'] < 237)]
dated_topic_df.head()

Unnamed: 0,Filename,Segment,WC,WPS,Sixltr,Dic,topic1,topic2,topic3,topic4,...,OtherP,Em1,Em2,Em3,Em4,Em5,Em6,Em7,Em8,Gallup Value
65,66,1,46417,46417,15.96,37.16,5.83,0.0,0.0,0.03,...,0.01,,,,,,,,,
66,67,1,13122,13122,15.54,37.54,6.46,0.0,0.0,0.05,...,0.0,,,,,,,,,
67,68,1,19001,19001,16.57,36.51,6.12,0.0,0.0,0.04,...,0.0,,,,,,,,,
68,69,1,17572,17572,16.09,36.94,6.2,0.0,0.01,0.01,...,0.0,,,,,,,,,
69,70,1,35954,35954,17.69,36.89,6.08,0.0,0.0,0.03,...,0.0,,,,,,,,,


In [267]:
truncated_topic_df = dated_topic_df.drop(['Filename', 'Segment', 'WC', 'WPS', 'Sixltr', 'Dic', 'AllPunc', 'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam', 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 'Em1', 'Em2', 'Em3', 'Em4', 'Em5', 'Em6', 'Em7', 'Em8', 'Gallup Value'], axis=1)
full_dates_df = topic_df.drop(['Segment', 'WC', 'WPS', 'Sixltr', 'Dic', 'AllPunc', 'Period', 'Comma', 'Colon', 'SemiC', 'QMark', 'Exclam', 'Dash', 'Quote', 'Apostro', 'Parenth', 'OtherP', 'Em1', 'Em2', 'Em3', 'Em4', 'Em5', 'Em6', 'Em7', 'Em8', 'Gallup Value'], axis=1)
truncated_topic_df.head()

Unnamed: 0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,...,topic492,topic493,topic494,topic495,topic496,topic497,topic498,topic499,topic500,topic501
65,5.83,0.0,0.0,0.03,0.0,0.01,0.47,0.37,0.1,0.67,...,0.05,0.75,0.19,0.63,0.27,0.31,0.29,0.36,0.71,0.26
66,6.46,0.0,0.0,0.05,0.0,0.01,0.64,0.57,0.19,0.65,...,0.06,0.92,0.21,0.55,0.43,0.3,0.25,0.39,0.77,0.3
67,6.12,0.0,0.0,0.04,0.0,0.01,0.46,0.65,0.08,0.49,...,0.06,0.7,0.16,0.56,0.37,0.34,0.32,0.27,0.48,0.31
68,6.2,0.0,0.01,0.01,0.0,0.02,0.61,0.51,0.1,0.69,...,0.06,0.55,0.15,0.73,0.41,0.36,0.28,0.24,0.39,0.38
69,6.08,0.0,0.0,0.03,0.0,0.02,0.47,0.5,0.07,0.67,...,0.05,0.76,0.24,0.76,0.27,0.2,0.28,0.34,0.63,0.29


In [268]:
print(len(truncated_topic_df))
print(len(truncated_approvals))
truncated_topic_df = truncated_topic_df.reset_index(drop=True)
truncated_approvals.head()

171
171


0    41.35173
1    42.25173
2    40.95173
3    40.95173
4    38.45173
Name: adjusted_approve, dtype: float64

In [269]:
model = sm.OLS(truncated_approvals, truncated_topic_df).fit()
model.summary()

  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  * (1 - self.rsquared))
  return self.ssr/self.df_resid
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,adjusted_approve,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,0.0
Date:,"Mon, 02 Dec 2019",Prob (F-statistic):,
Time:,13:02:22,Log-Likelihood:,4865.3
No. Observations:,171,AIC:,-9389.0
Df Residuals:,0,BIC:,-8851.0
Df Model:,170,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
topic1,-0.1166,inf,-0,,,
topic2,-1.3e-14,inf,-0,,,
topic3,0.7427,inf,0,,,
topic4,0.5159,inf,0,,,
topic5,-0.3354,inf,-0,,,
topic6,-0.1681,inf,-0,,,
topic7,0.2423,inf,0,,,
topic8,0.5517,inf,0,,,
topic9,-0.2845,inf,-0,,,

0,1,2,3
Omnibus:,0.558,Durbin-Watson:,0.253
Prob(Omnibus):,0.756,Jarque-Bera (JB):,0.255
Skew:,0.011,Prob(JB):,0.88
Kurtosis:,3.188,Cond. No.,523.0


In [270]:
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
truncated_approvals.head()

0    41.35173
1    42.25173
2    40.95173
3    40.95173
4    38.45173
Name: adjusted_approve, dtype: float64

In [271]:
regr = ElasticNet(random_state=0)
regr.fit(unnormalized_extra_df, truncated_approvals)

ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
      max_iter=1000, normalize=False, positive=False, precompute=False,
      random_state=0, selection='cyclic', tol=0.0001, warm_start=False)

In [272]:
print(regr.coef_)

[-0.  0. -0.  0. -0. -0. -0.  0.]


In [273]:
import statsmodels.stats.stattools as st
st.durbin_watson(unnormalized_extra_df)

array([ 0.4040979974,  0.0053101663,  0.0545414422,  0.1421249161,
        0.0635636747,  0.1185536291,  0.1942353841,  0.1636871334])

In [274]:
st.durbin_watson(truncated_df)

array([ 1.670579925 ,  1.7029164864,  1.4779579957,  1.7649995919,
        1.4354337143,  1.5270837484,  1.6964359888])

In [275]:
from sklearn import linear_model
from sklearn.model_selection import KFold
from sklearn.cross_validation import cross_val_score, cross_val_predict
from sklearn import metrics

lm = linear_model.LinearRegression()
model = lm.fit(training_data, training_approvals)
predictions = lm.predict(test_data)
print(lm.score(test_data, test_approvals))

-0.180977129481


In [276]:
full_data = pd.concat([truncated_df, truncated_topic_df], axis=1)
full_data.head()

Unnamed: 0,Anticipation Z-Score,Trust Z-Score,Surprise Z-Score,Sadness Z-Score,Joy Z-Score,Disgust Z-Score,Anger Z-Score,topic1,topic2,topic3,...,topic492,topic493,topic494,topic495,topic496,topic497,topic498,topic499,topic500,topic501
0,2.042746,-0.365817,0.746002,-0.439052,0.149226,-0.375811,0.840355,5.83,0.0,0.0,...,0.05,0.75,0.19,0.63,0.27,0.31,0.29,0.36,0.71,0.26
1,0.43993,-0.282787,-0.206227,0.724617,1.160845,-0.887824,-0.794612,6.46,0.0,0.0,...,0.06,0.92,0.21,0.55,0.43,0.3,0.25,0.39,0.77,0.3
2,0.569221,-0.479068,-0.084044,1.187529,1.493276,-0.772361,-0.864009,6.12,0.0,0.0,...,0.06,0.7,0.16,0.56,0.37,0.34,0.32,0.27,0.48,0.31
3,-0.159566,-0.980581,0.810779,-0.120901,0.546484,1.081614,0.366512,6.2,0.0,0.01,...,0.06,0.55,0.15,0.73,0.41,0.36,0.28,0.24,0.39,0.38
4,-0.702904,2.429096,-2.111836,0.366478,-1.014103,-2.029214,0.673508,6.08,0.0,0.0,...,0.05,0.76,0.24,0.76,0.27,0.2,0.28,0.34,0.63,0.29


In [286]:
kf = KFold(n_splits=2) # Define the split - into 6 folds 
kf.get_n_splits(full_data) 

# Perform 6-fold cross validation
model = linear_model.LinearRegression()
scores = cross_val_score(model, full_data, truncated_approvals, cv=2)
print("Cross-validated scores:", scores)

elastic_model = ElasticNet(random_state=0)
elastic_scores = cross_val_score(elastic_model, full_data, truncated_approvals, cv=2)
print("Elastic scores:", elastic_scores)

Cross-validated scores: [-1.3036052765 -1.1283350805]
Elastic scores: [-0.3116845849 -0.5968009129]


In [278]:
from statsmodels.tsa.stattools import grangercausalitytests
for (columnName, columnData) in full_dates_df.iteritems():
    if columnData.hasnans:
        print("wtf", columnName)
    if columnName == 'Filename':
        continue
    z_scored = zscore(full_dates_df, columnName)
    z_scored = z_scored[65:236]
    z_scored.fillna(0)
    approvals_array = truncated_approvals.values
    z_score_array = z_scored.values
    yo_mama = np.asarray([approvals_array, z_score_array]).T
#     print(yo_mama)
#     print(z_score_array)
#     print(len(z_score_array))
#     print(approvals_array)
#     print(len(approvals_array))
    x = np.asarray([truncated_approvals.values, z_scored.values]).T
    print(columnName, ":", )
    print(grangercausalitytests(x, maxlag=3))

topic1 :

Granger Causality
number of lags (no zero) 1
ssr based F test:         F=1.3883  , p=0.2404  , df_denom=167, df_num=1
ssr based chi2 test:   chi2=1.4132  , p=0.2345  , df=1
likelihood ratio test: chi2=1.4074  , p=0.2355  , df=1
parameter F test:         F=1.3883  , p=0.2404  , df_denom=167, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=2.1481  , p=0.1200  , df_denom=164, df_num=2
ssr based chi2 test:   chi2=4.4271  , p=0.1093  , df=2
likelihood ratio test: chi2=4.3701  , p=0.1125  , df=2
parameter F test:         F=2.1481  , p=0.1200  , df_denom=164, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.2670  , p=0.2876  , df_denom=161, df_num=3
ssr based chi2 test:   chi2=3.9663  , p=0.2651  , df=3
likelihood ratio test: chi2=3.9202  , p=0.2702  , df=3
parameter F test:         F=1.2670  , p=0.2876  , df_denom=161, df_num=3
{1: ({'ssr_ftest': (1.3882990785552258, 0.24036876528210441, 167.0, 1), 'ssr_chi2test

MissingDataError: exog contains inf or nans

In [279]:
model = sm.OLS(truncated_approvals, full_data).fit()
model.summary()

  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  * (1 - self.rsquared))
  return self.ssr/self.df_resid
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,adjusted_approve,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,0.0
Date:,"Mon, 02 Dec 2019",Prob (F-statistic):,
Time:,13:02:37,Log-Likelihood:,4738.6
No. Observations:,171,AIC:,-9135.0
Df Residuals:,0,BIC:,-8598.0
Df Model:,170,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Anticipation Z-Score,0.2247,inf,0,,,
Trust Z-Score,-0.1605,inf,-0,,,
Surprise Z-Score,-0.0921,inf,-0,,,
Sadness Z-Score,-0.1765,inf,-0,,,
Joy Z-Score,-0.4645,inf,-0,,,
Disgust Z-Score,-0.0752,inf,-0,,,
Anger Z-Score,-0.1045,inf,-0,,,
topic1,-0.4555,inf,-0,,,
topic2,1.525e-14,inf,0,,,

0,1,2,3
Omnibus:,4.886,Durbin-Watson:,0.045
Prob(Omnibus):,0.087,Jarque-Bera (JB):,5.86
Skew:,0.171,Prob(JB):,0.0534
Kurtosis:,3.84,Cond. No.,510.0


In [280]:
truncated_approvals.head()

0    41.35173
1    42.25173
2    40.95173
3    40.95173
4    38.45173
Name: adjusted_approve, dtype: float64

In [281]:
full_data.head()

Unnamed: 0,Anticipation Z-Score,Trust Z-Score,Surprise Z-Score,Sadness Z-Score,Joy Z-Score,Disgust Z-Score,Anger Z-Score,topic1,topic2,topic3,...,topic492,topic493,topic494,topic495,topic496,topic497,topic498,topic499,topic500,topic501
0,2.042746,-0.365817,0.746002,-0.439052,0.149226,-0.375811,0.840355,5.83,0.0,0.0,...,0.05,0.75,0.19,0.63,0.27,0.31,0.29,0.36,0.71,0.26
1,0.43993,-0.282787,-0.206227,0.724617,1.160845,-0.887824,-0.794612,6.46,0.0,0.0,...,0.06,0.92,0.21,0.55,0.43,0.3,0.25,0.39,0.77,0.3
2,0.569221,-0.479068,-0.084044,1.187529,1.493276,-0.772361,-0.864009,6.12,0.0,0.0,...,0.06,0.7,0.16,0.56,0.37,0.34,0.32,0.27,0.48,0.31
3,-0.159566,-0.980581,0.810779,-0.120901,0.546484,1.081614,0.366512,6.2,0.0,0.01,...,0.06,0.55,0.15,0.73,0.41,0.36,0.28,0.24,0.39,0.38
4,-0.702904,2.429096,-2.111836,0.366478,-1.014103,-2.029214,0.673508,6.08,0.0,0.0,...,0.05,0.76,0.24,0.76,0.27,0.2,0.28,0.34,0.63,0.29


In [282]:
to_upload = pd.concat([truncated_topic_df, truncated_df, original_emotions, truncated_approvals, non_adjusted_approvals], axis=1)
to_upload = to_upload.rename(columns={'adjusted_approve': 'Ipsos Adjusted Approval', 'approve': 'Ipsos Approval'})
to_upload.head()

Unnamed: 0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,...,Anger,Disgust,Fear,Joy,Sadness,Surprise,Trust,Anticipation,Ipsos Adjusted Approval,Ipsos Approval
0,5.83,0.0,0.0,0.03,0.0,0.01,0.47,0.37,0.1,0.67,...,0.025688,0.022018,0.036697,0.122936,0.027523,0.13211,0.620183,0.012844,41.35173,40.6
1,6.46,0.0,0.0,0.05,0.0,0.01,0.64,0.57,0.19,0.65,...,0.015248,0.017789,0.054638,0.121982,0.034307,0.113088,0.635324,0.007624,42.25173,41.5
2,6.12,0.0,0.0,0.04,0.0,0.01,0.46,0.65,0.08,0.49,...,0.013717,0.019204,0.046639,0.130316,0.038409,0.116598,0.626886,0.00823,40.95173,40.2
3,6.2,0.0,0.01,0.01,0.0,0.02,0.61,0.51,0.1,0.69,...,0.022744,0.030814,0.038151,0.118855,0.031548,0.131328,0.62069,0.005869,40.95173,40.2
4,6.08,0.0,0.0,0.03,0.0,0.02,0.47,0.5,0.07,0.67,...,0.022989,0.007663,0.034483,0.099617,0.034483,0.061303,0.735632,0.003831,38.45173,37.7


In [283]:
to_upload.to_csv("All_data_merged.csv")