# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Load the daily Google sentiment time series (one row per date, several
# aggregated sentiment columns) and preview the first rows.
sentiment_csv_path = "sentiment_time_series/google_sentiment_time_series.csv"
google_sentiment = pd.read_csv(sentiment_csv_path)
google_sentiment.head()

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/1/2020,0.0,0.0,0.0,0.0,0.0,0.0
1,1/2/2020,0.0,0.0,0.0,0.0,0.0,0.0
2,1/5/2020,0.0,0.0,0.0,0.0,0.0,0.0
3,1/6/2020,0.0,0.0,0.0,0.0,0.0,0.0
4,1/7/2020,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Load the GOOGL LSTM predictions file (Date, Pred, Actual columns) and
# preview the first rows.
lstm_csv_path = "GOOGL_LSTM_predictions.csv"
google_lstm = pd.read_csv(lstm_csv_path)
google_lstm.head()

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,3868,1/2/2020,1328.691596,1367.369995
1,3869,1/3/2020,1361.228533,1360.660034
2,3870,1/6/2020,1349.106885,1394.209961
3,3871,1/7/2020,1385.93088,1393.339966
4,3872,1/8/2020,1379.950078,1404.319946


In [4]:
# Inner-join the sentiment rows with the LSTM rows on their date strings;
# only dates present in both files are kept.
merged = google_sentiment.merge(google_lstm, left_on='date', right_on='Date')

In [5]:
# Keep only the columns the regression needs (date, one sentiment feature,
# the LSTM prediction and the actual value) and drop rows with missing values.
model_columns = ['date', 'prob_sqrt_mean', 'Pred', 'Actual']
merged_changed = merged[model_columns].dropna()

In [6]:
# First regressor: the prob_sqrt_mean sentiment column as a 1-D NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].to_numpy()
inputs_1.shape

(154,)

In [7]:
# Second regressor: the LSTM prediction column as a 1-D NumPy array.
inputs_2 = merged_changed['Pred'].to_numpy()
inputs_2.shape

(154,)

In [8]:
# Place the two 1-D regressors side by side as the columns of an (n, 2) matrix.
inputs = np.column_stack((inputs_1, inputs_2))
inputs.shape

(154, 2)

In [9]:
# Regression target: the 'Actual' column, selected by label rather than by
# position so that reordering or extending the selected columns cannot
# silently change which column is used as the target.
targets = merged_changed['Actual'].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardise both regressors (zero mean, unit variance per column).
# The result is stored under a new name so that `inputs` keeps the raw
# (n, 2) matrix and this cell gives the same result every time it is re-run.
sc = StandardScaler()
inputs_scaled = sc.fit_transform(inputs)

# Prepend an intercept column and fit ordinary least squares of the
# targets on [const, scaled sentiment, scaled LSTM prediction].
design_matrix = sm.add_constant(inputs_scaled)
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.943
Model:                            OLS   Adj. R-squared:                  0.943
Method:                 Least Squares   F-statistic:                     1259.
Date:                Sat, 13 Feb 2021   Prob (F-statistic):           6.74e-95
Time:                        17:54:39   Log-Likelihood:                -760.53
No. Observations:                 154   AIC:                             1527.
Df Residuals:                     151   BIC:                             1536.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1443.1364      2.748    525.129      0.0

# Save results

In [11]:
# In-sample fitted values of the OLS model, i.e. its predictions on the
# same design matrix it was fitted on.
predictions = results.fittedvalues

In [12]:
# Collect the date, the regression's fitted values and the actual values in
# one frame (keeping merged_changed's index) and write it to CSV; the index
# is written as the first, unnamed column.
google_stock_prediction = pd.DataFrame({
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
})
google_stock_prediction.to_csv("google_stock_prediction_reddit.csv")