# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Load the Amazon sentiment time series and preview the first rows to check
# the column layout.
SENTIMENT_CSV = "sentiment_time_series/amazon_sentiment_time_series.csv"
amazon_sentiment = pd.read_csv(SENTIMENT_CSV)
amazon_sentiment.head()

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/1/2020,0.0,0.0,0.0,0.0,0.0,0.0
1,1/2/2020,0.0,0.0,0.0,0.0,0.0,0.0
2,1/3/2020,0.0,0.0,0.0,0.0,0.0,0.0
3,1/8/2020,0.0,0.0,0.0,0.0,0.0,0.0
4,1/9/2020,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
# Load the AMZN LSTM output (it carries `Pred` and `Actual` columns per date)
# and preview the first rows.
LSTM_PREDICTIONS_CSV = "AMZN_LSTM_predictions.csv"
amazon_lstm = pd.read_csv(LSTM_PREDICTIONS_CSV)
amazon_lstm.head()

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,5694,1/2/2020,1871.227816,1898.01001
1,5695,1/3/2020,1918.217786,1874.969971
2,5696,1/6/2020,1893.154644,1902.880005
3,5697,1/7/2020,1925.583281,1906.859985
4,5698,1/8/2020,1924.934127,1891.969971


In [4]:
# Inner-join the two frames on their date columns (`date` in the sentiment
# frame, `Date` in the LSTM frame); only dates present in both are kept.
merged = amazon_sentiment.merge(amazon_lstm, left_on='date', right_on='Date')

In [5]:
# Keep only the columns the regression needs, then discard any row that has a
# missing value in one of them.
model_columns = ['date', 'prob_sqrt_mean', 'Pred', 'Actual']
merged_changed = merged.loc[:, model_columns].dropna()

In [6]:
# First regressor: the `prob_sqrt_mean` sentiment column as a NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].values
inputs_1

array([ 0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,  0.   ,
        0.   , -0.053,  0.228,  0.369,  0.476,  0.14 ,  0.364,  0.404,
        0.221,  0.   , -0.171,  0.466,  0.193,  0.28 ,  0.   ,  0.26 ,
       -0.202,  0.303,  0.188,  0.215, -0.074,  0.243,  0.502,  0.378,
        0.146,  0.283,  0.325,  0.265,  0.532,  0.135,  0.5  ,  0.352,
        0.095,  0.295,  0.303,  0.24 ,  0.468,  0.377,  0.3  ,  0.333,
        0.   ,  0.384,  0.407,  0.5  ,  0.   ,  0.662,  0.347,  0.169,
        0.021,  0.4  ,  0.166,  0.375,  0.027,  0.085,  0.125,  0.289,
        0.617,  0.383,  0.33 ,  0.309,  0.313,  0.406,  0.46 ,  0.142,
        0.   ,  0.   ,  0.   ,  0.85 ,  0.   ,  0.371,  0.175,  0.707,
        0.416,  0.343,  0.125,  0.513,  0.477,  0.31 ,  0.406,  0.42 ,
        0.361,  0.337,  0.337,  0.255,  0.365,  0.215,  0.208,  0.202,
        0.297,  0.32 ,  0.384,  0.308,  0.214,  0.736,  0.282,  0.431,
       -0.034,  0.037,  0.454,  0.454,  0.461,  0.489,  0.435,  0.564,
      

In [7]:
# Second regressor: the LSTM prediction column (`Pred`) as a NumPy array.
inputs_2 = merged_changed['Pred'].values
inputs_2.shape

(156,)

In [8]:
# Put the two 1-D regressors side by side as columns -> shape (n_rows, 2).
inputs = np.column_stack((inputs_1, inputs_2))
inputs.shape

(156, 2)

In [9]:
# Regression target: the `Actual` column. Selected by name (like the inputs
# above) rather than by position, so it stays correct if the column list of
# `merged_changed` is ever reordered or extended.
targets = merged_changed['Actual'].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardize both regressors (zero mean, unit variance) so their coefficients
# are on a comparable scale. The result goes into a new name: `inputs` stays
# the raw (n_rows, 2) array, so this cell can be re-run without re-scaling a
# matrix that was already transformed and constant-augmented.
sc = StandardScaler()
inputs_scaled = sc.fit_transform(inputs)

# statsmodels' OLS does not add an intercept on its own; prepend a constant
# column to form the design matrix.
design_matrix = sm.add_constant(inputs_scaled)
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.987
Method:                 Least Squares   F-statistic:                     5715.
Date:                Sat, 13 Feb 2021   Prob (F-statistic):          1.76e-144
Time:                        17:43:48   Log-Likelihood:                -861.72
No. Observations:                 156   AIC:                             1729.
Df Residuals:                     153   BIC:                             1739.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       2666.4879      4.902    543.927      0.0

# Save Results

In [11]:
# In-sample fitted values of the OLS model: one prediction per row of the
# design matrix used for fitting.
predictions = results.fittedvalues

In [12]:
# Assemble the output table: date, fitted prediction and actual value. The
# pandas Series bring along the row index of `merged_changed`; `predictions`
# is a plain array matched to those rows by position.
prediction_columns = {
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
}
amazon_stock_prediction = pd.DataFrame().assign(**prediction_columns)

# Persist the table next to the notebook (the row index is written as the
# first, unnamed column).
amazon_stock_prediction.to_csv("amazon_stock_prediction_reddit.csv")