# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Load the Amazon Twitter-sentiment time series and preview the first rows.
amazon_sentiment = pd.read_csv(
    filepath_or_buffer="../data/Twitter_sentiment/amazon_sentiment_time_series.csv",
)
amazon_sentiment.head(n=5)

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/2/2020,0.181,0.128,0.222,0.115,0.106,0.131
1,1/3/2020,0.34,0.328,0.325,0.282,0.271,0.266
2,1/4/2020,-0.509,-0.734,-0.343,-0.259,-0.447,-0.139
3,1/5/2020,-0.21,-0.436,-0.101,-0.151,-0.335,-0.067
4,1/6/2020,0.417,0.149,0.518,0.356,0.28,0.369


In [3]:
# Load the AMZN LSTM predictions (columns include Date, Pred, Actual) and preview the first rows.
amazon_lstm = pd.read_csv(
    filepath_or_buffer="../data/LSTM/AMZN_LSTM_predictions.csv",
)
amazon_lstm.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,5694,1/2/2020,1871.227816,1898.01001
1,5695,1/3/2020,1918.217786,1874.969971
2,5696,1/6/2020,1893.154644,1902.880005
3,5697,1/7/2020,1925.583281,1906.859985
4,5698,1/8/2020,1924.934127,1891.969971


In [4]:
# Join sentiment rows to LSTM rows that share the same date string.
# The join is an inner join (the pandas default), so dates present in only one file are dropped.
merged = amazon_sentiment.merge(
    amazon_lstm,
    how='inner',
    left_on='date',
    right_on='Date',
)

In [5]:
# Keep only the columns the regression uses, then discard every row that has a missing value.
merged_changed = (
    merged
    .loc[:, ['date', 'prob_sqrt_mean', 'Pred', 'Actual']]
    .dropna()
)

In [6]:
# First feature: the 'prob_sqrt_mean' sentiment column as a NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].to_numpy()
inputs_1

array([ 0.131,  0.266,  0.369,  0.057,  0.132,  0.196,  0.205,  0.203,
        0.246,  0.2  ,  0.324,  0.224,  0.459,  0.053,  0.241,  0.127,
        0.113,  0.192,  0.567,  0.273,  0.473,  0.342,  0.35 ,  0.275,
        0.228,  0.035,  0.625,  0.129,  0.264,  0.026,  0.076,  0.379,
        0.158,  0.143,  0.421, -0.001,  0.179,  0.221,  0.464,  0.391,
        0.199,  0.225,  0.238,  0.291,  0.301,  0.63 ,  0.483,  0.455,
       -0.088, -0.241,  0.37 ,  0.086,  0.305,  0.104,  0.069,  0.089,
        0.04 ,  0.237,  0.198,  0.396,  0.12 ,  0.206, -0.195,  0.327,
        0.015,  0.234,  0.406,  0.542,  0.074,  0.366,  0.342,  0.337,
        0.152, -0.25 ,  0.1  ,  0.099,  0.298,  0.334,  0.215,  0.148,
        0.248,  0.031,  0.196, -0.038,  0.199,  0.327,  0.277,  0.192,
        0.159,  0.084,  0.38 ,  0.225,  0.081, -0.012,  0.367,  0.097,
        0.204,  0.237,  0.186,  0.16 ,  0.293,  0.139,  0.344,  0.202,
        0.111,  0.223,  0.15 ,  0.134,  0.151,  0.239,  0.095,  0.16 ,
      

In [7]:
# Second feature: the 'Pred' column from the LSTM predictions file as a NumPy array.
inputs_2 = merged_changed['Pred'].to_numpy()
inputs_2.shape

(221,)

In [8]:
# Put the two 1-D feature arrays side by side: one row per observation, one column per feature.
inputs = np.column_stack((inputs_1, inputs_2))
inputs.shape

(221, 2)

In [9]:
# Regression target: the 'Actual' column, selected by label so the lookup
# does not depend on the column order chosen when `merged_changed` was built.
targets = merged_changed['Actual'].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardise both features before the regression.
sc = StandardScaler()
# Write the scaled and constant-augmented matrices to new names instead of
# overwriting `inputs`: re-running this cell then starts from the raw feature
# matrix again rather than re-scaling its own output.
inputs_scaled = sc.fit_transform(inputs)

# sm.OLS does not add an intercept on its own, so prepend a constant column.
design_matrix = sm.add_constant(inputs_scaled)
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.985
Model:                            OLS   Adj. R-squared:                  0.985
Method:                 Least Squares   F-statistic:                     7354.
Date:                Mon, 15 Feb 2021   Prob (F-statistic):          8.57e-201
Time:                        22:15:51   Log-Likelihood:                -1239.5
No. Observations:                 221   AIC:                             2485.
Df Residuals:                     218   BIC:                             2495.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       2608.8341      4.469    583.719      0.0

# Save Results

In [11]:
# predict() is called without new data — presumably this returns the fitted
# values for the rows the model was trained on; confirm against the statsmodels docs.
predictions = results.predict()

In [12]:
# Collect the date, the model prediction and the actual value for every row,
# then write the table to CSV.
# NOTE(review): to_csv is called with its defaults, so the row index is also
# written as an unnamed leading column — confirm downstream readers expect that.
amazon_stock_prediction = pd.DataFrame({
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
})
amazon_stock_prediction.to_csv("../Stock Price Predictions with Twitter/amazon_stock_prediction_twitter.csv")