# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Load the per-date Twitter sentiment aggregates for Apple and preview them.
apple_sentiment = pd.read_csv(
    filepath_or_buffer="../data/Twitter_sentiment/apple_sentiment_time_series.csv",
)
apple_sentiment.head(n=5)

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/2/2020,1.0,1.0,1.0,0.587,0.64,0.564
1,1/3/2020,0.466,0.689,0.335,0.216,0.3,0.16
2,1/4/2020,0.476,0.715,0.337,0.348,0.471,0.271
3,1/5/2020,0.781,0.942,0.603,0.626,0.76,0.476
4,1/6/2020,-0.431,-0.201,-0.553,-0.149,0.075,-0.275


In [3]:
# Load the LSTM price predictions (columns `Date`, `Pred`, `Actual`) and preview them.
apple_lstm = pd.read_csv(
    filepath_or_buffer="../data/LSTM/AAPL_LSTM_predictions.csv",
)
apple_lstm.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,7559,1/2/2020,71.98671,74.444603
1,7560,1/3/2020,73.601325,73.72084
2,7561,1/6/2020,72.764445,74.308266
3,7562,1/7/2020,73.520607,73.958794
4,7563,1/8/2020,73.079492,75.148521


In [4]:
# Inner-join sentiment and LSTM rows on their date strings; dates present in
# only one of the two frames are dropped.
merged = pd.merge(
    left=apple_sentiment,
    right=apple_lstm,
    how='inner',
    left_on='date',
    right_on='Date',
)

In [5]:
# Keep only the columns used for modelling, then discard rows with any missing value.
merged_changed = (
    merged[['date', 'prob_sqrt_mean', 'Pred', 'Actual']]
    .dropna()
)

In [6]:
# First regressor: the `prob_sqrt_mean` sentiment score as a NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].values
inputs_1

array([ 0.564,  0.16 , -0.275,  0.285,  0.729,  0.416,  0.395, -0.073,
        0.187, -0.285,  0.015,  0.372,  0.351,  0.148,  0.492,  0.208,
        0.498, -0.35 ,  0.304,  0.136,  0.342,  0.252,  0.247,  0.152,
       -0.071,  0.484,  0.501,  0.419,  0.079,  0.741,  0.354,  0.417,
        0.049,  0.201,  0.57 ,  0.567,  0.44 ,  0.334,  0.259,  0.76 ,
        0.116,  0.308,  0.499,  0.055,  0.468,  0.574,  0.314, -0.66 ,
        0.677,  0.518,  0.571,  0.358,  0.585,  0.023, -0.02 ,  0.243,
        0.192, -0.143,  0.04 ,  0.489,  0.151, -0.059,  0.259,  0.215,
        0.294,  0.408,  0.27 ,  0.509,  0.416,  0.131,  0.474,  0.003,
       -0.51 ,  0.187,  0.039,  0.46 , -0.011,  0.046,  0.111,  0.663,
        0.217, -0.224,  0.597,  0.302,  0.153, -0.147,  0.114,  0.367,
        0.118,  0.338,  0.274, -0.055,  0.097,  0.082, -0.355,  0.05 ,
       -0.197,  0.034,  0.042,  0.022,  0.245, -0.27 ,  0.021,  0.442,
        0.01 , -0.033, -0.023, -0.02 ,  0.105, -0.301, -0.549, -0.053,
      

In [7]:
# Second regressor: the LSTM price prediction as a NumPy array.
inputs_2 = merged_changed['Pred'].values
inputs_2.shape

(221,)

In [8]:
# Place the two 1-D regressors side by side as the columns of one 2-D matrix.
inputs = np.column_stack((inputs_1, inputs_2))
inputs.shape

(221, 2)

In [9]:
# Regression target: the `Actual` column (the fourth column of `merged_changed`).
targets = merged_changed['Actual'].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardise both regressors (zero mean, unit variance per column).
# The result is bound to a NEW name: rebinding `inputs` would make this cell
# non-idempotent, because a second run would push the already scaled,
# constant-augmented matrix back through the scaler and append another
# intercept column.
sc = StandardScaler()
inputs_scaled = sc.fit_transform(inputs)

# Prepend the intercept column expected by statsmodels' OLS.
design_matrix = sm.add_constant(inputs_scaled)

# Ordinary least squares of the actual price on [const, sentiment, LSTM prediction].
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.982
Model:                            OLS   Adj. R-squared:                  0.982
Method:                 Least Squares   F-statistic:                     6111.
Date:                Mon, 15 Feb 2021   Prob (F-statistic):          3.60e-192
Time:                        22:20:12   Log-Likelihood:                -529.64
No. Observations:                 221   AIC:                             1065.
Df Residuals:                     218   BIC:                             1075.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         90.8204      0.180    504.463      0.0

# Save Results

In [11]:
# No new exogenous data is supplied, so these are in-sample predictions for
# the rows the model was fitted on.
predictions = results.predict(exog=None)

In [12]:
# Gather dates, model predictions and observed prices in a single frame.
# The two Series carry the row index of `merged_changed`; `predictions` is a
# positional array and must have one entry per row.
apple_stock_prediction = pd.DataFrame({
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
})

# Persist the result (the row index is written as the first CSV column).
apple_stock_prediction.to_csv("../Stock Price Predictions with Twitter/apple_stock_prediction_twitter.csv")