# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Netflix Twitter-sentiment time series: a `date` column plus several
# binary_* / prob_* aggregate columns (see the preview printed below).
netflix_sentiment = pd.read_csv(
    "../data/Twitter_sentiment/netflix_sentiment_time_series.csv"
)
netflix_sentiment.head(n=5)

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/2/2020,0.509,0.604,0.453,0.351,0.401,0.322
1,1/3/2020,0.055,0.037,0.079,0.05,0.053,0.066
2,1/4/2020,-0.169,-0.048,-0.228,-0.074,-0.01,-0.111
3,1/5/2020,0.058,0.163,0.0,0.123,0.172,0.096
4,1/6/2020,0.42,0.408,0.427,0.328,0.361,0.32


In [3]:
# LSTM output for NFLX: one row per `Date` with the model's `Pred` value next
# to the `Actual` value (see the preview printed below).
netflix_lstm = pd.read_csv(
    "../data/LSTM/NFLX_LSTM_predictions.csv"
)
netflix_lstm.head(n=5)

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,4432,1/2/2020,322.256573,329.809998
1,4433,1/3/2020,328.395893,325.899994
2,4434,1/6/2020,323.763554,335.829987
3,4435,1/7/2020,333.938432,330.75
4,4436,1/8/2020,328.003192,339.26001


In [4]:
# Inner join (the pandas default) on the date key, so only dates present in
# both the sentiment series and the LSTM output are kept. The keys are
# compared exactly as they were read from the two CSV files.
merged = netflix_sentiment.merge(
    netflix_lstm,
    how='inner',
    left_on='date',
    right_on='Date',
)

In [5]:
# Keep only the join key, the sentiment feature and the LSTM `Pred` / `Actual`
# columns, then discard every row that has a missing value in any of them.
merged_changed = (
    merged
    .loc[:, ['date', 'prob_sqrt_mean', 'Pred', 'Actual']]
    .dropna()
)

In [6]:
# First regressor: the `prob_sqrt_mean` sentiment column as a 1-D NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].values
inputs_1

array([ 0.322,  0.066,  0.32 , -0.093,  0.047, -0.001,  0.06 ,  0.202,
        0.043,  0.386,  0.09 , -0.31 ,  0.431, -0.069,  0.01 ,  0.149,
        0.   , -0.218,  0.197,  0.194,  0.264,  0.134,  0.004,  0.187,
       -0.085, -0.111,  0.126,  0.14 ,  0.27 ,  0.154, -0.014,  0.189,
        0.264,  0.098, -0.107, -0.066,  0.212,  0.116, -0.124,  0.003,
        0.226,  0.143,  0.25 ,  0.233,  0.616,  0.259, -0.044,  0.295,
        0.097,  0.077,  0.065,  0.225,  0.001,  0.341,  0.137, -0.006,
       -0.032,  0.343,  0.082,  0.085,  0.182,  0.125,  0.073,  0.344,
       -0.13 , -0.018,  0.073,  0.163,  0.218,  0.076,  0.268,  0.035,
        0.187, -0.227, -0.082,  0.365,  0.431,  0.158,  0.121,  0.169,
        0.091,  0.134,  0.257,  0.244,  0.251,  0.07 , -0.028, -0.14 ,
        0.019,  0.204,  0.127,  0.033,  0.166,  0.011,  0.173,  0.228,
        0.156,  0.005,  0.096, -0.076,  0.162,  0.386,  0.037, -0.058,
       -0.019,  0.061,  0.202, -0.016,  0.029,  0.167, -0.046, -0.089,
      

In [7]:
# Second regressor: the LSTM `Pred` column as a 1-D NumPy array.
inputs_2 = merged_changed['Pred'].values
inputs_2.shape

(221,)

In [8]:
# Put the two regressors side by side: one row per observation, one column per
# feature (sentiment first, LSTM prediction second).
inputs = np.stack([inputs_1, inputs_2], axis=-1)
inputs.shape

(221, 2)

In [9]:
# Regression target: the `Actual` column, selected by name rather than by
# position so it stays correct if the column selection that builds
# `merged_changed` is ever reordered or extended.
targets = merged_changed['Actual'].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardize each feature column to zero mean and unit variance.
# The scaled matrix gets its own name instead of overwriting `inputs`, so this
# cell is idempotent: re-running it always starts from the raw feature matrix
# rather than re-scaling an already scaled, constant-augmented one.
sc = StandardScaler()
inputs_scaled = sc.fit_transform(inputs)

# OLS does not add an intercept by itself; prepend a constant column
# (it appears as `const` in the summary table).
design_matrix = sm.add_constant(inputs_scaled)

# Ordinary least squares of the actual values on [const, sentiment, LSTM pred].
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.959
Model:                            OLS   Adj. R-squared:                  0.959
Method:                 Least Squares   F-statistic:                     2571.
Date:                Mon, 15 Feb 2021   Prob (F-statistic):          2.56e-152
Time:                        22:25:43   Log-Likelihood:                -883.42
No. Observations:                 221   AIC:                             1773.
Df Residuals:                     218   BIC:                             1783.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        438.1981      0.892    491.026      0.0

# Save Results

In [11]:
# No new exog is supplied, so the predictions are the in-sample fitted values
# computed on the same design matrix the model was fitted with.
predictions = results.predict(exog=None)

In [12]:
# Collect the date, the regression's fitted value and the actual value for each
# observation in one frame (indexed like `merged_changed`), then write it out.
netflix_stock_prediction = pd.DataFrame({
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
})
# NOTE(review): to_csv writes the index by default, which yields an unnamed
# leading column when the file is read back; consider index=False if no
# downstream reader relies on that column.
netflix_stock_prediction.to_csv(
    "../Stock Price Predictions with Twitter/netflix_stock_prediction_twitter.csv"
)