# Import Relevant Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Prepare inputs and targets

In [2]:
# Load the Tesla Twitter-sentiment time series and preview its first rows.
sentiment_csv_path = "../data/Twitter_sentiment/tesla_sentiment_time_series.csv"
tesla_sentiment = pd.read_csv(sentiment_csv_path)
tesla_sentiment.head()

Unnamed: 0,date,binary_mean,binary_squared_mean,binary_sqrt_mean,prob_mean,prob_squared_mean,prob_sqrt_mean
0,1/3/2020,1.0,1.0,0.999,0.494,0.542,0.468
1,1/4/2020,1.0,1.0,0.999,0.407,0.403,0.409
2,1/5/2020,1.0,1.0,0.999,0.303,0.31,0.299
3,1/6/2020,-1.0,-1.0,-0.999,-0.698,-0.711,-0.69
4,1/7/2020,0.41,0.358,0.45,0.362,0.384,0.348


In [3]:
# Load the LSTM price predictions (columns include Date, Pred and Actual)
# and preview the first rows.
lstm_csv_path = "../data/LSTM/TSLA_LSTM_predictions.csv"
tesla_lstm = pd.read_csv(lstm_csv_path)
tesla_lstm.head()

Unnamed: 0.1,Unnamed: 0,Date,Pred,Actual
0,2393,1/2/2020,83.771358,86.052002
1,2394,1/3/2020,86.065369,88.601997
2,2395,1/6/2020,88.515733,90.307999
3,2396,1/7/2020,90.103062,93.811996
4,2397,1/8/2020,93.680037,98.428001


In [4]:
# Inner-join sentiment rows with LSTM rows that share the same date.
# Neither CSV is read with parse_dates, so the keys are compared as raw strings;
# only dates present in both frames survive the join.
merged = tesla_sentiment.merge(
    tesla_lstm,
    how='inner',
    left_on='date',
    right_on='Date',
)

In [5]:
# Keep only the date, the sentiment feature, the LSTM prediction and the actual
# price, then discard any row with a missing value in one of those columns.
merged_changed = (
    merged
    .loc[:, ['date', 'prob_sqrt_mean', 'Pred', 'Actual']]
    .dropna()
)

In [6]:
# First regressor: the `prob_sqrt_mean` sentiment column as a NumPy array.
inputs_1 = merged_changed['prob_sqrt_mean'].values
inputs_1

array([ 0.468, -0.69 ,  0.348, -0.078, -0.27 ,  0.333, -0.088, -0.39 ,
       -0.022, -0.203,  0.022,  0.242, -0.04 ,  0.592,  0.047,  0.08 ,
       -0.171,  0.039,  0.181,  0.064, -0.268,  0.162, -0.044,  0.316,
       -0.561,  0.369,  0.509,  0.242,  0.379,  0.102,  0.417,  0.136,
       -0.342, -0.089, -0.508, -0.063,  0.154,  0.605, -0.202, -0.362,
        0.785,  0.209, -0.257,  0.373, -0.331,  0.089,  0.189, -0.781,
        0.201,  0.322,  0.443,  0.504,  0.299,  0.082,  0.077, -0.306,
       -0.129,  0.285, -0.001,  0.037,  0.538, -0.022, -0.364,  0.067,
        0.309, -0.109,  0.327, -0.228, -0.267,  0.144, -0.089,  0.039,
        0.423,  0.005,  0.453, -0.154,  0.004, -0.168,  0.18 ,  0.354,
        0.004,  0.179, -0.073, -0.371,  0.36 ,  0.275,  0.396,  0.333,
        0.125,  0.051,  0.363,  0.636, -0.078, -0.21 ,  0.354,  0.108,
       -0.069,  0.31 ,  0.17 , -0.189, -0.112, -0.117,  0.153, -0.066,
        0.12 , -0.034, -0.155,  0.152,  0.106,  0.254,  0.06 , -0.159,
      

In [7]:
# Second regressor: the LSTM-predicted price column as a NumPy array.
inputs_2 = merged_changed['Pred'].values
inputs_2.shape

(208,)

In [8]:
# Place the two 1-D regressors side by side: one row per observation,
# column 0 = sentiment, column 1 = LSTM prediction.
inputs = np.column_stack((inputs_1, inputs_2))
inputs.shape

(208, 2)

In [9]:
# Regression target: the fourth column of merged_changed (position 3), which is
# 'Actual' given the column order selected when merged_changed was built.
target_column = merged_changed.columns[3]
targets = merged_changed[target_column].values

# Fit a model

In [10]:
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

# Standardize both regressors (zero mean, unit variance) before fitting.
# The scaled features and the design matrix get their own names instead of
# overwriting `inputs`: re-running this cell therefore always starts from the
# raw features. Overwriting `inputs` made a second run scale the constant
# column to zeros and prepend another intercept, giving a rank-deficient
# design matrix.
sc = StandardScaler()
inputs_scaled = sc.fit_transform(inputs)

# sm.OLS does not add an intercept on its own, so prepend a constant column.
design_matrix = sm.add_constant(inputs_scaled)

# Ordinary least squares of the actual price on [const, sentiment, LSTM pred].
model = sm.OLS(targets, design_matrix)
results = model.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.987
Model:                            OLS   Adj. R-squared:                  0.987
Method:                 Least Squares   F-statistic:                     7612.
Date:                Mon, 15 Feb 2021   Prob (F-statistic):          4.45e-193
Time:                        22:27:04   Log-Likelihood:                -852.17
No. Observations:                 208   AIC:                             1710.
Df Residuals:                     205   BIC:                             1720.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        252.6079      1.017    248.463      0.0

# Save Results

In [11]:
# In-sample fitted values of the OLS model, i.e. its predictions on the same
# design matrix it was fitted on.
predictions = results.fittedvalues

In [12]:
# Assemble date, fitted price and actual price into one frame. The two Series
# bring merged_changed's index with them; `predictions` is a plain array and is
# matched to those rows by position.
tesla_stock_prediction = pd.DataFrame({
    'Date': merged_changed['date'],
    'Predictions': predictions,
    'Actual': merged_changed['Actual'],
})

# NOTE(review): to_csv also writes the index as an unnamed first column, which
# reappears as "Unnamed: 0" when the file is read back - confirm this is wanted.
output_csv_path = "../Stock Price Predictions with Twitter/tesla_stock_prediction_twitter.csv"
tesla_stock_prediction.to_csv(output_csv_path)