In [21]:
import pandas as pd
import numpy as np
from pathlib import Path
from joblib import dump, load
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor

%matplotlib inline


import warnings
warnings.filterwarnings('ignore')

import pydotplus
from IPython.display import Image

In [22]:
elon_csv_path = Path('../group1-project2-main/elon_df.csv')
elon_signals_df = pd.read_csv(elon_csv_path)
tsla_csv_path = Path('../group1-project2-main/TSLA_prices_df.csv')
tsla_results_df = pd.read_csv(tsla_csv_path)
total_data_path = Path('../group1-project2-main/total_data.csv')
total_data = pd.read_csv(total_data_path)

In [23]:
total_data.head()

Unnamed: 0.1,Unnamed: 0,date,TSLA,earnings flag,tweet_length,encoded_sentiment,sentiment,polairty,tweet
0,1970-01-01 00:00:00.000000361,2011-12-01,-0.028,0.0,118.0,0.0,neutral,0.0,I made the volume on the Model S http://t.co/...
1,1970-01-01 00:00:00.000000362,2011-12-01,-0.028,0.0,141.0,1.0,positive,0.65,Went to Iceland on Sat to ride bumper cars on ...
2,1970-01-01 00:00:00.000000376,2011-12-21,-0.066,0.0,62.0,1.0,positive,0.25,Yum! Even better than deep fried butter: http...
3,1970-01-01 00:00:00.000000377,2011-12-21,-0.066,0.0,140.0,-1.0,negative,-0.025,"Yeah, this really is me, as my Mom will attest..."
4,1970-01-01 00:00:00.000000378,2011-12-21,-0.066,0.0,136.0,-1.0,negative,-0.5,Got called randomly by Kanye West today and re...


In [24]:
total_data = total_data.set_index('date')
total_data.head()

Unnamed: 0_level_0,Unnamed: 0,TSLA,earnings flag,tweet_length,encoded_sentiment,sentiment,polairty,tweet
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2011-12-01,1970-01-01 00:00:00.000000361,-0.028,0.0,118.0,0.0,neutral,0.0,I made the volume on the Model S http://t.co/...
2011-12-01,1970-01-01 00:00:00.000000362,-0.028,0.0,141.0,1.0,positive,0.65,Went to Iceland on Sat to ride bumper cars on ...
2011-12-21,1970-01-01 00:00:00.000000376,-0.066,0.0,62.0,1.0,positive,0.25,Yum! Even better than deep fried butter: http...
2011-12-21,1970-01-01 00:00:00.000000377,-0.066,0.0,140.0,-1.0,negative,-0.025,"Yeah, this really is me, as my Mom will attest..."
2011-12-21,1970-01-01 00:00:00.000000378,-0.066,0.0,136.0,-1.0,negative,-0.5,Got called randomly by Kanye West today and re...


In [25]:
total_data = total_data.drop(columns = ['Unnamed: 0'])

In [26]:
total_data.tail()

Unnamed: 0_level_0,TSLA,earnings flag,tweet_length,encoded_sentiment,sentiment,polairty,tweet
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-03-19,1.710022,1.0,19.0,0.0,neutral,0.0,ð¯
2021-03-19,1.710022,1.0,24.0,0.0,neutral,0.0,
2021-03-19,1.710022,1.0,106.0,0.0,neutral,0.0,"Floor is installed, elevator is operational, w..."
2021-03-19,1.710022,1.0,63.0,1.0,positive,0.28,The high bay is measured to fit!
2021-03-22,15.130005,1.0,86.0,0.0,neutral,0.0,Going well. Lot of work for an extra 20 secs o...


In [27]:
x_var_list = ['encoded_sentiment']

total_data[x_var_list].head()

Unnamed: 0_level_0,encoded_sentiment
date,Unnamed: 1_level_1
2011-12-01,0.0
2011-12-01,1.0
2011-12-21,1.0
2011-12-21,-1.0
2011-12-21,-1.0


In [28]:
y_var_list = ['earnings flag']

total_data[y_var_list].tail()

Unnamed: 0_level_0,earnings flag
date,Unnamed: 1_level_1
2021-03-19,1.0
2021-03-19,1.0
2021-03-19,1.0
2021-03-19,1.0
2021-03-22,1.0


In [29]:
total_data[x_var_list] = total_data[x_var_list].shift(1)
total_data[x_var_list].head()

Unnamed: 0_level_0,encoded_sentiment
date,Unnamed: 1_level_1
2011-12-01,
2011-12-01,0.0
2011-12-21,1.0
2011-12-21,1.0
2011-12-21,-1.0


In [30]:
elon_signals_df.dropna(subset=x_var_list, inplace=True)
elon_signals_df = elon_signals_df.replace([np.inf, -np.inf], np.nan)
elon_signals_df.tail()

Unnamed: 0,date,tweet_length,encoded_sentiment,sentiment,polarity,tweet
12334,2011-12-03,33,0,neutral,0.0,That was a total non sequitur btw
12335,2011-12-03,136,1,positive,0.45,"Great Voltaire quote, arguably better than Twa..."
12336,2011-12-01,118,0,neutral,0.0,I made the volume on the Model S http://t.co/...
12337,2011-12-01,141,1,positive,0.65,Went to Iceland on Sat to ride bumper cars on ...
12338,2010-06-04,92,0,neutral,0.0,"Please ignore prior tweets, as that was someon..."


In [35]:
X_test=elon_signals_df
X_test=X_test.drop(columns=['sentiment','tweet','polarity'])
X_test.head()

Unnamed: 0,date,tweet_length,encoded_sentiment
0,2021-03-22,86,0
1,2021-03-21,50,0
2,2021-03-21,18,1
3,2021-03-21,128,1
4,2021-03-21,170,1


In [32]:
results = tsla_results_df
results.set_index(pd.to_datetime(results['date'], infer_datetime_format=True), inplace=True)
results.drop(columns=['date'], inplace=True)
results.head()

Unnamed: 0_level_0,TSLA,earnings flag
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-06-29,,0
2010-06-30,-0.012,0
2010-07-01,-0.374,0
2010-07-02,-0.552,0
2010-07-06,-0.618,0


In [33]:
model = load('../group1-project2-main/random_forest_model.joblib')
predictions = model.predict(X_test)
predictions

ModuleNotFoundError: No module named 'sklearn.ensemble.forest'

In [None]:
results["Predicted Value"] = predictions
results

In [None]:
results['Predicted Value'].replace(0, -1, inplace=True)
results

In [None]:
(1 + (results['Return'] * results['Predicted Value'])).cumprod().plot()