In [None]:
import pandas as pd
from pathlib import Path
import matplotlib as plot

# Restore every variable previously saved with %store into this session.
# NOTE(review): restored values come from earlier runs and may be stale.
%store -r

In [None]:
# Execute the PrepareData notebook in this session.
# Presumably it defines tsla_stock_values_df, which is used below — verify.
%run PrepareData.ipynb

In [None]:
# Work on a copy so the column edits below do not modify tsla_stock_values_df
df = tsla_stock_values_df.copy()
df.head()

In [None]:
# Daily return: fractional change of the close relative to the previous row
close_prices = df['close_value']
df['daily_return'] = close_prices.pct_change()

In [None]:
# Label each row with 'price_direction': 1 when 'daily_return' >= 0, else 0.
# A NaN return (the first row has no previous close) fails the comparison
# and is therefore labelled 0.
is_non_negative = df['daily_return'] >= 0
df['price_direction'] = is_non_negative.astype('int64')

## Adding technical indicators to TSLA dataframe

In [None]:
import pandas_ta as ta

# MACD (fast=12, slow=26, signal=9) on the closing price; append=True asks
# pandas_ta to add the resulting columns to df, so the return value is unused.
df.ta.macd(close='close_value', fast=12, slow=26, signal=9, append=True)

# High-low range as a percentage of the close (computed directly, not via pandas_ta)
daily_range = df['high_value'] - df['low_value']
df['HL_PCT'] = daily_range / df['close_value'] * 100.0

# 14-period RSI of the closing price, via pandas_ta
rsi_values = df.ta.rsi(close='close_value', length=14, scalar=100)
df['RSI'] = rsi_values

# Price-volume trend (PVT) from close and volume, via pandas_ta
pvt_values = df.ta.pvt(close='close_value', volume='volume', drift=1)
df['PVT'] = pvt_values

# Drop every row that still contains a NaN
# (presumably the indicator warm-up rows — verify)
df = df.dropna()

# Show the first and last rows of the result
display(df.head(), df.tail())

In [None]:
# Remove the raw columns that are not kept as model features
unused_columns = ['daily_return', 'volume', 'open_value', 'high_value', 'low_value']
df = df.drop(columns=unused_columns)
df.head()

## Create train and test sets

In [None]:
# Split the frame into the target (y) and the feature matrix (X)
target_column = 'price_direction'

# y: the labels
y = df[target_column]

# X: every remaining column
X = df.drop(columns=target_column)

In [None]:
# Preview the first rows of the labels
y.head()

In [None]:
# Preview the first rows of the features
X.head()

In [None]:
# Check the balance of our target values: number of rows per price_direction label
y.value_counts()

In [None]:
from pandas.tseries.offsets import DateOffset
# Select the start of the training period: the earliest index label in X
training_begin = X.index.min()

# Display the training begin date
print(training_begin)

In [None]:
# The training window ends three months after the earliest observation
training_window = DateOffset(months=3)
training_end = training_begin + training_window

# Show the training end date
print(training_end)

In [None]:
# Training window; label-based .loc slices include both endpoints
train_window = slice(training_begin, training_end)
X_train = X.loc[train_window]
y_train = y.loc[train_window]

# Preview the training features
X_train.head()

In [None]:
# The test window starts one hour after training_end, so a row stamped exactly
# at training_end (already inside the training slice) is not repeated here
test_begin = training_end + DateOffset(hours=1)
X_test = X.loc[test_begin:]
y_test = y.loc[test_begin:]

# Preview the test features
X_test.head()

## Using ML model to predict price movement

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features using statistics learned from the training window only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# fit() returns the scaler itself, so X_scaler is the same fitted object
X_scaler = scaler
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Preview the first five scaled training rows
X_train_scaled[:5]

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

# Build a 500-tree random forest with a fixed seed and fit it on the scaled
# training data (fit() returns the fitted estimator)
rf_model = RandomForestClassifier(n_estimators=500, random_state=1).fit(
    X_train_scaled, y_train
)

# Predict the direction for the test window, then print predictions and truth
y_pred = rf_model.predict(X_test_scaled)
print(y_pred, y_test, sep='\n')

In [None]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Balanced accuracy of the predictions on the test window (shown as the cell output)
balanced_accuracy_score(y_test, y_pred)

In [None]:
# Generate a confusion matrix for the model: true labels (y_test) against predictions (y_pred)
confusion_matrix(y_test, y_pred)

In [None]:
# Print the classification report for the model trained on price/indicator features only
print(classification_report(y_test,y_pred))

## Evaluation of ML model 
* TODO: summarize the evaluation of the model results here.


## Adding Sentiment Scores and Re-running the ML Model

In [None]:
# Execute the SentimentAnalysis notebook in this session.
# Presumably it defines tsla_sentiments_df, which is used below — verify.
%run SentimentAnalysis.ipynb

In [None]:
# Preview the first rows of the sentiment data
tsla_sentiments_df.head()

In [None]:
# Shift every row down by one position so that each index label carries the
# previous row's values (intended as the previous day's polarityScore).
# NOTE(review): the result is assigned back to tsla_sentiments_df, so re-running
# this cell shifts the data by one more row each time.
# NOTE(review): shift(1) moves by row, not by calendar day — assumes one row per
# consecutive day; TODO confirm.
tsla_sentiments_df= tsla_sentiments_df.shift(1)
tsla_sentiments_df.head()

In [None]:
# Place the sentiment columns next to the price/indicator columns (aligned on
# the index) and keep only the rows that have no missing value
df2 = pd.concat([df, tsla_sentiments_df], axis=1).dropna()

# Target (y) and features (X) for the second model run
label_column = 'price_direction'
y = df2[label_column]
X = df2.drop(columns=label_column)

X.head()

In [None]:
# Re-run the random forest model on the sentiment-augmented data set

# Train on the first three months of data; test on everything after that
training_begin = X.index.min()
training_end = training_begin + DateOffset(months=3)

X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

# Label-based .loc slices include both endpoints, so the test window must start
# just after training_end (same one-hour offset as the first run). Starting at
# training_end itself would put a row stamped exactly at training_end into both
# the training and the test set.
test_begin = training_end + DateOffset(hours=1)
X_test = X.loc[test_begin:]
y_test = y.loc[test_begin:]

# Standardise the features using statistics from the training window only
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Fit the classifier (same hyperparameters and seed as the first run) and
# predict the direction for the test window
rf_model = RandomForestClassifier(n_estimators=500, random_state=1)
rf_model = rf_model.fit(X_train_scaled, y_train)
y_pred = rf_model.predict(X_test_scaled)

In [None]:
# Print the classification report for the model trained with the sentiment features added
print(classification_report(y_test, y_pred))

### Analysis

* improve recall for 1.0 prediction

In [None]:
# Persist both frames with %store so other notebooks/sessions can restore them
# via `%store -r`
%store df2
%store df