In [2]:
import pandas as pd
import talib

# Read the raw price history and keep a handle on the closing-price column,
# which every indicator below is derived from.
data = pd.read_csv('TSLA_ver3.csv')
close = data['Close']

# TA-Lib indicators: On-Balance Volume, 14-period RSI and 20-period simple
# moving average. They are computed first and attached afterwards so the new
# columns appear in the order OBV, RSI, MA.
obv = talib.OBV(close, data['Volume'])
rsi = talib.RSI(close, timeperiod=14)
ma = talib.SMA(close, timeperiod=20)
data['OBV'] = obv
data['RSI'] = rsi
data['MA'] = ma

# Row-over-row difference of the close (NaN on the first row).
data['Price Change'] = close.diff()

# Primary trend: where the close sits relative to its moving average.
# Rows where neither comparison holds (equal values, or MA still NaN)
# keep the 'Neutral' default.
above_ma = close > data['MA']
below_ma = close < data['MA']
data['Primary Trend'] = 'Neutral'
data.loc[above_ma, 'Primary Trend'] = 'Bullish'
data.loc[below_ma, 'Primary Trend'] = 'Bearish'

# Direction: +1 when a bullish trend coincides with RSI above 50, -1 when a
# bearish trend coincides with RSI below 50, 0 otherwise. The two trend masks
# are mutually exclusive, so they are exactly the 'Bullish' / 'Bearish' rows.
rsi_above_mid = data['RSI'] > 50
rsi_below_mid = data['RSI'] < 50
data['Direction'] = 0
data.loc[above_ma & rsi_above_mid, 'Direction'] = 1
data.loc[below_ma & rsi_below_mid, 'Direction'] = -1



# Add Target column.
# Target is -1 (or +1) on the LAST row of every window of RUN_LENGTH
# consecutive rows whose Direction is -1 (or +1); every other row stays 0.
# In other words it marks the 4th straight bearish/bullish day, i.e. a
# confirmed run rather than the first day of a new trend.
#
# Implemented with rolling windows instead of a per-row Python loop: the
# result is the same, but it is vectorized and positional, so it no longer
# depends on `data` carrying a default 0..n-1 integer index.
#
# NOTE(review): Target on a row is derived from Direction on that same row
# and the three rows before it, so it contains no information about later
# rows.
RUN_LENGTH = 4
direction = data['Direction']

# Count matching rows inside each trailing window; the first RUN_LENGTH - 1
# rows have an incomplete window (NaN count) and therefore never qualify.
bearish_run = direction.eq(-1).astype('int64').rolling(RUN_LENGTH).sum().eq(RUN_LENGTH)
bullish_run = direction.eq(1).astype('int64').rolling(RUN_LENGTH).sum().eq(RUN_LENGTH)

data['Target'] = 0
data.loc[bearish_run, 'Target'] = -1  # RUN_LENGTH consecutive bearish rows end here
data.loc[bullish_run, 'Target'] = 1   # RUN_LENGTH consecutive bullish rows end here

# Drop every row that still has a NaN in any column (for example the leading
# rows where the 20-period moving average is not yet defined).
data = data.dropna(axis=0, how='any')

# Persist the cleaned feature table without writing the index as a column.
data.to_csv('TSLA_ver3pro.csv', index=False)

# Show the first ten remaining rows as a sanity check.
preview = data.head(10)
print(preview)




          Date      Open      High       Low     Close  Adj Close    Volume  \
19  2010-07-27  1.394000  1.412000  1.350667  1.370000   1.370000   9295500   
20  2010-07-28  1.370000  1.393333  1.367333  1.381333   1.381333   7008000   
21  2010-07-29  1.384667  1.392000  1.333333  1.356667   1.356667   9240000   
22  2010-07-30  1.346667  1.362667  1.303333  1.329333   1.329333   6403500   
23  2010-08-02  1.366667  1.398000  1.355333  1.394667   1.394667  10771500   
24  2010-08-03  1.400000  1.463333  1.388000  1.463333   1.463333  18457500   
25  2010-08-04  1.463333  1.478667  1.390000  1.417333   1.417333  13695000   
26  2010-08-05  1.436000  1.436667  1.336667  1.363333   1.363333  11943000   
27  2010-08-06  1.340000  1.344000  1.301333  1.306000   1.306000  11128500   
28  2010-08-09  1.326667  1.332000  1.296667  1.306667   1.306667  12190500   

            OBV        RSI        MA  Price Change Primary Trend  Direction  \
19 -170926500.0  40.940803  1.324767     -0.026667 

In [3]:
import warnings
warnings.simplefilter('ignore')

In [4]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the engineered feature table saved by the feature-engineering cell.
data = pd.read_csv('TSLA_ver3pro.csv')

# Define features and target; the categorical trend label is one-hot encoded.
# NOTE(review): Target is taken from the same row as the features (no shift),
# so the model learns the label of the current row. Confirm this is what the
# "tomorrow" message below is meant to report.
feature_columns = ['Close', 'OBV', 'RSI', 'MA', 'Direction', 'Primary Trend']
X = pd.get_dummies(data[feature_columns], columns=['Primary Trend'])
y = data['Target']

# Split data into training and testing sets.
# NOTE(review): this is a shuffled split over time-ordered rows, so test rows
# are interleaved with training rows. Consider shuffle=False for a
# chronological hold-out.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the Random Forest model. The seed makes the fitted forest,
# and therefore the printed prediction and accuracy, reproducible on re-run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions on the testing set.
y_pred = rf_model.predict(X_test)

# Features of the last row in the dataset, kept as a one-row DataFrame so the
# column names and per-column dtypes match what the model was fitted on
# (a bare reshaped array would drop both).
last_day_features = X.iloc[[-1]]

predicted_target = rf_model.predict(last_day_features)[0]
# Print the predicted target
print('The predicted target for tomorrow is:', predicted_target)

# Attach predictions via assign(), which builds a new frame instead of writing
# into the slice returned by train_test_split.
X_test = X_test.assign(Prediction=y_pred)
print(accuracy_score(y_test, y_pred))

The predicted target for tomorrow is: 0
0.7937984496124031
