In [5]:
#Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pandas_ta as ta
import yfinance as yf

In [6]:
#Download asset data
df = yf.download(tickers="AAPL", start='2010-01-01', end='2023-12-31')

#calculatre technical indicators
df['MA'] = ta.ema(df['Close'], length=20)
df['rsi'] = ta.rsi(df['Close'], length=14)
df['coppock'] = ta.coppock(df['Close'], length=12)
df['returns'] = df['Close'].pct_change(-1) * 100 * -1
df['forecast_tommorrow'] = np.where(df['returns'] > 0,
                                   1,
                                   0) #1 is for UP and 0 is for down

df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,MA,rsi,coppock,returns,forecast_tommorrow
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-01-04 00:00:00-05:00,7.622500,7.660714,7.585000,7.643214,6.470741,493729600,,,,0.172595,1
2010-01-05 00:00:00-05:00,7.664286,7.699643,7.616071,7.656429,6.481929,601904800,,,,-1.616343,0
2010-01-06 00:00:00-05:00,7.656429,7.686786,7.526786,7.534643,6.378825,552160000,,,,-0.185213,0
2010-01-07 00:00:00-05:00,7.562500,7.571429,7.466071,7.520714,6.367031,477131200,,,,0.660442,1
2010-01-08 00:00:00-05:00,7.510714,7.571429,7.466429,7.570714,6.409362,447610800,,,,-0.889999,0
...,...,...,...,...,...,...,...,...,...,...,...
2023-12-22 00:00:00-05:00,195.179993,195.410004,192.970001,193.600006,193.353287,37122800,193.330404,54.609714,5.026601,-0.284902,0
2023-12-26 00:00:00-05:00,193.610001,193.889999,192.830002,193.050003,192.803986,28919300,193.303699,53.026883,3.980161,0.051768,1
2023-12-27 00:00:00-05:00,192.490005,193.500000,191.089996,193.149994,192.903839,48087700,193.289060,53.291938,3.311362,0.222134,1
2023-12-28 00:00:00-05:00,194.139999,194.660004,193.169998,193.580002,193.333298,34049900,193.316769,54.481470,2.499469,-0.545371,0


In [8]:
# Separate features and target variable
X = df.drop(columns=['forecast_tommorrow'])
y = df['forecast_tommorrow']

In [9]:
#split data into trains and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
#scale variables
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
model = XGBClassifier()

In [12]:
model.fit(X_train_scaled, y_train)

In [14]:
y_pred = model.predict(X_test_scaled)
y_pred

array([0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0,

In [15]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.9985815602836879
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       325
           1       1.00      1.00      1.00       380

    accuracy                           1.00       705
   macro avg       1.00      1.00      1.00       705
weighted avg       1.00      1.00      1.00       705



In [21]:
compare = pd.DataFrame(y_test)
compare['predictions'] = y_pred
compare.head(50)

Unnamed: 0_level_0,forecast_tommorrow,predictions
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-08-30 00:00:00-04:00,0,0
2017-06-30 00:00:00-04:00,0,0
2021-10-14 00:00:00-04:00,1,1
2023-08-18 00:00:00-04:00,1,1
2010-07-23 00:00:00-04:00,0,0
2022-09-27 00:00:00-04:00,0,0
2020-08-12 00:00:00-04:00,1,1
2011-07-26 00:00:00-04:00,0,0
2019-01-31 00:00:00-05:00,1,1
2022-09-13 00:00:00-04:00,1,1
