In [3]:
import pandas as pd
import numpy as np
from openbb import obb
from swifter import swifter
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier

from sklearn import metrics
from sklearn.metrics import classification_report

def encoding(n):
    if n > 0:
        return 1
    else:
        return 0

def get_label(data):
    data['Delta'] = data['close'] - data['open']
    data['up_down'] = data['Delta'].swifter.apply(lambda d: encoding(d))
    return data

def get_sequence_data(data_up_down,lookback):
    shape = (data_up_down.shape[0] - lookback + 1, lookback)
    strides = data_up_down.strides + (data_up_down.strides[-1],)
    return np.lib.stride_tricks.as_strided(data_up_down,shape=shape,strides=strides)

def get_training_data(symbol,start_date,end_date,monthly_bool=True,lookback=10):
    data = obb.equity.price.historical(
        symbol=symbol,
        start_date=start_date,
        end_date=end_date
    ).to_df()
    data = get_label(data)
    data_up_down = data['up_down'].to_numpy()
    training_data = get_sequence_data(data_up_down,lookback)
    return training_data

data = get_training_data(symbol="AAPL",start_date="2023-01-01",end_date="2025-05-11",monthly_bool=False, lookback=10)
#pd.DataFrame(data).to_csv("data/data_aapl.csv")
pd.DataFrame(data).to_csv("data/data_aapl.csv")
data = pd.read_csv("data/data_aapl.csv")
X=data.iloc[:,:-1]
Y=data.iloc[:,-1]
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.33,random_state=4284,stratify=Y)

model_lr=LogisticRegression(random_state=42)
model_lr.fit(X_train,y_train)
y_pred=model_lr.predict(X_test)

#model_xgb = xgb.XGBClassifier(random_state=42)
#model_xgb.fit(X_train,y_train)
#y_pred = model_xgb.predict(X_test)

#model_rf = RandomForestClassifier(random_state = 42)
#model_rf.fit(X_train,y_train)
#y_pred = model_rf.predict(X_test)


cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix

target_names = ['up', 'down']
print(classification_report(y_test,y_pred,target_names=target_names))


Pandas Apply: 100%|██████████| 590/590 [00:00<00:00, 605194.27it/s]

              precision    recall  f1-score   support

          up       0.36      0.22      0.27        83
        down       0.54      0.71      0.61       109

    accuracy                           0.49       192
   macro avg       0.45      0.46      0.44       192
weighted avg       0.46      0.49      0.47       192




