In [52]:
import pandas as pd
import yfinance as yf

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

data_ticker = yf.download("COMM", period="2y")
data_ticker["Percent Change"] = ((data_ticker["Close"].shift(-5) - data_ticker["Close"])/data_ticker["Close"]) * 100
data_ticker = data_ticker.dropna(subset=["Percent Change"])
data_ticker = data_ticker.drop(columns=["Open", "Adj Close", "High", "Low", "Close"])

data_index = yf.download("^RUT", period="2y")
data_index["Percent Change"] = ((data_index["Close"].shift(-5) - data_index["Close"])/data_index["Close"]) * 100
data_index = data_index.dropna(subset=["Percent Change"])
data_index = data_index.drop(columns=["Open", "Adj Close", "High", "Low", "Close"])

n_days = 10
candlestick_data = pd.DataFrame()
for i in range(1, n_days + 1):
    candlestick_data[f"Day_{i} Ticker Percent Change"] = (data_ticker["Percent Change"].shift(i)).round(2)
    candlestick_data[f"Day_{i} Ticker Volume"] = data_ticker["Volume"].shift(i)
    candlestick_data[f"Day_{i} Index Percent Change"] = (data_index["Percent Change"].shift(i)).round(2)
    candlestick_data[f"Day_{i} Index Volume"] = data_index["Volume"].shift(i)

def categorize_movement(change):
    if abs(change) >= 15.0:
        return 1
    else:
        return 0
    
candlestick_data["Movement"] = data_ticker["Percent Change"].apply(categorize_movement)
candlestick_data = candlestick_data.dropna().reset_index(drop=True)

frequency = candlestick_data["Movement"].value_counts()
print(frequency)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Movement
0    384
1    104
Name: count, dtype: int64





In [53]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.utils import resample

test_size = 60
train_df = candlestick_data.iloc[:-test_size]
test_df = candlestick_data.iloc[-test_size:]

largest_class = train_df[train_df["Movement"] == 0]  # Majority class
other_classes = train_df[train_df["Movement"] != 0] 

downsample = resample(largest_class, replace=False, n_samples=len(other_classes), random_state=42)
train_df = pd.concat([downsample, other_classes])

train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)

x_train = train_df.drop(columns=["Movement"])
y_train = train_df["Movement"]
x_test = test_df.drop(columns=["Movement"])
y_test = test_df["Movement"]

feature_number = x_train.shape[1]
print(feature_number)

model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [100, 250, 500, 1000],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5],
    'bootstrap': [True],
    'max_features': ['sqrt', 'log2', 0.5, 0.7, 0.9]
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid,
                           cv=5, n_jobs=-1, verbose=2)

grid_search.fit(x_train, y_train)
y_pred = grid_search.predict(x_test)

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

precision = precision_score(y_test, y_pred, average="macro")
print(f'Precision: {precision:.2f}')

f1 = f1_score(y_test, y_pred, average="macro")
print(f'F1: {f1:.2f}')

40
Fitting 5 folds for each of 160 candidates, totalling 800 fits
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=250; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=250; total time=   0.2s
[CV] END bootstrap=True, max_depth=None, max_features=sqrt, min_samples_split=2, n_estimators=250; total tim

In [54]:
from sklearn.metrics import confusion_matrix

y_pred = grid_search.predict(x_test)
cm = confusion_matrix(y_test, y_pred)

TP = {}
FP = {}
FN = {}
TN = {}

# Calculate the metrics for each class
for i in range(len(cm)):
    TP[i] = cm[i, i]
    FP[i] = sum(cm[:, i]) - cm[i, i]
    FN[i] = sum(cm[i, :]) - cm[i, i]
    TN[i] = cm.sum() - (FP[i] + FN[i] + TP[i])

# Output the results
for i in range(len(cm)):
    print(f"Class {i}:")
    print(f"TP: {TP[i]}, FP: {FP[i]}, TN: {TN[i]}, FN: {FN[i]}\n")

Class 0:
TP: 19, FP: 1, TN: 27, FN: 13

Class 1:
TP: 27, FP: 13, TN: 19, FN: 1



In [56]:
data_ticker = yf.download("COMM", period="2y")
data_ticker["Percent Change"] = ((data_ticker["Close"].shift(-5) - data_ticker["Close"])/data_ticker["Close"]) * 100
data_ticker = data_ticker.dropna(subset=["Percent Change"])
data_ticker = data_ticker.drop(columns=["Open", "Adj Close", "High", "Low", "Close"])

data_index = yf.download("^RUT", period="2y")
data_index["Percent Change"] = ((data_index["Close"].shift(-5) - data_index["Close"])/data_index["Close"]) * 100
data_index = data_index.dropna(subset=["Percent Change"])
data_index = data_index.drop(columns=["Open", "Adj Close", "High", "Low", "Close"])

n_days = 10
model_candlestick_data = pd.DataFrame()
for i in range(1, n_days + 1):
    model_candlestick_data[f"Day_{i} Ticker Percent Change"] = (data_ticker["Percent Change"].shift(i)).round(2)
    model_candlestick_data[f"Day_{i} Ticker Volume"] = data_ticker["Volume"].shift(i)
    model_candlestick_data[f"Day_{i} Index Percent Change"] = (data_index["Percent Change"].shift(i)).round(2)
    model_candlestick_data[f"Day_{i} Index Volume"] = data_index["Volume"].shift(i)

curr_data_series = model_candlestick_data.iloc[-1]
curr_data = curr_data_series.to_frame().T
print(grid_search.predict(curr_data))



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[1]



