In [248]:
import yfinance as yf
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, classification_report

In [249]:
def fetch_stock_data(ticker, start_date="1990-01-02 00:00:00-05:00", end_date=None):
    """Download price history for ``ticker`` without the corporate-action columns.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.
    start_date : str, optional
        Passed through as ``start`` to ``Ticker.history``.
    end_date : str or None, optional
        Passed through as ``end`` to ``Ticker.history``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``Ticker.history`` returned, minus the "Dividends" and
        "Stock Splits" columns when they are present.
    """
    history = yf.Ticker(ticker).history(start=start_date, end=end_date)
    # errors="ignore" keeps this from raising KeyError when the download comes
    # back without these columns (presumably possible for an empty or failed
    # download -- TODO confirm against the installed yfinance version).
    return history.drop(columns=["Dividends", "Stock Splits"], errors="ignore")

In [250]:
def create_target_column(data, horizon=1):
    """Add the look-ahead close and a binary up/down label to ``data``.

    Two columns are written onto ``data`` itself (the frame is modified in
    place and also returned):

    * "Tomorrow": the "Close" value ``horizon`` rows later.
    * "Target": 1 where "Tomorrow" is greater than "Close", otherwise 0.

    The final ``horizon`` rows have no future close, so "Tomorrow" is NaN
    there; the comparison against NaN is False, which makes "Target" 0 on
    those rows.
    """
    close = data["Close"]
    future_close = close.shift(-horizon)

    data["Tomorrow"] = future_close
    data["Target"] = future_close.gt(close).astype(int)
    return data

In [251]:
def train_random_forest_model(train_data, predictors, target):
    """Fit a random forest on ``train_data`` and return the fitted model.

    ``predictors`` selects the feature columns and ``target`` the label
    column of ``train_data``. The forest uses 500 trees, requires 50 samples
    to split a node, and has a fixed ``random_state`` of 1.
    """
    features = train_data[predictors]
    labels = train_data[target]

    forest = RandomForestClassifier(
        n_estimators=500,
        min_samples_split=50,
        random_state=1,
    )
    forest.fit(features, labels)
    return forest

In [252]:
def predict(train, test, predictors, model, threshold=0.6):
    """Refit ``model`` on ``train`` and return thresholded predictions for ``test``.

    Parameters
    ----------
    train : pandas.DataFrame
        Training rows; must contain the ``predictors`` columns and "Target".
    test : pandas.DataFrame
        Rows to predict; must contain the ``predictors`` columns and "Target".
    predictors : list of str
        Feature column names.
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict_proba(X)``. It is
        refit here, so any previous fit is replaced.
    threshold : float, optional
        Minimum probability required to predict 1. Defaults to 0.6, the
        cutoff that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``test`` with two columns: "Target" (copied from
        ``test``) and "Predictions" (1.0 or 0.0).
    """
    model.fit(train[predictors], train["Target"])

    # Column 1 of predict_proba is read as the probability of the positive
    # label; this assumes "Target" holds 0/1 and both values occur in `train`.
    positive_proba = model.predict_proba(test[predictors])[:, 1]

    # Single vectorized comparison; cast to float so the column keeps the
    # 1.0 / 0.0 values the previous in-place masking produced.
    labels = (positive_proba >= threshold).astype(float)

    preds = pd.Series(labels, index=test.index, name="Predictions")
    return pd.concat([test["Target"], preds], axis=1)

In [253]:
def generate_features(data, horizons):
    """Add rolling close-ratio and trend features for each horizon.

    For every ``horizon`` in ``horizons`` two columns are written onto
    ``data`` (the input frame is modified in place):

    * ``Close_Ratio_{horizon}``: "Close" divided by its rolling mean over
      ``horizon`` rows.
    * ``Trend_{horizon}``: rolling sum over ``horizon`` rows of "Target"
      shifted down by one row, so a row's own "Target" is never included.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain "Close" and "Target" columns.
    horizons : iterable of int
        Rolling window sizes.

    Returns
    -------
    tuple
        ``(filtered, new_predictors)`` where ``filtered`` is ``data`` with
        every row dropped that has a NaN in any column other than "Target",
        and ``new_predictors`` lists the added column names in creation order.
    """
    new_predictors = []

    close = data["Close"]
    # Shift once, on the Target column only. Shifting and rolling the whole
    # frame just to read "Target" back out is wasteful and breaks as soon as
    # the frame carries a non-numeric column.
    previous_targets = data["Target"].shift(1)

    for horizon in horizons:
        ratio_column = f"Close_Ratio_{horizon}"
        data[ratio_column] = close / close.rolling(horizon).mean()

        trend_column = f"Trend_{horizon}"
        data[trend_column] = previous_targets.rolling(horizon).sum()

        new_predictors += [ratio_column, trend_column]

    # Rows at the start lack a full rolling window; NaNs in "Target" itself
    # are deliberately not used as a reason to drop a row.
    data = data.dropna(subset=data.columns[data.columns != "Target"])
    return data, new_predictors

In [254]:
def backtest(data, model, predictors, start=2500, step=250):
    """Walk forward through ``data``, refitting and predicting fold by fold.

    Each fold trains on rows ``[0, i)`` and predicts rows ``[i, i + step)``,
    with ``i`` running from ``start`` to the end of ``data`` in increments of
    ``step``. Fitting and prediction are delegated to ``predict``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows in the order they should be walked; must contain the columns
        ``predict`` needs.
    model : object
        Estimator handed to ``predict`` for every fold.
    predictors : list of str
        Feature column names.
    start : int, optional
        Number of leading rows used to train the first fold.
    step : int, optional
        Number of rows predicted per fold.

    Returns
    -------
    pandas.DataFrame
        The per-fold results of ``predict`` concatenated in fold order.

    Raises
    ------
    ValueError
        If ``start``/``step`` leave no fold to evaluate (for example when
        ``data`` has no more than ``start`` rows).
    """
    n_rows = data.shape[0]
    fold_starts = range(start, n_rows, step)

    # Fail early with an actionable message; otherwise pd.concat([]) raises
    # an opaque "No objects to concatenate" ValueError.
    if len(fold_starts) == 0:
        raise ValueError(
            f"backtest has no folds to evaluate: data has {n_rows} rows, "
            f"start={start}, step={step}"
        )

    all_predictions = []
    for i in fold_starts:
        train = data.iloc[0:i].copy()
        test = data.iloc[i:(i + step)].copy()
        all_predictions.append(predict(train, test, predictors, model))

    return pd.concat(all_predictions)

In [255]:
def calculate_precision(predictions):
    """Print the precision of "Predictions" measured against "Target".

    ``predictions`` must provide both columns; nothing is returned.
    """
    actual = predictions["Target"]
    predicted = predictions["Predictions"]
    print("Precision:", precision_score(actual, predicted))

In [256]:
def display_value_counts(predictions):
    """Print how often each value occurs in the "Predictions" column."""
    # Count first so a missing column fails before anything is printed.
    counts = predictions["Predictions"].value_counts()
    print("Value Counts of Predictions:", counts, sep="\n")

In [257]:
def display_actual_value_counts(data):
    """Print how often each value occurs in the "Target" column."""
    # Count first so a missing column fails before anything is printed.
    counts = data["Target"].value_counts()
    print("Actual Value Counts:", counts, sep="\n")