## Indicator Based Strategy Model

In [1]:
import sys
import os
sys.path.append(os.path.abspath('../util'))

from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

tqdm.pandas()

In [2]:
# Load Processed Data
prices_df = pd.read_csv('../data/prices.csv', parse_dates=['date'])
price_matrix = prices_df.pivot(index='date', columns='ticker', values='close').sort_index()

In [3]:
# Generates technical indicators from a stock’s historical prices]
def create_features(prices):
    df = pd.DataFrame(index=prices.index)
    df['price'] = prices
    df['return_5d'] = prices.pct_change(5)
    df['sma_20'] = prices.rolling(20).mean()
    df['rsi_14'] = 100 - (100 / (1 + prices.pct_change().rolling(14).mean() / prices.pct_change().rolling(14).std()))
    df['macd'] = prices.ewm(span=12).mean() - prices.ewm(span=26).mean()
    df['volatility_10'] = prices.pct_change().rolling(10).std()
    df['price_sma_ratio'] = prices / df['sma_20']
    return df.dropna()

In [5]:
# Creates classification labels based on future price movement
# - Computes the return over a forward-looking window (lookahead)
# - Assigns label 1 if the return exceeds the given threshold, -1 if the return is below 
#   the -threshold, else 0

def create_labels(prices, threshold=0.001, lookahead=5):
    future_return = prices.shift(-lookahead) / prices - 1
    labels = pd.Series(0, index=prices.index)
    labels[future_return > threshold] = 1
    labels[future_return < -threshold] = -1
    return labels

## Model Training and Signal Generation

Trains a separate binary classifier for each stock using only price-based features. Data is split into training (pre-2018) and testing (2018 onward). A RandomForestClassifier is trained to predict whether the stock will go up (label 1) or not (label 0).

Position Assignment:

- Long (1000) if predicted probability for class 1 > 0.3
- Short (-1000) if probability for class 0 > 0.6
- Otherwise, hold (0)

In [6]:
positions_df = pd.DataFrame()
metrics = {}

for symbol in price_matrix.columns:
    prices = price_matrix[symbol].dropna()
    features = create_features(prices)
    labels = create_labels(prices).reindex(features.index)
    data = features.copy()
    data['label'] = labels
    data = data[data['label'].isin([-1, 1])].dropna()

    train = data[(data.index < '2018-06-01')]
    test = data[data.index >= '2018-06-01']
    X_train, y_train = train.drop('label', axis=1), train['label']
    X_test, y_test = test.drop('label', axis=1), test['label']

    model = RandomForestClassifier(n_estimators=100, class_weight='balanced', max_depth=7, random_state=42)
    model.fit(X_train, y_train)

    probs = model.predict_proba(X_test)
    predictions = []
    for row in probs:
        prob_map = dict(zip(model.classes_, row))
        if prob_map.get(1, 0) > 0.4:
            predictions.append(1000)
        elif prob_map.get(-1, 0) > 0.4:
            predictions.append(-1000)
        else:
            predictions.append(0)

    positions_df[symbol] = pd.Series(predictions, index=X_test.index)
    
    y_pred = pd.Series(predictions, index=X_test.index).map({-1000: -1, 0: 0, 1000: 1})
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)
    metrics[symbol] = {'accuracy': acc, 'f1_score': f1}

    importances = model.feature_importances_
    feature_names = X_train.columns
    sorted_idx = np.argsort(importances)[::-1]

metrics_df = pd.DataFrame(metrics).T
print("\nModel Performance Summary:")
print(metrics_df.round(3))


Model Performance Summary:
       accuracy  f1_score
AAPL      0.612     0.440
AMD       0.581     0.378
AMZN      0.545     0.353
ASML      0.536     0.517
CSCO      0.512     0.504
GOOGL     0.457     0.457
INTC      0.588     0.461
MSFT      0.634     0.480
MU        0.545     0.437
NVDA      0.568     0.411


In [7]:
positions_df = positions_df.reindex(index=price_matrix.index, columns=price_matrix.columns)
positions_df = positions_df.ffill().fillna(0).astype(int)
trades_df = positions_df.diff().fillna(positions_df).astype(int)
trades_df.to_csv('../data/indicator_strategy.csv', index=True)