### Stock Recommendation System with Reinforcement Learning - Baseline Models

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import norm

#### Define Evaluation Metrics

In [2]:
# ------------------------
# Evaluation: Simulated Reward / Sharpe
# ------------------------
def evaluate_sharpe(returns):
    returns = np.array(returns)
    if len(returns) == 0 or np.std(returns) == 0:
        return 0.0
    return np.mean(returns) / np.std(returns)

def preference_alignment_score(recommended_stock, current_style, styles):
    """Score whether the recommended stock aligns with current user preference"""
    return 1 if recommended_stock in styles[current_style] else 0

#### Define Baseline Models

In [88]:
# ------------------------
# Static Recommendation (Top-N Most Bought Stocks)
# ------------------------
def static_recommendation(df, top_k=5):
    """Recommend top-K most frequently bought stocks to everyone"""
    top_stocks = df[df['Action'] == 'BUY']['Stock'].value_counts().head(top_k).index.tolist()
    return top_stocks

# ------------------------
# Feature-driven Model (Supervised Learning)
# ------------------------
def train_feature_model(df):
    """Train a feature-based model to predict BUY/SELL (1/0)"""
    df = df.dropna().copy()
    df['Target'] = (df['Action'] == 'BUY').astype(int)
    feature_cols = ["Shares", "Price", "Daily_Return", "Volatility", "Momentum", "MA_10", "Volatility_10", "Risk_Preference"]
    X = df[feature_cols]
    y = df['Target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    return model, acc

#### Training and Evaluating Baseline Models

In [None]:
# Load data
df = pd.read_csv("data.csv")

##### Static Model

In [87]:
# -----------------------------
# Static Recommendation
# -----------------------------
df_eval = df[df["Date"] >= "2025-01-01"].copy()
all_stocks = df['Stock'].unique().tolist()
static_stocks = static_recommendation(df, top_k=5)

def evaluate_static(df_eval, static_stocks, styles, holding_days=5):
    returns = []
    alignments = []
    for _, row in df_eval.iterrows():
        chosen_stock = static_stocks[0]  # deterministic top stock recommendation
        future_date = row["Date"] + pd.Timedelta(days=holding_days)
        future_row = df[(df["Stock"] == chosen_stock) & (df["Date"] >= future_date)].head(1)
        if not future_row.empty:
            price_now = row["Price"]
            price_future = future_row["Price"].values[0]
            ret = (price_future - price_now) * row["Risk_Preference"]
            returns.append(ret)
            alignments.append(preference_alignment_score(chosen_stock, row["Style"], styles))
    return np.mean(returns), evaluate_sharpe(returns), np.mean(alignments)

print("\n[Static Recommendation Baseline]")
ret_s, sharpe_s, align_s = evaluate_static(df_eval, static_stocks, styles)
print(f"Average Return: {ret_s:.4f}, Sharpe: {sharpe_s:.4f}, Alignment: {align_s:.4f}")


[Static Recommendation Baseline]
Average Return: -80.4650, Sharpe: -0.3752, Alignment: 0.2505


##### Feature Model

In [89]:
# -----------------------------
# Feature Model Evaluation
# -----------------------------
model, acc = train_feature_model(df)

def evaluate_feature_model(model, df, styles, holding_days=5):
    df_eval = df[df["Date"] >= "2025-01-01"].dropna().copy().reset_index(drop=True)
    feature_cols = ["Shares", "Price", "Daily_Return", "Volatility", "Momentum", "MA_10", "Volatility_10", "Risk_Preference"]
    X = df_eval[feature_cols]
    preds = model.predict(X)
    returns = []
    alignments = []
    for i, row in df_eval.iterrows():
        if preds[i] == 1:  # predicted BUY
            future_date = row["Date"] + pd.Timedelta(days=holding_days)
            future_row = df[(df["Stock"] == row["Stock"]) & (df["Date"] >= future_date)].head(1)
            if not future_row.empty:
                price_now = row["Price"]
                price_future = future_row["Price"].values[0]
                ret = (price_future - price_now) * row["Risk_Preference"]
                returns.append(ret)
                alignments.append(preference_alignment_score(row["Stock"], row["Style"], styles))
    return np.mean(returns), evaluate_sharpe(returns), np.mean(alignments)

print("\n[Feature-driven Model Baselin]")
ret_f, sharpe_f, align_f = evaluate_feature_model(model, df, styles)
print(f"Average Return: {ret_f:.4f}, Sharpe: {sharpe_f:.4f}, Alignment: {align_f:.4f}")


[Feature-driven Model Baselin]
Average Return: 0.1167, Sharpe: 0.0075, Alignment: 1.0000
