# SMA Bot vs SMA Bot With Reinforcement Learning
* Model 0: SMA bot
* Model 1: SMA + Volume
* Model 2: SMA + Day Of Week
* Model 3: SMA + Volume + Day Of Week
* Model 4: SMA (RL)


* Model 0 is the purely rule-based bot. All other models use Q-learning.

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

In [None]:
# Download two years of daily AAPL bars
data = yf.download("AAPL", period="2y", interval="1d")

# NOTE: some yfinance versions return (field, ticker) MultiIndex columns even
# for a single ticker. Flatten them so data['Close'] is a Series and row
# lookups such as df.iloc[i]['Close'] yield scalars.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Features used by the models: 10-day simple moving average of the close,
# the raw 'Volume' column (already present), and the weekday (Monday = 0).
data['SMA10'] = data['Close'].rolling(window=10).mean()
data['DayOfWeek'] = data.index.dayofweek

# Drop rows containing NaN, which includes the first 9 rows that do not yet
# have a full 10-day window for the SMA.
data.dropna(inplace=True)

In [None]:
# Reward function
def get_reward(action, price_today, price_tomorrow):
    """Return the one-day profit or loss of ``action`` on a single share.

    A buy (``1``) earns ``price_tomorrow - price_today``, a sell (``-1``)
    earns ``price_today - price_tomorrow``, and any other action (such as
    hold, ``0``) earns ``0``.
    """
    if action == -1:
        return price_today - price_tomorrow
    if action == 1:
        return price_tomorrow - price_today
    return 0

### Models

In [None]:
# Model 0: Rule-based SMA Crossover
def run_sma_bot(df):
    """Backtest the rule-based SMA strategy (Model 0) on a single share.

    The bot buys one share when the close is above the 10-day SMA while it
    is flat, and sells it when the close is below the SMA while it is
    holding. A position still open at the end is closed at the last close
    (this final liquidation is not counted in ``# Trades``).

    Args:
        df: DataFrame with numeric ``'Close'`` and ``'SMA10'`` columns.

    Returns:
        dict with the keys ``"Model"``, ``"Total Return ($)"``,
        ``"Avg Daily Reward"``, ``"# Trades"`` and ``"Cumulative Reward"``.
        Note that for this model ``"Avg Daily Reward"`` is the mean profit
        per closed trade (one reward is recorded per sale), and is ``0.0``
        when no trade was closed.
    """
    # Pull the columns out once instead of building a row Series per step.
    closes = df['Close'].to_numpy()
    smas = df['SMA10'].to_numpy()

    # None while flat; otherwise the price paid for the share being held.
    # Kept separate from the price so an entry at 0 is not mistaken for flat.
    entry_price = None
    cash = 0
    trades = 0
    rewards = []

    # Signals are evaluated on rows 1 .. len-2; the last row is only used to
    # mark an open position to market below.
    for i in range(1, len(closes) - 1):
        price_today = closes[i]
        sma = smas[i]

        if entry_price is None:
            if price_today > sma:
                entry_price = price_today
                trades += 1
        elif price_today < sma:
            pnl = price_today - entry_price
            cash += pnl
            rewards.append(pnl)
            entry_price = None
            trades += 1

    if entry_price is not None:
        pnl = closes[-1] - entry_price
        cash += pnl
        rewards.append(pnl)

    # np.mean([]) would return NaN and emit a RuntimeWarning.
    avg_reward = np.mean(rewards) if rewards else 0.0

    return {
        "Model": "Model0 (Rule SMA)",
        "Total Return ($)": round(cash, 2),
        "Avg Daily Reward": round(avg_reward, 4),
        "# Trades": trades,
        "Cumulative Reward": round(np.sum(rewards), 2)
    }

# RL Models
def preprocess_features(df, model):
    """Discretize the feature columns used by RL model ``model``.

    ``'SMA10'`` is always included. Models 1 and 3 add ``'Volume'``; models
    2 and 3 add ``'DayOfWeek'``. Any other model number (including 4) uses
    ``'SMA10'`` alone.

    Each selected column is cut into 5 equal-width bins, fitted on the whole
    of ``df``, and the ordinal bin codes are returned as an integer array
    with one row per row of ``df`` and one column per feature.
    """
    uses_volume = model in (1, 3)
    uses_weekday = model in (2, 3)

    columns = ['SMA10']
    if uses_volume:
        columns.append('Volume')
    if uses_weekday:
        columns.append('DayOfWeek')

    discretizer = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform')
    binned = discretizer.fit_transform(df[columns])
    return binned.astype(int)

def run_rl_model(df, model_num, alpha=0.1, gamma=0.9, epsilon=0.1, seed=None):
    """Train and evaluate a tabular Q-learning trader in one pass over ``df``.

    At each step the agent picks Sell/Hold/Buy epsilon-greedily from the
    Q-table, receives the next-day price move as reward via ``get_reward``
    (independent of whether a share is actually held), and applies the
    standard Q-learning update. Separately, a one-share position is
    simulated: a Buy while flat opens it, a Sell while holding closes it,
    and anything left open is closed at the last close (that final
    liquidation is not counted in ``# Trades``).

    Args:
        df: DataFrame with a ``'Close'`` column plus the feature columns
            required by ``preprocess_features`` for ``model_num``.
        model_num: Feature-set selector passed to ``preprocess_features``.
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Probability of taking a random action.
        seed: Optional seed for reproducible exploration. ``None`` draws
            from NumPy's global random state.

    Returns:
        dict with the keys ``"Model"``, ``"Total Return ($)"``,
        ``"Avg Daily Reward"``, ``"# Trades"`` and ``"Cumulative Reward"``.
        ``"Avg Daily Reward"`` is ``0.0`` when there were no steps.
    """
    states = preprocess_features(df, model_num)
    actions = [-1, 0, 1]  # Sell, Hold, Buy

    # One axis of size 5 per feature (the bin count preprocess_features
    # uses) followed by one axis for the actions.
    n_states = (5,) * states.shape[1]
    Q_table = np.zeros(n_states + (len(actions),))

    # Fall back to the global generator so np.random.seed(...) set by the
    # caller keeps working when no explicit seed is given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Pull the prices out once instead of building a row Series per step.
    closes = df['Close'].to_numpy()

    cash = 0
    # None while flat; otherwise the price paid for the share being held.
    entry_price = None
    trades = 0
    rewards = []

    for t in range(len(states) - 1):
        state = tuple(states[t])
        next_state = tuple(states[t + 1])
        price_today = closes[t]
        price_tomorrow = closes[t + 1]

        # Epsilon-greedy action selection.
        if rng.rand() < epsilon:
            action_idx = rng.choice(len(actions))
        else:
            action_idx = np.argmax(Q_table[state])
        action = actions[action_idx]

        reward = get_reward(action, price_today, price_tomorrow)
        rewards.append(reward)

        # Q(s, a) += alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
        cell = state + (action_idx,)
        td_target = reward + gamma * np.max(Q_table[next_state])
        Q_table[cell] += alpha * (td_target - Q_table[cell])

        # Simulated one-share position used for the dollar return.
        if action == 1 and entry_price is None:
            entry_price = price_today
            trades += 1
        elif action == -1 and entry_price is not None:
            cash += price_today - entry_price
            entry_price = None
            trades += 1

    if entry_price is not None:
        cash += closes[-1] - entry_price

    # np.mean([]) would return NaN and emit a RuntimeWarning.
    avg_reward = np.mean(rewards) if rewards else 0.0

    return {
        "Model": f"Model{model_num} (RL)",
        "Total Return ($)": round(cash, 2),
        "Avg Daily Reward": round(avg_reward, 4),
        "# Trades": trades,
        "Cumulative Reward": round(np.sum(rewards), 2)
    }

### Results

In [None]:
# Evaluate the rule-based baseline (Model 0) first, then RL Models 1-4
results = [run_sma_bot(data)]
for model_num in (1, 2, 3, 4):
    rl_result = run_rl_model(data, model_num)
    results.append(rl_result)

# Tabulate one row per model and print the comparison
summary = pd.DataFrame(results)
print(summary)