# Backtesting Moving Average Crossover Strategy

This notebook implements a simple backtesting framework for a moving average crossover strategy. The idea is to:

- Calculate two moving averages: a short-term SMA (S-day) and a long-term SMA (L-day).
- Generate a **buy signal** when the short-term SMA crosses above the long-term SMA.
- Generate a **sell signal** when the short-term SMA crosses below the long-term SMA.

We then backtest the strategy across many stocks (e.g., those in the S&P index) and across various combinations of SMA periods. For each parameter combination, we calculate:
  - The average return per trade (P&L)
  - The variance of the returns

Transaction costs are incorporated as a fixed rate charged on each side of a trade (0.1% by default, so 0.2% is deducted from the return of every round-trip trade). Finally, we visualize the results using heatmaps.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Select matplotlib's built-in 'default' style sheet for every figure drawn
# below, so the plots do not inherit a style chosen earlier in the session.
plt.style.use('default')

In [None]:
# Read the pre-cleaned historical price data from disk into a DataFrame
data = pd.read_csv(filepath_or_buffer='fully_cleaned_stock_data.csv')

# Display the leading rows as a quick sanity check of what was loaded
data.head(n=5)

In [None]:
# Make sure 'Date' is a datetime column. pandas' own dtype check is used
# because np.issubdtype raises TypeError for pandas extension dtypes (for
# example the string dtype or timezone-aware datetimes) instead of
# returning False.
if not pd.api.types.is_datetime64_any_dtype(data['Date']):
    data['Date'] = pd.to_datetime(data['Date'])

# Order rows chronologically within each ticker and renumber them from 0
data.sort_values(by=['Ticker', 'Date'], inplace=True)
data.reset_index(drop=True, inplace=True)

data.head()

In [None]:
def backtest_strategy(df, short_window, long_window, tc=0.001):
    """
    Backtest a long-only moving average crossover strategy for a single stock.

    A position is opened when the short SMA goes from not-above to above the
    long SMA, and closed when it goes from above to not-above. Entries and
    exits are both filled at the 'Open' of the row following the crossover.
    A position that is still open after the last evaluated row is closed at
    the final 'Close'.

    Parameters:
        df (pd.DataFrame): Rows for one stock, expected in chronological
            order. Only the 'Open' and 'Close' columns are read; the frame
            itself is not modified.
        short_window (int): Lookback period for the short-term SMA
        long_window (int): Lookback period for the long-term SMA
        tc (float): Transaction cost rate per trade side (default 0.1%);
            2 * tc is subtracted from the return of every trade.

    Returns:
        trades (list): Return of each completed trade, in the order the
            trades were closed
    """
    close = df['Close']
    open_prices = df['Open'].to_numpy()

    # min_periods=1 yields an SMA value from the first row onward.
    # NOTE: until `long_window` rows are available the long SMA is an average
    # over fewer rows, so crossovers in that span come from partial windows.
    sma_short = close.rolling(window=short_window, min_periods=1).mean()
    sma_long = close.rolling(window=long_window, min_periods=1).mean()

    # True where the short SMA is strictly above the long SMA; comparisons
    # involving NaN are False, i.e. treated as "not above".
    signal = sma_short.to_numpy() > sma_long.to_numpy()

    # Rows i in [1, n-2] whose signal differs from the previous row. The last
    # row is excluded because a fill needs the next row's open. Working on
    # arrays and visiting only these rows avoids per-row DataFrame lookups.
    n_rows = len(signal)
    last = max(n_rows - 1, 1)
    flips = np.flatnonzero(signal[1:last] != signal[:last - 1]) + 1

    trades = []
    holding = False
    buy_price = 0

    for i in flips:
        if signal[i]:
            # 0 -> 1 crossover: enter at the next row's open if currently flat
            if not holding:
                buy_price = open_prices[i + 1]
                holding = True
        elif holding:
            # 1 -> 0 crossover: exit at the next row's open
            sell_price = open_prices[i + 1]
            # Percentage change minus transaction costs on both sides
            trades.append((sell_price - buy_price) / buy_price - 2 * tc)
            holding = False

    # If a position is still open at the end, exit at the last close price
    if holding:
        sell_price = close.to_numpy()[-1]
        trades.append((sell_price - buy_price) / buy_price - 2 * tc)

    return trades

# Smoke-test the backtest on the first ticker that appears in the data
sample_ticker = data['Ticker'].iloc[0]
is_sample_row = data['Ticker'] == sample_ticker
sample_df = data.loc[is_sample_row].copy()
sample_trades = backtest_strategy(
    sample_df,
    short_window=10,
    long_window=50,
)

print(f"Ticker: {sample_ticker}, Number of trades: {len(sample_trades)}")

# Mean and variance are only reported when at least one trade was completed
if sample_trades:
    print(f"Average return per trade: {np.mean(sample_trades):.2%}")
    print(f"Return variance: {np.var(sample_trades):.6f}")

In [None]:
# Define ranges for the short-term and long-term moving averages
short_windows = [5, 10, 15, 20]
long_windows = [30, 50, 100, 200]

# Dictionary to store results for each (short, long) combination
results = {}

tickers = data['Ticker'].unique()

# Split the data by ticker once, up front. Filtering the full frame again for
# every ticker inside every SMA combination repeats the same scan each time.
# sort=False keeps the tickers in their order of first appearance.
frames_by_ticker = {
    ticker: frame for ticker, frame in data.groupby('Ticker', sort=False)
}

for short_w in short_windows:
    for long_w in long_windows:
        # Consider only valid combinations where short_window < long_window
        if short_w >= long_w:
            continue

        # Pool the trades of every ticker for this parameter combination
        all_trades = []
        progress = tqdm(
            frames_by_ticker.items(),
            desc=f"Processing SMA {short_w}/{long_w}",
            leave=False,
        )
        for ticker, df_stock in progress:
            all_trades.extend(
                backtest_strategy(df_stock, short_window=short_w, long_window=long_w)
            )

        # Mean/variance are undefined without trades; record NaN in that case
        if all_trades:
            avg_return = np.mean(all_trades)
            var_return = np.var(all_trades)
        else:
            avg_return = np.nan
            var_return = np.nan

        results[(short_w, long_w)] = {
            'avg_return': avg_return,
            'var_return': var_return,
            'num_trades': len(all_trades),
        }

# Convert the results dictionary to a DataFrame indexed by (Short_SMA, Long_SMA)
results_df = pd.DataFrame.from_dict(results, orient='index')
results_df.index = pd.MultiIndex.from_tuples(results_df.index, names=['Short_SMA', 'Long_SMA'])
results_df.sort_index(inplace=True)
results_df

In [None]:
# Pivot each metric into a grid with Short_SMA as rows and Long_SMA as columns
avg_return_df = results_df['avg_return'].unstack()
var_return_df = results_df['var_return'].unstack()

# One heatmap per metric: (grid, colormap, figure title)
heatmap_specs = [
    (avg_return_df, 'viridis', 'Average Return per Trade'),
    (var_return_df, 'magma', 'Variance of Return per Trade'),
]

for grid, colormap, title in heatmap_specs:
    fig, ax = plt.subplots(figsize=(8, 6))
    cax = ax.matshow(grid, cmap=colormap)
    fig.colorbar(cax)

    # Label the ticks with the actual SMA periods rather than array positions
    ax.set_xticks(range(len(grid.columns)))
    ax.set_xticklabels(grid.columns)
    ax.set_yticks(range(len(grid.index)))
    ax.set_yticklabels(grid.index)

    ax.set_xlabel('Long SMA')
    ax.set_ylabel('Short SMA')
    ax.set_title(title)
    plt.show()