<a href="https://colab.research.google.com/github/negbuna/models/blob/main/backtesting_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install yfinance --quiet

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import seaborn as sns
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score

In [3]:
sns.set_theme(style="darkgrid")
plt.style.use("dark_background")

In [4]:
df = yf.download("AAPL", start="2022-01-01", end="2023-01-01")
prices = df["Close"]
df.head()
df.columns = ['_'.join(col).strip() if col[1] else col[0] for col in df.columns.values]

ticker = 'AAPL'
close = df[f'Close_{ticker}']
volume = df[f'Volume_{ticker}']

# 1. Bollinger bands (middle band)
df['Bollinger_Mid'] = close.rolling(window=20).mean()

# 2. RSI
delta = close.diff()
gain = delta.where(delta > 0, 0)
loss = -delta.where(delta < 0, 0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))
df['RSI'].fillna(0, inplace=True)

# 3. MACD
df['EMA_12'] = close.ewm(span=12, adjust=False).mean()
df['EMA_26'] = close.ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']
df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()

# 4. On-Balance Volume
df['OBV'] = np.where(
    close > close.shift(1),
    volume,
    np.where(close < close.shift(1), -volume, 0)
).cumsum()

df.dropna(inplace=True)

YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['RSI'].fillna(0, inplace=True)


In [5]:
def simple_moving_average_strategy(prices, short_window=20, long_window=50):
    short_ma = prices.rolling(short_window).mean()
    long_ma = prices.rolling(long_window).mean()
    signal = (short_ma > long_ma).astype(int)
    return signal

def run_backtest(prices, signals, initial_cash=10000):
    position = 0
    cash = initial_cash
    portfolio_value = []

    for price, signal in zip(prices.values, signals.values):
        if signal == 1 and cash >= price:
            position += 1
            transaction_fee = 0.001 * price
            cash -= price * position + transaction_fee
        elif signal == 0 and position > 0:
            transaction_fee = 0.001 * price
            cash += price * position
            position = 0
        total_value = cash + position * price
        portfolio_value.append(total_value)

    return portfolio_value

In [23]:
def stochastic_oscillator(df, period=14, ticker='AAPL'):
    high = df[(f'High_{ticker}')]
    low = df[(f'Low_{ticker}')]
    close = df[(f'Close_{ticker}')]

    high_roll = high.rolling(period).max()
    low_roll = low.rolling(period).min()

    # Calculate %K
    percent_k = ((close - low_roll) * 100 / (high_roll - low_roll))
    percent_d = percent_k.rolling(3).mean()

    # Add back into the df (under a new level maybe)
    df[('%K', ticker)] = percent_k
    df[('%D', ticker)] = percent_d

    return df[[('%K', ticker), ('%D', ticker)]]

In [7]:
def williams_r(df, ticker, period=14):
    high = df[f'High_{ticker}']
    low = df[f'Low_{ticker}']
    close = df[f'Close_{ticker}']

    highest_high = high.rolling(period).max()
    lowest_low = low.rolling(period).min()

    df[f'Williams_%R_{ticker}'] = -100 * (highest_high - close) / (highest_high - lowest_low)
    return df

In [8]:
def commodity_channel_index(df, ticker, period=20):
    high = df[f'High_{ticker}']
    low = df[f'Low_{ticker}']
    close = df[f'Close_{ticker}']

    tp = (high + low + close) / 3
    mean_tp = tp.rolling(period).mean()
    std_tp = tp.rolling(period).std()

    df[f'CCI_{ticker}'] = (tp - mean_tp) / (0.015 * std_tp)
    return df

In [9]:
def money_flow_index(df, ticker, period=14):
    high = df[f'High_{ticker}']
    low = df[f'Low_{ticker}']
    close = df[f'Close_{ticker}']
    volume = df[f'Volume_{ticker}']

    typical_price = (high + low + close) / 3
    raw_money_flow = typical_price * volume

    money_flow = np.where(typical_price > typical_price.shift(1), raw_money_flow, -raw_money_flow)
    df['Money_Flow'] = money_flow

    pos_flow = pd.Series(money_flow).where(money_flow > 0, 0).rolling(period).sum()
    neg_flow = pd.Series(money_flow).where(money_flow < 0, 0).rolling(period).sum().abs()

    money_flow_ratio = pos_flow / neg_flow
    df[f'MFI_{ticker}'] = 100 - (100 / (1 + money_flow_ratio))
    return df.drop(columns='Money_Flow')

In [10]:
def average_directional_index(df, ticker, period=14):
    high = df[f'High_{ticker}']
    low = df[f'Low_{ticker}']
    close = df[f'Close_{ticker}']

    up_move = high.diff()
    down_move = -low.diff()

    plus_dm = np.where((up_move > down_move) & (up_move > 0), up_move, 0)
    minus_dm = np.where((down_move > up_move) & (down_move > 0), down_move, 0)

    tr = pd.concat([high - low, (high - close.shift(1)).abs(), (low - close.shift(1)).abs()], axis=1).max(axis=1)

    atr = tr.rolling(period).mean()

    plus_di = 100 * pd.Series(plus_dm).ewm(alpha=1 / period, adjust=False).mean() / atr
    minus_di = 100 * pd.Series(minus_dm).ewm(alpha=1 / period, adjust=False).mean() / atr

    dx = 100 * (plus_di - minus_di).abs() / (plus_di + minus_di)
    adx = dx.ewm(alpha=1 / period, adjust=False).mean()

In [15]:
def generate_signals(df):
    df['Signal'] = 0.0  # Initialize signal column to 0 (hold)
    df.loc[((df['%K'] > df['%D']) & (df['%K'] < 20) & (df['%D'] < 20)), 'Signal'] = 1.0  # Buy signal
    df.loc[((df['%K'] < df['%D']) & (df['%K'] > 80) & (df['%D'] > 80)), 'Signal'] = -1.0 # Sell signal
    return df

# 2. Backtest the Strategy
def backtest_strategy(df, initial_cash=10000, ticker='AAPL'):
    position = 0
    cash = initial_cash
    portfolio_value = []

    for index, row in df.iterrows():
        price = row[f'Close_{ticker}']
        signal = row['Signal']

        if signal == 1 and cash >= price:  # Buy
            position += 1
            cash -= price
        elif signal == -1 and position > 0:  # Sell
            cash += price * position
            position = 0

        portfolio_value.append(cash + position * price)

    return portfolio_value

In [None]:
def calculate_sharpe_ratio(portfolio_values, risk_free_rate=0.0):
    daily_returns = pd.Series(portfolio_values).pct_change()
    excess_returns = daily_returns - risk_free_rate
    sharpe_ratio = np.sqrt(252) * excess_returns.mean() / excess_returns.std()
    return sharpe_ratio

portfolio_values = run_backtest(prices, signals)
sharpe_ratio = calculate_sharpe_ratio(portfolio_values)
print(f"Sharpe Ratio: {sharpe_ratio}")

In [24]:
df = stochastic_oscillator(df, ticker='AAPL')
generate_signals(df)
port_vals = backtest_strategy(df)
dates = df.index

plt.plot(dates, portfolio_values)
plt.title('Portfolio Value Over Time')
plt.xlabel('Date')
plt.ylabel('Portfolio Value')
plt.grid(True)  # Add a grid for better readability
plt.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Signal'] = 0.0  # Initialize signal column to 0 (hold)


KeyError: '%K'