<a href="https://colab.research.google.com/github/kridtapon/Indicator-Optimization-and-Selection/blob/main/Indicator_Optimization_and_Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
pip install backtesting

Collecting backtesting
  Downloading Backtesting-0.5.0-py3-none-any.whl.metadata (6.3 kB)
Downloading Backtesting-0.5.0-py3-none-any.whl (178 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.5/178.5 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: backtesting
Successfully installed backtesting-0.5.0


In [10]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from backtesting import Backtest, Strategy

# Define the stock symbol and time period
symbol = 'META'
start_date = '2019-01-01'
end_date = '2025-01-01'

# Download the data
data = yf.download(symbol, start=start_date, end=end_date)
data.columns = ['Close', 'High', 'Low', 'Open', 'Volume']
data.ffill(inplace=True)

# === Manually calculate RSI ===
def calculate_rsi(close, period=14):
    close = pd.Series(close)  # Ensure compatibility
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=period).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

# === Manually calculate MACD ===
def calculate_macd(close, fast_period=12, slow_period=26, signal_period=9):
    close = pd.Series(close)  # Ensure compatibility
    ema_fast = close.ewm(span=fast_period, adjust=False).mean()
    ema_slow = close.ewm(span=slow_period, adjust=False).mean()
    macd = ema_fast - ema_slow
    macd_signal = macd.ewm(span=signal_period, adjust=False).mean()
    return macd, macd_signal

# === Manually calculate Bollinger Bands ===
def calculate_bollinger_bands(close, period=20, num_std_dev=2):
    close = pd.Series(close)  # Ensure compatibility
    sma = close.rolling(window=period).mean()
    std_dev = close.rolling(window=period).std()
    upper_band = sma + (num_std_dev * std_dev)
    lower_band = sma - (num_std_dev * std_dev)
    return upper_band, sma, lower_band

# Generate indicators
data['RSI_14'] = calculate_rsi(data['Close'], period=14)
data['MACD'], data['MACD_signal'] = calculate_macd(data['Close'])
data['BB_upper'], data['BB_middle'], data['BB_lower'] = calculate_bollinger_bands(data['Close'])

# Create a target column for binary classification
data['Target'] = (data['Close'].shift(-5) > data['Close']).astype(int)

# Prepare the data
features = ['RSI_14', 'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 'BB_lower']
X = data[features].dropna()
y = data['Target'][X.index]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate and display feature importance
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

importances = pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
print(importances.sort_values(by='Importance', ascending=False))

# === Grid search to optimize RSI period ===
best_period = None
best_accuracy = 0
for period in range(5, 51):  # Iterate over RSI periods from 5 to 50
    data['RSI'] = calculate_rsi(data['Close'], period=period)
    X = data[['RSI']].dropna()
    y = data['Target'][X.index]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test))

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_period = period

print("Best RSI period:", best_period)

# === Backtest the strategy ===
class OptimizedStrategy(Strategy):
    def init(self):
        close = self.data.Close
        self.rsi = self.I(lambda x: calculate_rsi(pd.Series(x), period=best_period), close)

    def next(self):
        if self.rsi[-1] < 30:
            self.buy()
        elif self.rsi[-1] > 70:
            self.sell()

# Backtest
bt = Backtest(data, OptimizedStrategy, cash=10000, commission=0.002)
stats = bt.run()
bt.plot()
print(stats)


[*********************100%***********************]  1 of 1 completed


Accuracy: 0.8093645484949833
       Feature  Importance
4    BB_middle    0.182762
3     BB_upper    0.176272
2  MACD_signal    0.168717
1         MACD    0.166931
5     BB_lower    0.160757
0       RSI_14    0.144560
Best RSI period: 37


Start                     2019-01-02 00:00:00
End                       2024-12-31 00:00:00
Duration                   2190 days 00:00:00
Exposure Time [%]                    36.82119
Equity Final [$]                          0.0
Equity Peak [$]                   12358.25007
Return [%]                             -100.0
Buy & Hold Return [%]               333.18595
Return (Ann.) [%]                         0.0
Volatility (Ann.) [%]               227.32156
CAGR [%]                               -100.0
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                      -100.0
Avg. Drawdown [%]                   -28.16815
Max. Drawdown Duration     1751 days 00:00:00
Avg. Drawdown Duration      412 days 00:00:00
# Trades                                    1
Win Rate [%]                              0.0
Best Trade [%]                      -96.82924
Worst Trade [%]                   