In [2]:
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from hmmlearn.hmm import GaussianHMM
import plotly.graph_objects as go
from openalgo import api

# =========================
# CONFIG
# =========================
SYMBOL = "IRCON"
EXCHANGE = "NSE"

TRAIN_INTERVAL = "D"
TEST_INTERVAL = "15m"

TRAIN_START = "2025-11-01"
TRAIN_END   = "2025-12-31"

TEST_START  = "2026-01-01"
TEST_END    = "2026-01-08"

INITIAL_CAPITAL = 1_000_000
RISK_PER_TRADE = 0.005   # 0.5%
STOP_LOSS_PCT = 0.005
TAKE_PROFIT_PCT = 0.028

# News windows to avoid trading (start, end)
NEWS_WINDOWS = [
    ("2026-01-05 10:00:00", "2026-01-05 11:00:00"),  # Example news window
]

OPENALGO_KEY = "02b4c23ab2a45a49fa11870d21833e57ec649d00435b78e0634a0958d2b41137"
OPENALGO_HOST = "http://127.0.0.1:5000"

LIVE_TRADING = False

# =========================
# CONNECT
# =========================
client = api(api_key=OPENALGO_KEY, host=OPENALGO_HOST)

# ========================================================================================
# FINBERT SENTIMENT INTEGRATION 
# ========================================================================================

# ========================================================================================
# NEWS API CONFIGURATION
# ========================================================================================
# NEWS_KEYWORDS = ["IRCON", "Indian Railway Construction", "rail infrastructure"]
# NEWS_LOOKBACK_DAYS = 7  # How many days back to fetch news
# ========================================================================================
# ========================================================================================
# ENHANCED RISK MANAGEMENT BASED ON NEWS SENTIMENT
# ========================================================================================
# TO ADD: Dynamic risk adjustment based on sentiment
# POSITIVE_NEWS_RISK_MULTIPLIER = 1.5  # Increase position size by 50% on positive news
# NEGATIVE_NEWS_STOP_LOSS_MULTIPLIER = 0.5  # Tighter stop loss (50% of normal) on negative news
# NEGATIVE_NEWS_POSITION_MULTIPLIER = 0.3  # Reduce position size to 30% on negative news
# ========================================================================================

# =========================
# CONFIG
# =========================

# =========================
# INDICATORS
# =========================
def compute_atr(df, n=14):
    hl = df["high"] - df["low"]
    hc = abs(df["high"] - df["close"].shift())
    lc = abs(df["low"] - df["close"].shift())
    tr = pd.concat([hl, hc, lc], axis=1).max(axis=1)
    return tr.rolling(n).mean()

def compute_rsi(series, n=14):
    delta = series.diff()
    gain = delta.clip(lower=0).rolling(n).mean()
    loss = (-delta.clip(upper=0)).rolling(n).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

# ========================================================================================
# FINBERT SENTIMENT ANALYZER CLASS
# ========================================================================================
# 
# class FinBERTSentimentAnalyzer:
#     """
#     FinBERT sentiment analyzer for financial news
#     Returns: 'positive', 'negative', or 'neutral' with confidence score
#     """
#     def __init__(self):
#         self.tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
#         self.model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
#         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#         self.model.to(self.device)
#         self.model.eval()
#     
#     def analyze_sentiment(self, text):
#         """
#         Analyze sentiment of financial text
#         Returns: (sentiment_label, confidence_score, sentiment_numeric)
#         sentiment_numeric: -1 (negative), 0 (neutral), 1 (positive)
#         """
#         inputs = self.tokenizer(text, return_tensors="pt", padding=True, 
#                                truncation=True, max_length=512).to(self.device)
#         
#         with torch.no_grad():
#             outputs = self.model(**inputs)
#             predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
#         
#         # FinBERT outputs: [positive, negative, neutral]
#         positive_score = predictions[0][0].item()
#         negative_score = predictions[0][1].item()
#         neutral_score = predictions[0][2].item()
#         
#         # Determine dominant sentiment
#         max_score = max(positive_score, negative_score, neutral_score)
#         
#         if max_score == positive_score:
#             sentiment_label = "positive"
#             sentiment_numeric = 1
#         elif max_score == negative_score:
#             sentiment_label = "negative"
#             sentiment_numeric = -1
#         else:
#             sentiment_label = "neutral"
#             sentiment_numeric = 0
#         
#         return sentiment_label, max_score, sentiment_numeric
# ========================================================================================

# ========================================================================================
# NEWS FETCHING AND AGGREGATION
# ========================================================================================
# 
# def fetch_news_sentiment(symbol, start_date, end_date, api_key):
#     """
#     Fetch news for a symbol and compute aggregated sentiment
#     Returns: DataFrame with date and sentiment scores
#     """
#     newsapi = NewsApiClient(api_key=api_key)
#     analyzer = FinBERTSentimentAnalyzer()
#     
#     # Convert dates
#     start = pd.to_datetime(start_date)
#     end = pd.to_datetime(end_date)
#     
#     all_news_data = []
#     
#     # Fetch news in chunks (API limitation: 1 month max)
#     current_date = start
#     while current_date <= end:
#         next_date = min(current_date + timedelta(days=30), end)
#         
#         try:
#             # Fetch news articles
#             articles = newsapi.get_everything(
#                 q=' OR '.join(NEWS_KEYWORDS),
#                 from_param=current_date.strftime('%Y-%m-%d'),
#                 to=next_date.strftime('%Y-%m-%d'),
#                 language='en',
#                 sort_by='relevancy',
#                 page_size=100
#             )
#             
#             # Process each article
#             for article in articles.get('articles', []):
#                 published_date = pd.to_datetime(article['publishedAt']).date()
#                 title = article.get('title', '')
#                 description = article.get('description', '')
#                 content = article.get('content', '')
#                 
#                 # Combine text for analysis
#                 full_text = f"{title}. {description}. {content}"
#                 
#                 # Analyze sentiment
#                 sentiment_label, confidence, sentiment_numeric = analyzer.analyze_sentiment(full_text)
#                 
#                 all_news_data.append({
#                     'date': published_date,
#                     'title': title,
#                     'sentiment_label': sentiment_label,
#                     'sentiment_score': sentiment_numeric,
#                     'confidence': confidence,
#                     'weighted_score': sentiment_numeric * confidence
#                 })
#         
#         except Exception as e:
#             print(f"Error fetching news for {current_date}: {e}")
#         
#         current_date = next_date + timedelta(days=1)
#     
#     # Convert to DataFrame
#     news_df = pd.DataFrame(all_news_data)
#     
#     if len(news_df) == 0:
#         return pd.DataFrame(columns=['date', 'daily_sentiment', 'news_count', 'news_state'])
#     
#     # Aggregate by date
#     daily_sentiment = news_df.groupby('date').agg({
#         'weighted_score': 'mean',  # Average weighted sentiment
#         'sentiment_score': 'count'  # Number of articles
#     }).rename(columns={
#         'weighted_score': 'daily_sentiment',
#         'sentiment_score': 'news_count'
#     })
#     
#     # Create news_state: 1 (positive), -1 (negative), 0 (neutral/no news)
#     daily_sentiment['news_state'] = 0
#     daily_sentiment.loc[daily_sentiment['daily_sentiment'] > 0.3, 'news_state'] = 1
#     daily_sentiment.loc[daily_sentiment['daily_sentiment'] < -0.3, 'news_state'] = -1
#     
#     return daily_sentiment.reset_index()
# ========================================================================================


# =========================
# FEATURES
# =========================
def create_features(df):
    df = df.copy()
    df["ret1"] = df["close"].pct_change()
    df["ret3"] = df["close"].pct_change(3)
    df["ema50"] = df["close"].ewm(span=50).mean()
    df["ema_dist"] = (df["close"] - df["ema50"]) / df["ema50"]
    df["rsi"] = compute_rsi(df["close"])
    df["atr"] = compute_atr(df)
    df["atr_pct"] = df["atr"] / df["close"]
    df["vol_chg"] = df["volume"].pct_change()
    df["y"] = np.where(df["close"].shift(-1) > df["close"], 1, 0)
    return df.dropna()

 # ============================================================================
    # TO ADD: News sentiment feature integration
    # ============================================================================
    # # Initialize news_state with neutral (0)
    # df["news_state"] = 0
    # 
    # # Fetch news sentiment (this should be done once and cached)
    # # news_sentiment_df = fetch_news_sentiment(
    # #     SYMBOL, 
    # #     TRAIN_START, 
    # #     TEST_END, 
    # #     NEWS_API_KEY
    # # )
    # 
    # # Merge news sentiment with price data
    # # df['date_only'] = df.index.date
    # # df = df.merge(
    # #     news_sentiment_df[['date', 'news_state', 'daily_sentiment', 'news_count']], 
    # #     left_on='date_only', 
    # #     right_on='date', 
    # #     how='left'
    # # )
    # # df['news_state'].fillna(0, inplace=True)  # Fill days with no news as neutral
    # # df['daily_sentiment'].fillna(0, inplace=True)
    # # df['news_count'].fillna(0, inplace=True)
    # # df.drop(['date_only', 'date'], axis=1, inplace=True)
    # ============================================================================
    

# =========================
# REGIME DETECTION
# =========================
def detect_regime(df):
    if len(df) < 20:
        df["regime"] = 0
        return df
    X = np.column_stack([df["ret1"], df["atr_pct"]])
    hmm = GaussianHMM(n_components=3, n_iter=200)
    hmm.fit(X)
    df["regime"] = hmm.predict(X)
    return df

# =========================
# NEWS FILTER
# =========================
def filter_news(df):
    """Return a boolean mask: True if NO news, False if news."""
    mask = pd.Series(True, index=df.index)
    # Ensure index is datetime
    if not isinstance(df.index, pd.DatetimeIndex):
         df.index = pd.to_datetime(df.index)
         
    for start_str, end_str in NEWS_WINDOWS:
        start = pd.Timestamp(start_str)
        end = pd.Timestamp(end_str)
        # Mark range as False (news present)
        mask.loc[start:end] = False
    return mask

# =========================
# TREND BAR MOMENTUM SIGNAL
# =========================
def trend_bar_signals(df):
    df["prev_high"] = df["high"].shift(1)
    df["prev_low"] = df["low"].shift(1)
    sig = np.zeros(len(df))
    sig[df["close"] > df["prev_high"]] = 1
    sig[df["close"] < df["prev_low"]] = -1
    return sig

# =========================
# BACKTEST ENGINE (Hybrid ML + Trend Bar)
# =========================

 # ============================================================================
    # TO MODIFY: Enhanced backtest with news-based risk management
    # ============================================================================
    # The backtest engine should be modified to adjust risk based on news_state:
    # 
    # 1. On POSITIVE news (news_state == 1):
    #    - risk_per_trade *= POSITIVE_NEWS_RISK_MULTIPLIER (1.5)
    #    - Normal stop loss and take profit
    #    - More aggressive position sizing
    # 
    # 2. On NEGATIVE news (news_state == -1):
    #    - risk_per_trade *= NEGATIVE_NEWS_POSITION_MULTIPLIER (0.3)
    #    - stop_loss_pct *= NEGATIVE_NEWS_STOP_LOSS_MULTIPLIER (0.5 = tighter)
    #    - More conservative approach
    # 
    # 3. On NEUTRAL news (news_state == 0):
    #    - Normal risk parameters
    # 
    # Example modification in the loop:
    # for i in range(1, len(df) - 1):
    #     row = df.iloc[i]
    #     news_state = row.get('news_state', 0)
    #     
    #     # Adjust risk based on news
    #     current_risk = risk_per_trade
    #     current_sl = stop_loss_pct
    #     
    #     if news_state == 1:  # Positive news
    #         current_risk *= POSITIVE_NEWS_RISK_MULTIPLIER
    #     elif news_state == -1:  # Negative news
    #         current_risk *= NEGATIVE_NEWS_POSITION_MULTIPLIER
    #         current_sl *= NEGATIVE_NEWS_STOP_LOSS_MULTIPLIER
    #     
    #     # Rest of backtest logic with current_risk and current_sl
    # ============================================================================

    
def backtest_hybrid(df, signals, risk_per_trade, stop_loss_pct, take_profit_pct):
    capital = INITIAL_CAPITAL
    position = 0
    entry_price = 0
    entry_time = None
    trade_log = []
    equity_curve = []
    pending_short = False

    for i in range(1, len(df) - 1):
        row = df.iloc[i]
        next_row = df.iloc[i + 1]
        price = row["close"]
        time = df.index[i]

        equity_curve.append(capital + (position * (price - entry_price) if position != 0 else 0))

        # Exit long
        if position > 0:
            sl = entry_price * (1 - stop_loss_pct)
            tp = entry_price * (1 + take_profit_pct)
            if price <= sl or price >= tp or signals[i] == -1:
                pnl = position * (price - entry_price)
                capital += pnl
                trade_log.append([entry_time, time, "LONG(CNC)", entry_price, price, pnl])
                position = 0

        # Execute short next day (MIS)
        if pending_short:
            entry_price_s = next_row["open"]
            entry_time_s = df.index[i + 1]
            risk_amount = capital * risk_per_trade
            qty = risk_amount / (entry_price_s * stop_loss_pct)
            exit_price_s = next_row["close"]
            exit_time_s = df.index[i + 1]
            pnl = -qty * (exit_price_s - entry_price_s)
            capital += pnl
            trade_log.append([entry_time_s, exit_time_s, "SHORT(MIS)", entry_price_s, exit_price_s, pnl])
            pending_short = False

        # Entry logic
        if position == 0:
            if signals[i] == 1:
                risk_amount = capital * risk_per_trade
                qty = risk_amount / (price * stop_loss_pct)
                position = qty
                entry_price = price
                entry_time = time
            elif signals[i] == -1:
                pending_short = True

    return capital, equity_curve, trade_log

# =========================
# DATA FETCH
# =========================
def get_data(start, end, interval):
    df = client.history(symbol=SYMBOL, exchange=EXCHANGE,
                        interval=interval, start_date=start, end_date=end)
    df.index = pd.to_datetime(df.index).tz_localize(None)
    return df

# =========================
# ML MODEL SELECTION
# =========================
MODELS = {
    "logistic": LogisticRegression(max_iter=500),
    "xgb": XGBClassifier(n_estimators=150, max_depth=3, verbosity=0)
}

def select_model(train_df, features):
    best_model = None
    best_score = -999
    for model in MODELS.values():
        model.fit(train_df[features], train_df["y"])
        preds = model.predict(train_df[features])
        sigs = np.where(preds == 1, 1, -1)
        
        _, equity_curve, _ = backtest_hybrid(
            train_df, sigs, RISK_PER_TRADE, STOP_LOSS_PCT, TAKE_PROFIT_PCT
        )
        returns = pd.Series(equity_curve).pct_change().dropna()
        sharpe = sqrt(252) * returns.mean() / returns.std() if returns.std() != 0 else 0

        if sharpe > best_score:
            best_score = sharpe
            best_model = model
    return best_model

# =========================
# MAIN
# =========================
def main():
    print("Fetching training data...")
    df_train = get_data(TRAIN_START, TRAIN_END, TRAIN_INTERVAL)
    print("Fetching test data...")
    df_test = get_data(TEST_START, TEST_END, TEST_INTERVAL)

    full_raw = pd.concat([df_train, df_test], axis=0)
    full_raw = full_raw[~full_raw.index.duplicated(keep="first")]
    full_raw = full_raw.sort_index()

    full_feat = create_features(full_raw)
    full_feat = detect_regime(full_feat)

    FEATURES = ["ret1", "ret3", "ema_dist", "rsi", "atr_pct", "vol_chg", "regime"]

    print("Selecting best ML model...")
    model = select_model(full_feat.loc[TRAIN_START:TRAIN_END], FEATURES)

    # ML signals
    full_feat["pred"] = model.predict(full_feat[FEATURES])
    full_feat["ml_signal"] = np.where(full_feat["pred"] == 1, 1, -1)

    # Trend Bar signals
    full_feat["tb_signal"] = trend_bar_signals(full_feat)

    # Hybrid: ML AND Trend Bar
    full_feat["hybrid_signal"] = np.where(
        (full_feat["ml_signal"] == 1) & (full_feat["tb_signal"] == 1), 1,
        np.where((full_feat["ml_signal"] == -1) & (full_feat["tb_signal"] == -1), -1, 0)
    )
    
    # Apply News Filter
    no_news_mask = filter_news(full_feat)
    full_feat["hybrid_signal"] = np.where(no_news_mask, full_feat["hybrid_signal"], 0)

    # Backtest hybrid
    capital, equity_curve, trade_log = backtest_hybrid(
        full_feat,
        full_feat["hybrid_signal"].values,
        RISK_PER_TRADE,
        STOP_LOSS_PCT,
        TAKE_PROFIT_PCT
    )

    trades_df = pd.DataFrame(trade_log, columns=[
        "Entry Time", "Exit Time", "Type", "Entry Price", "Exit Price", "PnL"
    ])
    csv_path = "hybrid_ml_trend_bar_trades.csv"
    trades_df.to_csv(csv_path, index=False)

    # Performance metrics
    equity_series = pd.Series(equity_curve).dropna()
    returns = equity_series.pct_change().dropna()
    total_return = (capital - INITIAL_CAPITAL) / INITIAL_CAPITAL
    years = len(full_feat) / 252
    cagr = (capital / INITIAL_CAPITAL) ** (1 / years) - 1 if years > 0 else 0
    sharpe = sqrt(252) * returns.mean() / returns.std() if returns.std() != 0 else 0
    drawdown = equity_series / equity_series.cummax() - 1
    max_dd = drawdown.min()
    win_rate = (trades_df["PnL"] > 0).mean() * 100 if len(trades_df) else 0

    print("\n========== PERFORMANCE ==========")
    print(f"Final Capital: ₹{capital:,.0f}")
    print(f"Total Return: {total_return*100:.2f}%")
    print(f"CAGR: {cagr*100:.2f}%")
    print(f"Sharpe Ratio: {sharpe:.2f}")
    print(f"Max Drawdown: {max_dd*100:.2f}%")
    print(f"Win Rate: {win_rate:.2f}%")
    print(f"Trades: {len(trades_df)}")
    print(f"Trade Log Exported: {csv_path}")
    
    

    # Plot equity
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=equity_curve, name="Hybrid ML + Trend Bar Equity"))
    fig.update_layout(title="Hybrid ML + Trend Bar Momentum Equity Curve", template="plotly_dark")
    fig.show()

if __name__ == "__main__":
    main()

Fetching training data...
Fetching test data...
Selecting best ML model...

Final Capital: ₹1,153,247
Total Return: 15.32%
CAGR: 22.51%
Sharpe Ratio: 1.53
Max Drawdown: -4.94%
Win Rate: 63.89%
Trades: 36
Trade Log Exported: hybrid_ml_trend_bar_trades.csv
