In [2]:
!pip list

Package                   Version
------------------------- ------------
aiodns                    3.6.0
aiohappyeyeballs          2.6.1
aiohttp                   3.13.2
aiosignal                 1.4.0
anyio                     4.11.0
argon2-cffi               25.1.0
argon2-cffi-bindings      25.1.0
arrow                     1.4.0
asttokens                 3.0.1
async-lru                 2.0.5
async-timeout             5.0.1
attrs                     25.4.0
babel                     2.17.0
beautifulsoup4            4.14.2
bleach                    6.3.0
blinker                   1.9.0
Brotli                    1.2.0
cached-property           1.5.2
ccxt                      4.5.26
certifi                   2025.11.12
cffi                      2.0.0
charset-normalizer        3.4.4
click                     8.3.1
coincurve                 21.0.0
colorama                  0.4.6
comm                      0.2.3
contourpy                 1.3.2
cryptography              46.0.3
cycler          

In [3]:
import torch


In [6]:
import ccxt

In [7]:
import os
os.getcwd()


'C:\\Users\\user\\Desktop\\recsys_crypto\\notebooks'

In [7]:
from pathlib import Path

# You are in .../recsys_crypto/notebooks
project_root = Path.cwd().parent
print("Project root:", project_root)

# Folders to create
folders = [
    "src",
    "src/__pycache__",   # will fill itself when you run code
    "data/raw/ohlcv",
    "data/raw/news",
    "data/processed",
    "data/patterns",
    "models/ml",
    "models/configs",
    "scripts",
    "tools",
]

for f in folders:
    p = project_root / f
    p.mkdir(parents=True, exist_ok=True)

# create __init__.py for packages
for pkg in ["src", "scripts"]:
    init_path = project_root / pkg / "__init__.py"
    if not init_path.exists():
        init_path.write_text("", encoding="utf-8")

print("Folders and __init__.py created.")


Project root: C:\Users\user\Desktop\recsys_crypto
Folders and __init__.py created.


In [2]:
%%writefile ../src/ollama_client.py
# src/ollama_client.py

import json
import re
from typing import Any, Dict, List, Optional

import requests

try:
    # If you have this in your config already
    from .config import OLLAMA_MODEL_DEFAULT
except Exception:
    OLLAMA_MODEL_DEFAULT = "gemma3:4b"

OLLAMA_URL = "http://localhost:11434/api/chat"

# Reuse a single HTTP session for performance
_SESSION = requests.Session()


SYSTEM_PROMPT = """
You are a trading assistant.

You will receive structured market data for a single crypto asset at a specific time:
- Price (open, high, low, close)
- Volume
- Technical indicators (RSI, MACD, moving averages, Bollinger bands, volatility)
- Simple sentiment summary

Your task:
1. Decide how attractive it is to BUY, HOLD, or SELL this asset.
2. Output a single JSON object with this exact schema:

{
  "action_scores": {
    "Buy": 0.0-1.0,
    "Hold": 0.0-1.0,
    "Sell": 0.0-1.0
  },
  "forecast": "bullish" | "bearish" | "neutral",
  "confidence": 0.0-1.0,
  "reason": "short explanation"
}

Rules:
- The three scores do NOT need to sum to 1.0, but must each be between 0.0 and 1.0.
- Be consistent: if forecast is "bullish", Buy usually should have a higher score than Sell.
- Return ONLY the JSON object. No markdown, no extra text.
"""


def build_user_content(row: Dict[str, Any]) -> str:
    """
    Turn a feature row into a text description for the LLM.

    This function expects keys like:
        asset, timestamp, open, high, low, close, volume,
        rsi, macd, macd_signal,
        sma_7, sma_30,
        bb_high, bb_low,
        returns_1h, volatility_24h,
        sentiment_summary (optional).
    """
    asset = row.get("asset", "UNKNOWN")
    ts = row.get("timestamp", "UNKNOWN")

    open_price = row.get("open", None)
    high = row.get("high", None)
    low = row.get("low", None)
    close = row.get("close", None)
    volume = row.get("volume", None)

    rsi = row.get("rsi", None)
    macd = row.get("macd", None)
    macd_signal = row.get("macd_signal", None)
    sma7 = row.get("sma_7", None)
    sma30 = row.get("sma_30", None)
    bb_high = row.get("bb_high", None)
    bb_low = row.get("bb_low", None)
    ret_1h = row.get("returns_1h", None)
    vol_24h = row.get("volatility_24h", None)

    sentiment = row.get("sentiment_summary", "No sentiment data available.")

    lines = [
        f"Asset: {asset}",
        f"Timestamp: {ts}",
        "",
        "Price & Volume:",
        f"- Open: {open_price}",
        f"- High: {high}",
        f"- Low: {low}",
        f"- Close: {close}",
        f"- Volume: {volume}",
        "",
        "Technical indicators:",
        f"- RSI: {rsi}",
        f"- MACD: {macd}",
        f"- MACD Signal: {macd_signal}",
        f"- SMA 7: {sma7}",
        f"- SMA 30: {sma30}",
        f"- Bollinger High: {bb_high}",
        f"- Bollinger Low: {bb_low}",
        f"- 1h Returns: {ret_1h}",
        f"- 24h Volatility (std of returns): {vol_24h}",
        "",
        "Sentiment summary:",
        f"{sentiment}",
        "",
        "Based on this information, estimate how attractive it is to BUY, HOLD, or SELL this asset now.",
        "Return ONLY the JSON object with fields action_scores, forecast, confidence, reason.",
    ]
    return "\n".join(lines)


# -----------------------
# Robust JSON extraction
# -----------------------


def _try_json_direct(text: str) -> Optional[Dict[str, Any]]:
    try:
        return json.loads(text)
    except Exception:
        return None


def _try_json_braces(text: str) -> Optional[Dict[str, Any]]:
    """
    Try to find a top-level {...} block by matching braces.
    """
    start = -1
    depth = 0
    for i, ch in enumerate(text):
        if ch == "{":
            if depth == 0:
                start = i
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0 and start != -1:
                candidate = text[start : i + 1]
                try:
                    return json.loads(candidate)
                except Exception:
                    # continue searching in case there is another valid block
                    start = -1
    return None


def _try_json_regex(text: str) -> Optional[Dict[str, Any]]:
    """
    Fallback: use regex to find the first {...} substring and parse it.
    """
    pattern = re.compile(r"\{.*\}", re.DOTALL)
    match = pattern.search(text)
    if not match:
        return None
    candidate = match.group(0)
    try:
        return json.loads(candidate)
    except Exception:
        return None


def _fallback_from_text(text: str) -> Dict[str, Any]:
    """
    Last-resort extraction if JSON parsing fails completely.

    We try to guess Buy/Hold/Sell scores and forecast/confidence
    from any numbers and keywords in the text.
    """
    text_lower = text.lower()

    def find_score(keyword: str) -> float:
        # Look for keyword followed by a number like 0.7 or 70%
        pattern = re.compile(rf"{keyword}[^0-9]*([0-9]+(\.[0-9]+)?)", re.IGNORECASE)
        m = pattern.search(text)
        if not m:
            return 0.0
        try:
            val = float(m.group(1))
            if val > 1.0:
                val = val / 100.0
            return max(0.0, min(1.0, val))
        except Exception:
            return 0.0

    buy_score = find_score("buy")
    hold_score = find_score("hold")
    sell_score = find_score("sell")

    if buy_score == hold_score == sell_score == 0.0:
        # Fallback neutral
        buy_score = hold_score = sell_score = 1.0 / 3.0

    if "bullish" in text_lower:
        forecast = "bullish"
    elif "bearish" in text_lower:
        forecast = "bearish"
    else:
        forecast = "neutral"

    confidence = 0.5

    return {
        "action_scores": {
            "Buy": float(buy_score),
            "Hold": float(hold_score),
            "Sell": float(sell_score),
        },
        "forecast": forecast,
        "confidence": confidence,
        "reason": "Fallback extraction from non-JSON response.",
    }


def _extract_json(text: str) -> Dict[str, Any]:
    """
    Try multiple strategies to pull a JSON object out of LLM output.
    """
    for fn in (_try_json_direct, _try_json_braces, _try_json_regex):
        obj = fn(text)
        if obj is not None:
            return obj
    # If everything failed, do heuristic fallback
    return _fallback_from_text(text)


# --------------
# Main API call
# --------------


def ask_ollama(
    model_name: str,
    row: Dict[str, Any],
    timeout: int = 120,
) -> Dict[str, Any]:
    """
    Call Ollama with given model and one feature row.

    Returns a dict with keys:
        - action_scores: {"Buy": float, "Hold": float, "Sell": float}
        - forecast: str
        - confidence: float
        - reason: str
    """
    user_content = build_user_content(row)

    payload = {
        "model": model_name or OLLAMA_MODEL_DEFAULT,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT.strip()},
            {"role": "user", "content": user_content},
        ],
        "stream": False,
    }

    resp = _SESSION.post(OLLAMA_URL, json=payload, timeout=timeout)
    resp.raise_for_status()

    data = resp.json()
    # Ollama's chat API returns {"message": {"content": "..."}, ...}
    content = ""
    if isinstance(data, dict):
        msg = data.get("message") or data.get("choices", [{}])[0]
        if isinstance(msg, dict):
            content = msg.get("content", "") or msg.get("text", "")
    if not content:
        raise ValueError("Empty response from Ollama")

    parsed = _extract_json(content)

    # Ensure structure
    scores = parsed.get("action_scores", {})
    for k in ("Buy", "Hold", "Sell"):
        scores.setdefault(k, 0.0)

    parsed["action_scores"] = {
        "Buy": float(scores["Buy"]),
        "Hold": float(scores["Hold"]),
        "Sell": float(scores["Sell"]),
    }
    parsed.setdefault("forecast", "neutral")
    parsed.setdefault("confidence", 0.5)
    parsed.setdefault("reason", "")

    return parsed


Overwriting ../src/ollama_client.py


In [13]:
%%writefile ../src/__init__.py
# Make src a package.


Overwriting ../src/__init__.py


In [14]:
%%writefile ../src/config.py
from pathlib import Path

# Project root = .../recsys_crypto
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Data directory
DATA_DIR = str(PROJECT_ROOT / "data")

# SPMF jar path (if/when you use it)
SPMF_JAR_PATH = str(PROJECT_ROOT / "tools" / "spmf.jar")

# Default Ollama model (change as you like: "llama3", "gemma2:9b", "phi3", etc.)
OLLAMA_MODEL_DEFAULT = "mistral"

# Exchange + assets settings
ASSETS = [
    "BTCUSDT", "ETHUSDT", "SOLUSDT", "BNBUSDT", "XRPUSDT",
    "DOGEUSDT", "ADAUSDT", "AVAXUSDT", "TRXUSDT", "LINKUSDT",
]

EXCHANGE = "binance"
TIMEFRAME = "1h"
WINDOW_DAYS = 45          # ~45 days history
HORIZON_HOURS = 72        # 3 days -> 72 hours


Writing ../src/config.py


In [15]:
%%writefile ../src/market_data.py
import ccxt
import pandas as pd
from pathlib import Path
from ta.trend import MACD, SMAIndicator
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands

from .config import ASSETS, EXCHANGE, TIMEFRAME, WINDOW_DAYS, DATA_DIR


def get_exchange():
    return getattr(ccxt, EXCHANGE)()


def fetch_ohlcv_for_symbol(exchange, symbol: str, limit: int | None = None) -> pd.DataFrame:
    if limit is None:
        limit = WINDOW_DAYS * 24  # rough for 1h data
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe=TIMEFRAME, limit=limit)
    df = pd.DataFrame(
        ohlcv,
        columns=["timestamp", "open", "high", "low", "close", "volume"],
    )
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
    return df


def add_technical_indicators(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()

    df["rsi"] = RSIIndicator(df["close"], window=14).rsi()

    macd = MACD(df["close"])
    df["macd"] = macd.macd()
    df["macd_signal"] = macd.macd_signal()

    df["sma_7"] = SMAIndicator(df["close"], window=7).sma_indicator()
    df["sma_30"] = SMAIndicator(df["close"], window=30).sma_indicator()

    bb = BollingerBands(df["close"], window=20)
    df["bb_high"] = bb.bollinger_hband()
    df["bb_low"] = bb.bollinger_lband()

    df["returns_1h"] = df["close"].pct_change()
    df["volatility_24h"] = df["returns_1h"].rolling(24).std()

    df = df.dropna()
    return df


def build_market_feature_table() -> pd.DataFrame:
    data_path = Path(DATA_DIR) / "processed"
    data_path.mkdir(parents=True, exist_ok=True)

    exchange = get_exchange()
    all_frames = []

    for symbol in ASSETS:
        symbol_with_slash = symbol.replace("USDT", "/USDT")
        df = fetch_ohlcv_for_symbol(exchange, symbol_with_slash)
        df = add_technical_indicators(df)
        df["asset"] = symbol
        all_frames.append(df)

    full = pd.concat(all_frames, ignore_index=True)
    out_file = data_path / "market_features.csv"
    full.to_csv(out_file, index=False)
    print(f"Saved market features to {out_file}")
    return full


if __name__ == "__main__":
    build_market_feature_table()


Writing ../src/market_data.py


In [29]:
%%writefile ../src/user_model.py
from dataclasses import dataclass
from typing import Literal


RiskLevel = Literal["low", "moderate", "high"]


@dataclass
class UserProfile:
    """
    Simple user profile capturing a name and risk preference.

    risk_level:
        - "low":      conservative / risk-averse
        - "moderate": balanced
        - "high":     aggressive / risk-seeking
    """
    name: str
    risk_level: RiskLevel   # 'low', 'moderate', 'high'

    @classmethod
    def from_type(cls, user_type: str) -> "UserProfile":
        """
        Factory for convenient construction from a simple string.

        Examples:
            UserProfile.from_type("conservative") -> ("Conservative", "low")
            UserProfile.from_type("moderate")     -> ("Moderate", "moderate")
            UserProfile.from_type("aggressive")   -> ("Aggressive", "high")
        """
        t = (user_type or "").strip().lower()

        if t in ("conservative", "low", "risk_low", "risk-averse", "risk_averse"):
            return cls(name="Conservative", risk_level="low")

        if t in ("aggressive", "high", "risk_high", "risk-seeking", "risk_seeking"):
            return cls(name="Aggressive", risk_level="high")

        # default: moderate
        return cls(name="Moderate", risk_level="moderate")


# Optional handy constants (keeps your old style)
CONSERVATIVE = UserProfile("Conservative", "low")
MODERATE = UserProfile("Moderate", "moderate")
AGGRESSIVE = UserProfile("Aggressive", "high")


Overwriting ../src/user_model.py


In [17]:
%%writefile ../src/hybrid_engine.py
from typing import Dict
from .user_model import UserProfile


def compute_final_scores(
    ml_conf: Dict[str, float],
    llm_score: Dict[str, float],
    pattern_support: Dict[str, float],
    alpha: float,
    beta: float,
    gamma: float,
    user: UserProfile,
) -> Dict[str, float]:
    actions = ["Buy", "Hold", "Sell"]
    scores: Dict[str, float] = {}

    for act in actions:
        base = (
            alpha * ml_conf.get(act, 0.0) +
            beta * llm_score.get(act, 0.0) +
            gamma * pattern_support.get(act, 0.0)
        )

        # user risk adjustment
        if user.risk_level == "low":
            if act == "Buy":
                base *= 0.7
            elif act == "Sell":
                base *= 1.1
        elif user.risk_level == "high":
            if act == "Buy":
                base *= 1.2
            elif act == "Sell":
                base *= 1.1
            elif act == "Hold":
                base *= 0.8

        scores[act] = base

    total = sum(scores.values()) or 1.0
    for k in scores:
        scores[k] /= total
    return scores


def rank_actions(scores: Dict[str, float]):
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)


Writing ../src/hybrid_engine.py


In [18]:
%%writefile ../src/recsys_metrics.py
from typing import List, Dict
import math
from collections import Counter


def _relevance(action: str, true_action: str) -> int:
    return int(action == true_action)


def ndcg_at_k(records: List[Dict], k: int) -> float:
    scores = []
    for rec in records:
        ranking = rec["ranking"][:k]
        true = rec["true"]
        dcg = 0.0
        for i, a in enumerate(ranking):
            rel = _relevance(a, true)
            dcg += (2**rel - 1) / math.log2(i + 2)
        idcg = (2**1 - 1) / math.log2(1 + 1)
        scores.append(dcg / idcg if idcg > 0 else 0.0)
    return sum(scores) / len(scores) if scores else 0.0


def map_at_k(records: List[Dict], k: int) -> float:
    aps = []
    for rec in records:
        ranking = rec["ranking"][:k]
        true = rec["true"]
        if true in ranking:
            idx = ranking.index(true)
            aps.append(1.0 / (idx + 1))
        else:
            aps.append(0.0)
    return sum(aps) / len(aps) if aps else 0.0


def diversity(records: List[Dict]) -> float:
    top1 = [r["ranking"][0] for r in records if r["ranking"]]
    counts = Counter(top1)
    total = sum(counts.values())
    if total == 0:
        return 0.0
    entropy = -sum((c/total) * math.log2(c/total) for c in counts.values())
    max_entropy = math.log2(len(counts)) if counts else 1.0
    return entropy / max_entropy if max_entropy > 0 else 0.0


Writing ../src/recsys_metrics.py


In [19]:
%%writefile ../scripts/__init__.py
# Make scripts a package (optional).


Overwriting ../scripts/__init__.py


In [20]:
%%writefile ../scripts/demo_recommend.py
import pandas as pd
from pathlib import Path

from src.user_model import MODERATE
from src.hybrid_engine import compute_final_scores, rank_actions
from src.ollama_client import ask_ollama
from src.config import DATA_DIR


def main():
    df_path = Path(DATA_DIR) / "processed" / "market_features.csv"
    if not df_path.exists():
        raise FileNotFoundError(f"market_features.csv not found at {df_path}. "
                                f"Run src.market_data.build_market_feature_table() first.")

    df = pd.read_csv(df_path)
    row = df.iloc[-1].to_dict()  # last row as example

    # placeholder ML & pattern signals (you will replace with real ones later)
    ml_conf = {"Buy": 0.5, "Hold": 0.3, "Sell": 0.2}
    pattern_support = {"Buy": 0.1, "Hold": 0.1, "Sell": 0.1}

    model_name = "mistral"  # or "llama3", "gemma2:9b", "phi3"
    llm_out = ask_ollama(model_name, row)
    llm_scores = llm_out.get("action_scores", {})

    for a in ["Buy", "Hold", "Sell"]:
        llm_scores.setdefault(a, 0.0)

    user = MODERATE
    scores = compute_final_scores(
        ml_conf,
        llm_scores,
        pattern_support,
        alpha=0.4,
        beta=0.4,
        gamma=0.2,
        user=user,
    )
    ranking = rank_actions(scores)

    print("User profile:", user.name)
    print("LLM scores:", llm_scores)
    print("Final scores:", scores)
    print("Ranking:", ranking)


if __name__ == "__main__":
    main()


Writing ../scripts/demo_recommend.py


In [21]:
import sys
from pathlib import Path

project_root = Path.cwd().parent
sys.path.append(str(project_root))

from src import market_data, ollama_client, hybrid_engine, user_model

print("market_data:", market_data.__file__)
print("ollama_client:", ollama_client.__file__)


market_data: C:\Users\user\Desktop\recsys_crypto\src\market_data.py
ollama_client: C:\Users\user\Desktop\recsys_crypto\src\ollama_client.py


In [22]:
import sys
from pathlib import Path

project_root = Path.cwd().parent
sys.path.append(str(project_root))

from src.market_data import build_market_feature_table
from src.config import DATA_DIR

print("PROJECT_ROOT:", project_root)
print("DATA_DIR:", DATA_DIR)

df_features = build_market_feature_table()
df_features.head()


PROJECT_ROOT: C:\Users\user\Desktop\recsys_crypto
DATA_DIR: C:\Users\user\Desktop\recsys_crypto\data
Saved market features to C:\Users\user\Desktop\recsys_crypto\data\processed\market_features.csv


Unnamed: 0,timestamp,open,high,low,close,volume,rsi,macd,macd_signal,sma_7,sma_30,bb_high,bb_low,returns_1h,volatility_24h,asset
0,2025-10-31 10:00:00,109848.39,110117.32,109710.08,109819.2,375.06762,54.808341,171.940629,3.423187,109830.358571,109053.284667,110923.27423,106497.31777,-0.000266,0.005683,BTCUSDT
1,2025-10-31 11:00:00,109819.21,110550.0,109461.53,110415.61,712.18313,59.211415,235.736423,49.885834,109877.728571,109058.862,111127.225528,106563.771472,0.005431,0.005742,BTCUSDT
2,2025-10-31 12:00:00,110415.62,110681.0,109619.94,109653.19,904.10139,52.208584,222.212599,84.351187,109842.591429,109021.701333,111204.727181,106637.131819,-0.006905,0.005337,BTCUSDT
3,2025-10-31 13:00:00,109653.2,110240.0,109366.91,110157.54,1164.32028,55.922307,249.317721,117.344494,109915.097143,108981.395333,111299.20583,106809.73817,0.0046,0.005149,BTCUSDT
4,2025-10-31 14:00:00,110157.54,111054.61,109590.46,110790.14,2487.24179,60.109395,318.176518,157.510899,110106.644286,108983.923,111373.849274,107130.399726,0.005743,0.005133,BTCUSDT


In [23]:
import pandas as pd
from src.config import HORIZON_HOURS

features_path = Path(DATA_DIR) / "processed" / "market_features.csv"
df = pd.read_csv(features_path, parse_dates=["timestamp"])

# sort by asset + time
df = df.sort_values(["asset", "timestamp"])

# compute future close after HORIZON_HOURS
df["future_close"] = df.groupby("asset")["close"].shift(-HORIZON_HOURS)
df["future_return"] = (df["future_close"] - df["close"]) / df["close"]

def true_action_from_return(r: float) -> str:
    if r > 0.01:
        return "Buy"
    elif r < -0.01:
        return "Sell"
    else:
        return "Hold"

df["true_action"] = df["future_return"].apply(true_action_from_return)

# drop last rows where future_close is NaN
df_labeled = df.dropna(subset=["future_close", "true_action"]).reset_index(drop=True)
print("Labeled rows:", len(df_labeled))
df_labeled[["asset", "timestamp", "close", "future_return", "true_action"]].head()


Labeled rows: 8950


Unnamed: 0,asset,timestamp,close,future_return,true_action
0,ADAUSDT,2025-10-31 10:00:00,0.6101,-0.061629,Sell
1,ADAUSDT,2025-10-31 11:00:00,0.6168,-0.066472,Sell
2,ADAUSDT,2025-10-31 12:00:00,0.6117,-0.059506,Sell
3,ADAUSDT,2025-10-31 13:00:00,0.6149,-0.065214,Sell
4,ADAUSDT,2025-10-31 14:00:00,0.6176,-0.063795,Sell


In [24]:
from src.ollama_client import ask_ollama

# pick one row as example
row_example = df_labeled.iloc[-1].to_dict()  # last row
model_name = "llama3"  # or "llama3", or your Gemma tag from `ollama list`

llm_output = ask_ollama(model_name, row_example)
llm_output


{'action_scores': {'Buy': 0.74, 'Hold': 0.23, 'Sell': 0.03},
 'forecast': 'bullish',
 'confidence': 0.85,
 'reason': 'Strong short-term indicators and neutral sentiment support a bullish outlook.'}

In [33]:
import importlib, sys
from pathlib import Path

project_root = Path.cwd().parent
sys.path.append(str(project_root))

import src.ollama_client as oc
importlib.reload(oc)

# quick single-call test
row_debug = df_labeled.iloc[0].to_dict()
print("gemma3:4b ->", oc.ask_ollama("gemma3:4b", row_debug))
print("llama3    ->", oc.ask_ollama("llama3", row_debug))
print("phi       ->", oc.ask_ollama("phi", row_debug))


gemma3:4b -> {'action_scores': {'Buy': 0.45, 'Hold': 0.5, 'Sell': 0.05}, 'forecast': 'bullish', 'confidence': 0.65, 'reason': 'The RSI is moderately bullish, price is near SMA_7 and SMA_30, and recent sentiment is neutral. The price is consolidating, suggesting a potential upward move.'}
llama3    -> {'action_scores': {'Buy': 0.0, 'Hold': 0.7, 'Sell': 0.3}, 'forecast': 'neutral', 'confidence': 0.6, 'reason': 'Mixed signals from indicators; RSI oversold, but MACD and SMAs indicate consolidation'}
phi       -> {'action_scores': {'Buy': 0, 'Hold': 1, 'Sell': 0}, 'forecast': 'neutral', 'confidence': 1.0, 'reason': ''}


In [34]:
models_to_test = [
    "gemma3:4b",
    "llama3",
    "phi",
]

sample_size = 10
sample = df_labeled.sample(n=sample_size, random_state=42).reset_index(drop=True)

results = []

for model_name in models_to_test:
    print(f"\n=== Testing model: {model_name} ===")
    correct = 0
    total = 0
    latencies = []
    errors = 0

    for i, row in sample.iterrows():
        row_dict = row.to_dict()
        start = time.time()
        try:
            out = oc.ask_ollama(model_name, row_dict)

            scores = out.get("action_scores", {})
            for a in ["Buy", "Hold", "Sell"]:
                scores.setdefault(a, 0.0)

            pred_action = max(scores.items(), key=lambda kv: kv[1])[0]

            latency = time.time() - start
            latencies.append(latency)
            total += 1

            if pred_action == row["true_action"]:
                correct += 1

        except Exception as e:
            errors += 1
            print(f"  [Error on row {i}] {e}")

    accuracy = correct / total if total else 0.0
    avg_latency = sum(latencies) / len(latencies) if latencies else None

    results.append({
        "model": model_name,
        "accuracy": accuracy,
        "avg_latency_sec": avg_latency,
        "n_samples": total,
        "errors": errors,
    })

results



=== Testing model: gemma3:4b ===

=== Testing model: llama3 ===
  [Error on row 0] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 5] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 9] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat

=== Testing model: phi ===


[{'model': 'gemma3:4b',
  'accuracy': 0.2,
  'avg_latency_sec': 1.7844038009643555,
  'n_samples': 10,
  'errors': 0},
 {'model': 'llama3',
  'accuracy': 0.2857142857142857,
  'avg_latency_sec': 2.6377481392451694,
  'n_samples': 7,
  'errors': 3},
 {'model': 'phi',
  'accuracy': 0.2,
  'avg_latency_sec': 8.044142150878907,
  'n_samples': 10,
  'errors': 0}]

In [35]:
print("Total rows in raw feature table: ", len(df))
print("Total rows in labeled table:     ", len(df_labeled))

print("\nRows per asset (labeled):")
print(df_labeled["asset"].value_counts())

print("\nTime range (labeled):")
print("Min timestamp:", df_labeled["timestamp"].min())
print("Max timestamp:", df_labeled["timestamp"].max())


Total rows in raw feature table:  9670
Total rows in labeled table:      8950

Rows per asset (labeled):
asset
ADAUSDT     895
AVAXUSDT    895
BNBUSDT     895
BTCUSDT     895
DOGEUSDT    895
ETHUSDT     895
LINKUSDT    895
SOLUSDT     895
TRXUSDT     895
XRPUSDT     895
Name: count, dtype: int64

Time range (labeled):
Min timestamp: 2025-10-31 10:00:00
Max timestamp: 2025-12-07 16:00:00


In [36]:
import pandas as pd
import time
import src.ollama_client as oc  # make sure this is imported
from importlib import reload
reload(oc)  # ensure latest version is loaded

# 1) build a balanced sample: 30 per asset → 300 total
n_per_asset = 30
parts = []
for asset, g in df_labeled.groupby("asset"):
    parts.append(g.sample(n=n_per_asset, random_state=42))
sample = pd.concat(parts, ignore_index=True)

sample_size = len(sample)
print("Sample size for LLM comparison:", sample_size)
print(sample["asset"].value_counts())

# 2) run comparison on this bigger sample
models_to_test = [
    "gemma3:4b",
    "llama3",
    "phi",
]

results = []

for model_name in models_to_test:
    print(f"\n=== Testing model: {model_name} ===")
    correct = 0
    total = 0
    latencies = []
    errors = 0

    for i, row in sample.iterrows():
        row_dict = row.to_dict()
        start = time.time()
        try:
            out = oc.ask_ollama(model_name, row_dict)

            scores = out.get("action_scores", {})
            for a in ["Buy", "Hold", "Sell"]:
                scores.setdefault(a, 0.0)

            pred_action = max(scores.items(), key=lambda kv: kv[1])[0]

            latency = time.time() - start
            latencies.append(latency)
            total += 1

            if pred_action == row["true_action"]:
                correct += 1

        except Exception as e:
            errors += 1
            print(f"  [Error on row {i}] {e}")

    accuracy = correct / total if total else 0.0
    avg_latency = sum(latencies) / len(latencies) if latencies else None

    results.append({
        "model": model_name,
        "accuracy": accuracy,
        "avg_latency_sec": avg_latency,
        "n_samples": total,
        "errors": errors,
    })

results_df = pd.DataFrame(results)
results_df


Sample size for LLM comparison: 300
asset
ADAUSDT     30
AVAXUSDT    30
BNBUSDT     30
BTCUSDT     30
DOGEUSDT    30
ETHUSDT     30
LINKUSDT    30
SOLUSDT     30
TRXUSDT     30
XRPUSDT     30
Name: count, dtype: int64

=== Testing model: gemma3:4b ===
  [Error on row 61] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 64] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 65] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 67] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 80] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 81] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 82] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
  [Error on row 84] 500 Server Error: Int

KeyboardInterrupt: 

In [None]:
from sklearn.ensemble import RandomForestClassifier

# ensure sorted
df_labeled = df_labeled.sort_values(["timestamp", "asset"]).reset_index(drop=True)

# time-based split: 70% earliest timestamps for training, 30% latest for test
unique_times = df_labeled["timestamp"].sort_values().unique()
split_idx = int(len(unique_times) * 0.7)
split_time = unique_times[split_idx]

train_mask = df_labeled["timestamp"] <= split_time
test_mask = df_labeled["timestamp"] > split_time

df_train = df_labeled[train_mask].copy()
df_test = df_labeled[test_mask].copy()

print("Train rows:", len(df_train), "Test rows:", len(df_test))
print("Train period:", df_train["timestamp"].min(), "→", df_train["timestamp"].max())
print("Test period:", df_test["timestamp"].min(), "→", df_test["timestamp"].max())


In [None]:
# pick feature columns (exclude target and obvious IDs)
exclude_cols = ["timestamp", "asset", "future_close", "future_return", "true_action"]
feature_cols = [c for c in df_labeled.columns if c not in exclude_cols]

X_train = df_train[feature_cols]
y_train = df_train["true_action"]

clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1
)
clf.fit(X_train, y_train)

print("Classes:", clf.classes_)


In [None]:
from src.hybrid_engine import compute_final_scores, rank_actions
from src.recsys_metrics import ndcg_at_k, map_at_k
from src.user_model import MODERATE  # or CONSERVATIVE / AGGRESSIVE

import src.ollama_client as oc
from importlib import reload
reload(oc)

user_profile = MODERATE
model_name = "gemma3:4b"  # or "llama3", "phi"

records = []    # for RecSys metrics
pnl_returns = []  # optional: realized returns based on top action

for _, row in df_test.iterrows():
    row_dict = row.to_dict()

    # 1) ML confidences
    X_row = row[feature_cols].values.reshape(1, -1)
    proba = clf.predict_proba(X_row)[0]
    ml_conf = {cls: float(p) for cls, p in zip(clf.classes_, proba)}
    # ensure all three keys exist
    for a in ["Buy", "Hold", "Sell"]:
        ml_conf.setdefault(a, 0.0)

    # 2) LLM scores
    llm_out = oc.ask_ollama(model_name, row_dict)
    llm_scores = llm_out.get("action_scores", {})
    for a in ["Buy", "Hold", "Sell"]:
        llm_scores.setdefault(a, 0.0)

    # 3) Pattern support (placeholder for now)
    pattern_support = {"Buy": 0.1, "Hold": 0.1, "Sell": 0.1}

    # 4) Hybrid final scores & ranking
    final_scores = compute_final_scores(
        ml_conf,
        llm_scores,
        pattern_support,
        alpha=0.4,   # ML weight
        beta=0.4,    # LLM weight
        gamma=0.2,   # pattern weight
        user=user_profile,
    )
    ranking = [a for a, _ in rank_actions(final_scores)]

    # 5) ground truth
    true_action = row["true_action"]
    records.append({"ranking": ranking, "true": true_action})

    # 6) simple PnL (optional):
    # assume: if we "Buy", we take future_return; if "Sell", we take -future_return; if "Hold", 0
    top_action = ranking[0]
    fr = row["future_return"]
    if top_action == "Buy":
        pnl = fr
    elif top_action == "Sell":
        pnl = -fr
    else:
        pnl = 0.0
    pnl_returns.append(pnl)

# RecSys metrics
k = 3
ndcg = ndcg_at_k(records, k)
mapk = map_at_k(records, k)

import numpy as np
avg_return = float(np.mean(pnl_returns))

print(f"Model: {model_name}, User: {user_profile.name}")
print(f"NDCG@{k}: {ndcg:.4f}  MAP@{k}: {mapk:.4f}")
print(f"Average 3-day return per position: {avg_return:.4f}")
print(f"Number of test cases: {len(records)}")


In [37]:
from pathlib import Path
from src.config import DATA_DIR

labeled_path = Path(DATA_DIR) / "processed" / "market_features_labeled.csv"
labeled_path.parent.mkdir(parents=True, exist_ok=True)
df_labeled.to_csv(labeled_path, index=False)
print("Saved labeled dataset to:", labeled_path)


Saved labeled dataset to: C:\Users\user\Desktop\recsys_crypto\data\processed\market_features_labeled.csv


In [38]:
%%writefile ../src/recsys_metrics.py
from typing import List, Dict
import math
from collections import Counter


def _relevance(action: str, true_action: str) -> int:
    return int(action == true_action)


def ndcg_at_k(records: List[Dict], k: int) -> float:
    scores = []
    for rec in records:
        ranking = rec["ranking"][:k]
        true = rec["true"]
        dcg = 0.0
        for i, a in enumerate(ranking):
            rel = _relevance(a, true)
            dcg += (2**rel - 1) / math.log2(i + 2)
        idcg = (2**1 - 1) / math.log2(1 + 1)
        scores.append(dcg / idcg if idcg > 0 else 0.0)
    return sum(scores) / len(scores) if scores else 0.0


def map_at_k(records: List[Dict], k: int) -> float:
    aps = []
    for rec in records:
        ranking = rec["ranking"][:k]
        true = rec["true"]
        if true in ranking:
            idx = ranking.index(true)
            aps.append(1.0 / (idx + 1))
        else:
            aps.append(0.0)
    return sum(aps) / len(aps) if aps else 0.0


def diversity(records: List[Dict]) -> float:
    top1 = [r["ranking"][0] for r in records if r["ranking"]]
    counts = Counter(top1)
    total = sum(counts.values())
    if total == 0:
        return 0.0
    entropy = -sum((c/total) * math.log2(c/total) for c in counts.values())
    max_entropy = math.log2(len(counts)) if counts else 1.0
    return entropy / max_entropy if max_entropy > 0 else 0.0


def coverage(records: List[Dict]) -> float:
    """
    Very simple coverage: fraction of queries where we produced a non-empty ranking.
    If you add asset info into each record, you can refine this to 'fraction of assets covered'.
    """
    if not records:
        return 0.0
    non_empty = sum(1 for r in records if r.get("ranking"))
    return non_empty / len(records)


def serendipity(records: List[Dict], baseline_top1: List[str]) -> float:
    """
    Simple serendipity definition:
    - 'Serendipitous' if our top-1 action differs from baseline's top-1,
      AND it is actually correct (matches true_action).
    - Serendipity = (# serendipitous cases) / (# total cases).
    
    baseline_top1 should be a list of same length as records,
    where each entry is the baseline's top-1 action for that query.
    """
    if not records or not baseline_top1 or len(records) != len(baseline_top1):
        return 0.0

    serend = 0
    total = 0

    for rec, base_action in zip(records, baseline_top1):
        if not rec.get("ranking"):
            continue
        top_action = rec["ranking"][0]
        true = rec["true"]
        total += 1
        if top_action != base_action and top_action == true:
            serend += 1

    return serend / total if total else 0.0


Overwriting ../src/recsys_metrics.py


In [72]:
%%writefile ../src/ml_models.py
from pathlib import Path
from typing import List, Tuple

import pandas as pd
from sklearn.ensemble import RandomForestClassifier

from .config import DATA_DIR


def load_labeled_data() -> pd.DataFrame:
    """
    Load the labeled market features dataset with columns including:
    timestamp, asset, technical indicators, future_return, true_action, ...
    """
    path = Path(DATA_DIR) / "processed" / "market_features_labeled.csv"
    df = pd.read_csv(path, parse_dates=["timestamp"])
    return df


def get_feature_columns(df: pd.DataFrame) -> List[str]:
    """
    Return the list of feature columns (exclude target and obvious ID / leakage columns).
    """
    exclude_cols = ["timestamp", "asset", "future_close", "future_return", "true_action"]
    return [c for c in df.columns if c not in exclude_cols]


def train_classifier(df_train: pd.DataFrame) -> Tuple[RandomForestClassifier, List[str]]:
    """
    Train a RandomForestClassifier on the training portion only.
    This function does NOT touch the test data.
    """
    df_train = df_train.sort_values(["timestamp", "asset"]).reset_index(drop=True)

    feature_cols = get_feature_columns(df_train)
    X_train = df_train[feature_cols]
    y_train = df_train["true_action"]

    clf = RandomForestClassifier(
        n_estimators=200,
        max_depth=None,
        random_state=42,
        n_jobs=-1,
    )
    clf.fit(X_train, y_train)

    return clf, feature_cols


if __name__ == "__main__":
    # Debug: train on full dataset and print sizes (not used in backtest).
    df_all = load_labeled_data()
    clf, feat_cols = train_classifier(df_all)
    print("Trained RandomForest on", len(df_all), "rows.")
    print("Number of features:", len(feat_cols))


Overwriting ../src/ml_models.py


In [3]:
%%writefile ../src/backtest.py
# src/backtest.py

from typing import Dict, List, Optional, Tuple

from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pandas as pd

from .ml_models import (
    load_labeled_data,
    get_feature_columns,
    train_classifier,
)
from .user_model import UserProfile
from .hybrid_engine import compute_final_scores, rank_actions
from .recsys_metrics import (
    ndcg_at_k,
    map_at_k,
    diversity,
    coverage,
    serendipity,
)
from . import ollama_client as oc


# -------------------------
# Train / test time split
# -------------------------


def time_split(
    df: pd.DataFrame,
    train_ratio: float = 0.7,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Time-based split to avoid information leakage.
    """
    df_sorted = df.sort_values(["timestamp", "asset"])
    n = len(df_sorted)
    split_idx = int(n * train_ratio)
    df_train = df_sorted.iloc[:split_idx].copy()
    df_test = df_sorted.iloc[split_idx:].copy()
    return df_train, df_test


# -------------
# PnL & equity
# -------------


def compute_pnl(top_action: str, future_return: float) -> float:
    """
    Simple PnL without transaction cost:
    - Buy  -> +future_return
    - Sell -> -future_return (assume perfect shorting)
    - Hold -> 0
    """
    if top_action == "Buy":
        return float(future_return)
    if top_action == "Sell":
        return float(-future_return)
    return 0.0


def compute_pnl_with_costs(
    top_action: str,
    future_return: float,
    fee_rate: float = 0.001,
) -> float:
    """
    PnL with simple transaction costs:
    - Buy:  profit ≈ future_return - 2 * fee_rate (entry + exit)
    - Sell: profit ≈ -future_return - 2 * fee_rate
    - Hold: 0
    """
    if top_action == "Buy":
        gross = float(future_return)
    elif top_action == "Sell":
        gross = float(-future_return)
    else:
        return 0.0

    cost = 2.0 * fee_rate
    return gross - cost


def equity_curve(
    pnls: List[float],
    initial_capital: float = 1.0,
) -> List[float]:
    """
    Convert per-trade returns into an equity curve,
    assuming full capital allocated on each trade.
    """
    equity = float(initial_capital)
    curve = [equity]
    for r in pnls:
        equity *= (1.0 + r)
        curve.append(equity)
    return curve


def equity_stats(curve: List[float]) -> Dict[str, float]:
    """
    Compute simple equity stats:
    - final: final capital
    - max_drawdown: minimum (equity / running_max - 1)
    """
    if not curve:
        return {"final": 0.0, "max_drawdown": 0.0}

    arr = np.array(curve, dtype=float)
    running_max = np.maximum.accumulate(arr)
    drawdown = (arr - running_max) / running_max
    max_dd = float(drawdown.min()) if len(drawdown) > 0 else 0.0

    return {
        "final": float(arr[-1]),
        "max_drawdown": max_dd,
    }


# -------------------
# LLM parallel helper
# -------------------


def _llm_call_single(model_name: str, row_dict: Dict) -> Dict:
    """
    Simple wrapper to call Ollama for one row_dict.
    """
    return oc.ask_ollama(model_name, row_dict)


def call_llm_parallel(
    model_name: str,
    rows: List[Dict],
    max_workers: int = 4,
) -> Tuple[List[Optional[Dict]], int]:
    """
    Call LLM in parallel for a list of row dicts.

    Returns:
        outputs: list of LLM outputs (or None on error) aligned with rows
        error_count: how many calls failed
    """
    outputs: List[Optional[Dict]] = [None] * len(rows)
    error_count = 0

    if max_workers <= 1 or len(rows) <= 1:
        # Fallback to sequential for small cases / debugging
        for i, rd in enumerate(rows):
            try:
                outputs[i] = _llm_call_single(model_name, rd)
            except Exception:
                error_count += 1
        return outputs, error_count

    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        futures = {
            ex.submit(_llm_call_single, model_name, rd): i
            for i, rd in enumerate(rows)
        }
        for fut in as_completed(futures):
            i = futures[fut]
            try:
                outputs[i] = fut.result()
            except Exception:
                error_count += 1

    return outputs, error_count


# ---------------------------
# Simple pattern heuristics
# ---------------------------


def heuristic_pattern_support(row: pd.Series) -> Dict[str, float]:
    """
    Simple rule-based pattern support based on RSI and MACD.

    This is a placeholder for real SPMF-based pattern mining,
    but already injects pattern information into the hybrid engine.
    """
    rsi = float(row.get("rsi", 50.0))
    macd = float(row.get("macd", 0.0))

    # Start from neutral
    support = {"Buy": 1.0, "Hold": 1.0, "Sell": 1.0}

    # RSI-based pattern
    if rsi < 30:
        # Oversold -> support Buy
        support["Buy"] += 2.0
    elif rsi > 70:
        # Overbought -> support Sell
        support["Sell"] += 2.0
    else:
        support["Hold"] += 1.0

    # MACD-based pattern
    if macd > 0:
        support["Buy"] += 1.0
    elif macd < 0:
        support["Sell"] += 1.0

    # Normalize to [0,1]
    total = sum(support.values()) or 1.0
    for k in support:
        support[k] /= total

    return support


# ----------------
# ML-only backtest
# ----------------


def backtest_ml_only(
    model,
    feature_cols: List[str],
    df_test: pd.DataFrame,
) -> Dict:
    """
    Baseline: ML-only ranking from RandomForest predicted probabilities.
    """
    records: List[Dict] = []
    pnls: List[float] = []
    top1_actions: List[str] = []

    df_sorted = df_test.sort_values(["timestamp", "asset"])
    X = df_sorted[feature_cols]
    y_true = df_sorted["true_action"].tolist()
    fut_ret = df_sorted["future_return"].tolist()

    probas = model.predict_proba(X)
    classes = list(model.classes_)

    for i in range(len(df_sorted)):
        proba = probas[i]
        ml_conf = {cls: float(p) for cls, p in zip(classes, proba)}
        for a in ["Buy", "Hold", "Sell"]:
            ml_conf.setdefault(a, 0.0)

        # For ML-only, we can just treat ml_conf as final scores
        scores = ml_conf
        ranking = sorted(scores.keys(), key=lambda k: scores[k], reverse=True)
        true_action = y_true[i]

        records.append({"ranking": ranking, "true": true_action})
        top1_actions.append(ranking[0])

        # Use PnL with costs for more realism
        pnl = compute_pnl_with_costs(ranking[0], fut_ret[i], fee_rate=0.001)
        pnls.append(pnl)

    k = 3
    ndcg_val = ndcg_at_k(records, k)
    map_val = map_at_k(records, k)
    div = diversity(records)
    cov = coverage(records)
    avg_pnl = float(np.mean(pnls)) if pnls else 0.0

    curve = equity_curve(pnls, initial_capital=1.0)
    eq = equity_stats(curve)

    metrics = {
        "mode": "ml_only",
        "ndcg": ndcg_val,
        "map": map_val,
        "diversity": div,
        "coverage": cov,
        "serendipity": None,
        "avg_pnl": avg_pnl,
        "equity_final": eq["final"],
        "equity_max_drawdown": eq["max_drawdown"],
        "n": len(records),
        "baseline_top1": top1_actions,
        "records": records,
    }
    return metrics


# -----------------
# LLM-only backtest
# -----------------


def backtest_llm_only(
    model_name: str,
    df_test: pd.DataFrame,
    user: UserProfile,
    max_workers: int = 4,
) -> Dict:
    """
    LLM-only:
    - Ignore ML scores, only take LLM 'action_scores'.
    - If Ollama fails for a row, fall back to neutral scores.
    - Uses parallel calls to the LLM for efficiency.
    """
    records: List[Dict] = []
    pnls: List[float] = []
    top1_actions: List[str] = []

    # Materialize rows once
    rows = list(df_test.iterrows())
    row_dicts = [row.to_dict() for _, row in rows]

    # Parallel LLM calls
    llm_outputs, error_count = call_llm_parallel(
        model_name=model_name,
        rows=row_dicts,
        max_workers=max_workers,
    )

    for idx, (pandas_idx, row) in enumerate(rows):
        out = llm_outputs[idx]

        if out is None:
            # LLM failed for this row
            scores = {"Buy": 1.0 / 3.0, "Hold": 1.0 / 3.0, "Sell": 1.0 / 3.0}
        else:
            scores = out.get("action_scores", {})
        for a in ["Buy", "Hold", "Sell"]:
            scores.setdefault(a, 0.0)

        final_scores = compute_final_scores(
            ml_conf={"Buy": 0.0, "Hold": 0.0, "Sell": 0.0},
            llm_score=scores,
            pattern_support={"Buy": 0.0, "Hold": 0.0, "Sell": 0.0},
            alpha=0.0,
            beta=1.0,
            gamma=0.0,
            user=user,
        )
        ranking = [a for a, _ in rank_actions(final_scores)]
        true_action = row["true_action"]

        records.append({"ranking": ranking, "true": true_action})
        top1_actions.append(ranking[0])

        pnl = compute_pnl_with_costs(ranking[0], row["future_return"], fee_rate=0.001)
        pnls.append(pnl)

    k = 3
    ndcg_val = ndcg_at_k(records, k)
    map_val = map_at_k(records, k)
    div = diversity(records)
    cov = coverage(records)
    avg_pnl = float(np.mean(pnls)) if pnls else 0.0

    curve = equity_curve(pnls, initial_capital=1.0)
    eq = equity_stats(curve)

    metrics = {
        "mode": f"llm_only_{model_name}",
        "ndcg": ndcg_val,
        "map": map_val,
        "diversity": div,
        "coverage": cov,
        "serendipity": None,
        "avg_pnl": avg_pnl,
        "equity_final": eq["final"],
        "equity_max_drawdown": eq["max_drawdown"],
        "n": len(records),
        "baseline_top1": top1_actions,
        "records": records,
        "llm_errors": error_count,
    }
    return metrics


# -------------------
# Hybrid backtest
# -------------------


def backtest_hybrid(
    model,
    feature_cols: List[str],
    df_test: pd.DataFrame,
    model_name: str,
    user: UserProfile,
    alpha: float = 0.4,
    beta: float = 0.4,
    gamma: float = 0.2,
    ml_baseline_top1: Optional[List[str]] = None,
    max_workers: int = 4,
) -> Dict:
    """
    Hybrid:
    - Combine ML probabilities, LLM scores and pattern support.
    - If LLM fails, fall back to ML-only for that row.
    - Uses parallel LLM calls for efficiency.
    """
    records: List[Dict] = []
    pnls: List[float] = []
    hybrid_top1: List[str] = []

    # Materialize rows once
    rows = list(df_test.iterrows())
    row_dicts = [row.to_dict() for _, row in rows]

    # Parallel LLM calls
    llm_outputs, error_count = call_llm_parallel(
        model_name=model_name,
        rows=row_dicts,
        max_workers=max_workers,
    )

    for idx, (pandas_idx, row) in enumerate(rows):
        # ML confidences
        X_row = row[feature_cols].to_frame().T
        proba = model.predict_proba(X_row)[0]
        classes = model.classes_
        ml_conf = {cls: float(p) for cls, p in zip(classes, proba)}
        for a in ["Buy", "Hold", "Sell"]:
            ml_conf.setdefault(a, 0.0)

        # LLM scores
        out = llm_outputs[idx]
        if out is None:
            llm_scores = {"Buy": 0.0, "Hold": 0.0, "Sell": 0.0}
        else:
            llm_scores = out.get("action_scores", {})
        for a in ["Buy", "Hold", "Sell"]:
            llm_scores.setdefault(a, 0.0)

        # Pattern support (heuristic for now)
        pattern_support = heuristic_pattern_support(row)

        final_scores = compute_final_scores(
            ml_conf=ml_conf,
            llm_score=llm_scores,
            pattern_support=pattern_support,
            alpha=alpha,
            beta=beta,
            gamma=gamma,
            user=user,
        )
        ranking = [a for a, _ in rank_actions(final_scores)]
        true_action = row["true_action"]

        records.append({"ranking": ranking, "true": true_action})
        hybrid_top1.append(ranking[0])

        pnl = compute_pnl_with_costs(ranking[0], row["future_return"], fee_rate=0.001)
        pnls.append(pnl)

    k = 3
    ndcg_val = ndcg_at_k(records, k)
    map_val = map_at_k(records, k)
    div = diversity(records)
    cov = coverage(records)
    avg_pnl = float(np.mean(pnls)) if pnls else 0.0

    curve = equity_curve(pnls, initial_capital=1.0)
    eq = equity_stats(curve)

    ser = None
    if ml_baseline_top1 is not None:
        ser = serendipity(records, ml_baseline_top1)

    metrics = {
        "mode": f"hybrid_{model_name}",
        "ndcg": ndcg_val,
        "map": map_val,
        "diversity": div,
        "coverage": cov,
        "serendipity": ser,
        "avg_pnl": avg_pnl,
        "equity_final": eq["final"],
        "equity_max_drawdown": eq["max_drawdown"],
        "n": len(records),
        "hybrid_top1": hybrid_top1,
        "records": records,
        "llm_errors": error_count,
    }
    return metrics


# --------------
# Top-level run
# --------------


def run_all(
    model_name: str = "gemma3:4b",
    user_type: str = "moderate",
    max_eval_rows: int = 300,
    llm_max_workers: int = 4,
) -> Dict[str, Dict]:
    """
    End-to-end:
    - Load labeled data
    - Time-based split
    - Train RF classifier
    - Sample subset of test period
    - Run ML-only, LLM-only, Hybrid backtests

    Returns a dict with three metrics dicts.
    """
    df = load_labeled_data()
    df_train, df_test = time_split(df, train_ratio=0.7)
    feature_cols = get_feature_columns(df_train)

    clf = train_classifier(df_train)

    # Choose user profile
    user = UserProfile.from_type(user_type)

    # Sample subset of test period for evaluation
    df_eval = df_test.sample(
        n=min(max_eval_rows, len(df_test)), random_state=42
    ).sort_values(["timestamp", "asset"])

    ml_m = backtest_ml_only(clf, feature_cols, df_eval)
    llm_m = backtest_llm_only(
        model_name,
        df_eval,
        user=user,
        max_workers=llm_max_workers,
    )
    hyb_m = backtest_hybrid(
        clf,
        feature_cols,
        df_eval,
        model_name,
        user=user,
        alpha=0.4,
        beta=0.4,
        gamma=0.2,
        ml_baseline_top1=ml_m["baseline_top1"],
        max_workers=llm_max_workers,
    )

    # Simple printout (optional)
    print("=== ML-only ===")
    print(
        f"NDCG={ml_m['ndcg']:.4f}, MAP={ml_m['map']:.4f}, "
        f"Div={ml_m['diversity']:.4f}, Cov={ml_m['coverage']:.4f}, "
        f"AvgPnL={ml_m['avg_pnl']:.4f}, FinalEq={ml_m['equity_final']:.4f}, "
        f"MaxDD={ml_m['equity_max_drawdown']:.4f}"
    )
    print("=== LLM-only ===")
    print(
        f"NDCG={llm_m['ndcg']:.4f}, MAP={llm_m['map']:.4f}, "
        f"Div={llm_m['diversity']:.4f}, Cov={llm_m['coverage']:.4f}, "
        f"AvgPnL={llm_m['avg_pnl']:.4f}, FinalEq={llm_m['equity_final']:.4f}, "
        f"MaxDD={llm_m['equity_max_drawdown']:.4f}, LLM_errors={llm_m['llm_errors']}"
    )
    print("=== Hybrid ===")
    print(
        f"NDCG={hyb_m['ndcg']:.4f}, MAP={hyb_m['map']:.4f}, "
        f"Div={hyb_m['diversity']:.4f}, Cov={hyb_m['coverage']:.4f}, "
        f"Ser={hyb_m['serendipity']}, AvgPnL={hyb_m['avg_pnl']:.4f}, "
        f"FinalEq={hyb_m['equity_final']:.4f}, MaxDD={hyb_m['equity_max_drawdown']:.4f}, "
        f"LLM_errors={hyb_m['llm_errors']}"
    )

    return {"ml": ml_m, "llm": llm_m, "hybrid": hyb_m}


Overwriting ../src/backtest.py


In [74]:
from pathlib import Path
from src.config import DATA_DIR

labeled_path = Path(DATA_DIR) / "processed" / "market_features_labeled.csv"
print("Labeled path:", labeled_path, "exists:", labeled_path.exists())


Labeled path: C:\Users\user\Desktop\recsys_crypto\data\processed\market_features_labeled.csv exists: True


In [75]:
from src.ml_models import train_and_save_classifier

clf, feat_cols = train_and_save_classifier()


Saved classifier to: C:\Users\user\Desktop\recsys_crypto\models\ml\clf_true_action.pkl


In [None]:
from src.backtest import run_all

ml_m, llm_m, hyb_m = run_all(model_name="llama3", user_type="moderate")
ml_m, llm_m, hyb_m


[LLM ERROR llama3 at row 54] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
[LLM ERROR llama3 at row 55] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
[LLM ERROR llama3 at row 56] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
[LLM ERROR llama3 at row 57] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
[LLM ERROR llama3 at row 59] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat
[LLM ERROR llama3 at row 77] 500 Server Error: Internal Server Error for url: http://localhost:11434/api/chat


In [None]:
from src import backtest
import importlib; importlib.reload(backtest)

models = ["gemma3:4b", "phi", "llama3"]  # 'llama3' == 'llama3:latest'

results = {}

for m in models:
    print(f"\n### Running backtest for {m} ###")
    ml_m, llm_m, hyb_m = backtest.run_all(model_name=m, user_type="moderate")
    results[m] = {"ml": ml_m, "llm": llm_m, "hybrid": hyb_m}
    print("ML-only  NDCG:", ml_m["ndcg"], "MAP:", ml_m["map"], "Avg PnL:", ml_m["avg_pnl"])
    print("LLM-only NDCG:", llm_m["ndcg"], "MAP:", llm_m["map"], "Avg PnL:", llm_m["avg_pnl"],
          "LLM errors:", llm_m.get("llm_errors", 0))
    print("Hybrid   NDCG:", hyb_m["ndcg"], "MAP:", hyb_m["map"], "Avg PnL:", hyb_m["avg_pnl"],
          "Serendipity:", hyb_m["serendipity"], "LLM errors:", hyb_m.get("llm_errors", 0))


In [4]:
import sys
from pathlib import Path

# Project root = parent of notebooks/
root = Path("..").resolve()
sys.path.append(str(root / "src"))


In [5]:
from backtest import run_all

results = run_all(
    model_name="gemma3:4b",   # or whatever Ollama model you use
    user_type="moderate",     # "conservative", "moderate", "aggressive"
    max_eval_rows=200,        # smaller at first to test speed
    llm_max_workers=4,        # parallel LLM calls
)


ImportError: attempted relative import with no known parent package

In [9]:
C:\Users\user\Desktop\recsys_crypto

SyntaxError: unexpected character after line continuation character (1666828745.py, line 1)

In [1]:
import sys
from pathlib import Path

# REAL project root (one level ABOVE notebooks)
project_root = Path(r"C:\Users\user\Desktop\recsys_crypto").resolve()

if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print("Project root:", project_root)
print("In sys.path:", str(project_root) in sys.path)


Project root: C:\Users\user\Desktop\recsys_crypto
In sys.path: True


In [2]:
import pandas as pd

from src.ml_models import load_labeled_data
import src.ollama_client as oc
from src.user_model import UserProfile
from src.hybrid_engine import compute_final_scores, rank_actions
from src.backtest import heuristic_pattern_support


In [3]:
df = load_labeled_data()

# Take first 3 rows as examples
sample = df.sample(3, random_state=0)

user = UserProfile.from_type("moderate")

for _, row in sample.iterrows():
    row_dict = row.to_dict()

    # Call LLM once
    out = oc.ask_ollama("gemma3:4b", row_dict)
    llm_scores = out["action_scores"]

    # For demo, we ignore ML and use only LLM + pattern
    pattern_support = heuristic_pattern_support(row)

    final_scores = compute_final_scores(
        ml_conf={"Buy": 0.0, "Hold": 0.0, "Sell": 0.0},
        llm_score=llm_scores,
        pattern_support=pattern_support,
        alpha=0.0,
        beta=1.0,
        gamma=0.3,
        user=user,
    )
    ranking = rank_actions(final_scores)

    print("Asset:", row["asset"], "Time:", row["timestamp"])
    print("True action:", row["true_action"])
    print("LLM raw:", llm_scores)
    print("Pattern support:", pattern_support)
    print("Final ranking:", ranking)
    print("-" * 40)


Asset: XRPUSDT Time: 2025-11-30 22:00:00
True action: Hold
LLM raw: {'Buy': 0.4, 'Hold': 0.5, 'Sell': 0.1}
Pattern support: {'Buy': 0.2, 'Hold': 0.4, 'Sell': 0.4}
Final ranking: [('Hold', 0.4769230769230769), ('Buy', 0.35384615384615387), ('Sell', 0.1692307692307692)]
----------------------------------------
Asset: AVAXUSDT Time: 2025-11-26 00:00:00
True action: Buy
LLM raw: {'Buy': 0.2, 'Hold': 0.7, 'Sell': 0.1}
Pattern support: {'Buy': 0.3333333333333333, 'Hold': 0.16666666666666666, 'Sell': 0.5}
Final ranking: [('Hold', 0.5769230769230769), ('Buy', 0.23076923076923075), ('Sell', 0.1923076923076923)]
----------------------------------------
Asset: ETHUSDT Time: 2025-11-21 17:00:00
True action: Buy
LLM raw: {'Buy': 0.4, 'Hold': 0.4, 'Sell': 0.2}
Pattern support: {'Buy': 0.2, 'Hold': 0.4, 'Sell': 0.4}
Final ranking: [('Hold', 0.4), ('Buy', 0.35384615384615387), ('Sell', 0.24615384615384614)]
----------------------------------------


In [4]:
from src.backtest import run_all

# You can wrap run_all to return the base variants we care about
results_moderate = run_all(
    model_name="gemma3:4b",       # or your LLM model
    user_type="moderate",         # middle risk
    max_eval_rows=300,
    llm_max_workers=4,
)

results_moderate


TypeError: train_classifier() takes 1 positional argument but 2 were given