In [None]:
#!pip install pandas requests beautifulsoup4 seaborn

## Strat.Ninja Analysis

This notebook automates the download and analysis of public trading strategies from [Strat.Ninja](https://strat.ninja/). It extracts metadata and code features for each strategy, enabling research, comparison, and feature correlation. The analysis is designed to help you explore which strategy properties and indicators are most relevant to performance.

In [None]:
import base64
import csv
import json
import os
import re
import shutil
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

In [None]:

class StratNinja:
    """Download strat.ninja strategies and extract per-strategy features.

    Each processed strategy becomes one JSON object appended to an NDJSON
    file (``file_meta``). The object holds a fixed set of metadata keys
    (``strategy``, ``scope``, ``mode``, ``dca``, ``timeframe``, ``failed``,
    ``bias``, ``stalled``, ``leverage``, ``short``, ``profit``, ``stoploss``)
    plus one ``<indicator>: 1`` entry per dataframe column name found in the
    strategy source.

    Args:
        file_path: Directory where downloaded ``<strategy>.py`` files are saved.
        file_meta: Path of the NDJSON metadata file. When falsy, nothing is
            read from or written to disk and rows are kept in memory only.
        save: Whether ``process_strategies`` stores downloaded source files.
    """

    # Seconds to wait for any single HTTP request; without a timeout a stalled
    # connection would block a worker thread forever.
    REQUEST_TIMEOUT = 30

    # Tag labels looked up (in this order) in the tag list of an overview page.
    _MODES = ("Spot", "Futures")
    _TIMEFRAMES = ("1m", "3m", "5m", "10m", "15m", "30m", "1h", "2h", "4h", "6h", "12h", "1d", "1w")
    _BIAS_LABELS = ("Biased (Lookahead Analysis)", "Bias unchecked", "Unbiased")
    _STALLED_LABELS = ("Stalled - 90 Percent Negative", "Stalled - Biased", "Stalled - Negative")

    def __init__(self, file_path, file_meta, save):
        self.file_path = file_path
        self.file_meta = file_meta
        self.save = save

        self.publics = []
        self.privates = []
        self.metadata = []
        self.processed = set()
        self.new_publics = []

        # process_strategies() calls save_data() from several worker threads.
        self._lock = threading.Lock()

        self.load_data()

    def load_data(self):
        """Reload ``metadata`` and ``processed`` from the NDJSON file.

        Blank lines are skipped. Lines that are not valid JSON, or that are
        valid JSON but not an object, are reported and skipped so that one bad
        line does not discard the rest of the file.
        """
        self.metadata = []
        self.processed = set()

        if not self.file_meta or not os.path.exists(self.file_meta):
            return

        try:
            with open(self.file_meta, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError:
                        print(f"[WARN] Invalid line {self.file_meta}: {line[:80]}...")
                        continue
                    if not isinstance(obj, dict):
                        # e.g. a bare list or number: there is no "strategy" key to index.
                        print(f"[WARN] Invalid line {self.file_meta}: {line[:80]}...")
                        continue
                    self.metadata.append(obj)
                    strat = obj.get("strategy")
                    if strat:
                        self.processed.add(strat)
        except (OSError, UnicodeError) as e:
            print(f"load_data error: {e}")

    def save_data(self, row: dict):
        """Append one line to NDJSON and update in-memory indexes.

        The file append and the index updates happen under a lock because this
        method is invoked concurrently by the workers of ``process_strategies``.
        When ``file_meta`` is falsy only the in-memory indexes are updated,
        mirroring how ``load_data`` treats a missing path.
        """
        with self._lock:
            if self.file_meta:
                with open(self.file_meta, "a", encoding="utf-8") as f:
                    f.write(json.dumps(row, ensure_ascii=False, separators=(",", ":")) + "\n")
            self.metadata.append(row)
            if row.get("strategy"):
                self.processed.add(row["strategy"])

    def get_public_strategies(self):
        """Fetch the strategy listing and refresh publics/privates/new_publics.

        On an HTTP 200 response ``publics`` and ``privates`` are rebuilt from
        scratch (de-duplicated, listing order preserved), so calling this
        method repeatedly does not accumulate duplicates. On any other status
        the previous lists are left untouched. ``new_publics`` is always
        recomputed as the public names not yet in ``processed``.
        """
        resp = requests.get('https://strat.ninja/strats.php', timeout=self.REQUEST_TIMEOUT)
        if resp.status_code == 200:
            publics, privates = [], []
            for row in resp.text.splitlines():
                if 'target="_blank"' not in row:
                    continue
                match = re.search(r'href="overview\.php\?strategy=(.*?)"', row)
                if not match:
                    continue
                # A listing line mentioning "private" is treated as a private strategy.
                bucket = privates if 'private' in row.lower() else publics
                bucket.append(match.group(1))
            self.publics = list(dict.fromkeys(publics))
            self.privates = list(dict.fromkeys(privates))
        else:
            print(f"[WARN] Strategy listing returned HTTP {resp.status_code}")

        self.new_publics = [s for s in self.publics if s not in self.processed]

    def get_user_strategies(self):
        """Placeholder; not implemented."""
        pass

    @staticmethod
    def _first_tag(tags, candidates):
        """Return the first entry of ``candidates`` contained in ``tags``, else None."""
        if not tags:
            return None
        return next((label for label in candidates if label in tags), None)

    @staticmethod
    def _merge_indicators(row, indicators):
        """Add ``indicator: 1`` flags to ``row`` without clobbering existing keys.

        An indicator whose name equals a metadata key (for example a dataframe
        column called ``profit``) must not replace the metadata value.
        """
        for ind in indicators:
            row.setdefault(ind, 1)
        return row

    def process_strategies(self, strategies):
        """Download, analyse and persist every not-yet-processed strategy.

        Work is spread over three worker threads. Duplicate names in
        ``strategies`` are processed once. A strategy is skipped (nothing
        saved) when its overview page could not be fetched or when a
        non-private strategy's source returns HTTP 404.

        Returns:
            list[dict]: The rows that were saved, in completion order.
        """
        def process_one(strategy, idx, total):
            if strategy in self.processed:
                return None

            print(f"[{idx}/{total}] {strategy}")

            is_private = strategy in self.privates
            resp_info, tags = self.download_strategy_info(strategy)
            if is_private:
                # Source of private strategies is not requested at all.
                resp_code, code_status = "", 404
            else:
                resp_code, code_status = self.download_strategy_code(strategy, self.save)

            if not resp_info or (code_status == 404 and not is_private):
                return None

            row = {
                "strategy": strategy,
                "scope": 'Public' if strategy in self.publics else 'Private',
                "mode": self.get_mode(tags),
                "dca": self.get_dca(tags),
                "timeframe": self.get_timeframe(tags),
                "failed": self.get_failed(tags),
                "bias": self.get_bias(tags),
                "stalled": self.get_stalled(tags),
                "leverage": self.get_leverage(tags),
                "short": self.get_short(resp_code),
                "profit": self.get_profit(resp_info),
                "stoploss": self.get_stoploss(resp_info),
            }
            self._merge_indicators(row, self.get_indicators(resp_code))
            self.save_data(row)
            return row

        # Drop repeated names so two workers never handle the same strategy.
        unique = list(dict.fromkeys(strategies))
        total = len(unique)
        rows = []
        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = [executor.submit(process_one, s, i + 1, total) for i, s in enumerate(unique)]
            for future in as_completed(futures):
                row = future.result()
                if row is not None:
                    rows.append(row)
        return rows

    def download_strategy_info(self, strategy):
        """Fetch a strategy's overview page and the text of its tag links.

        Returns:
            tuple: ``(html_text, tags)`` on HTTP 200, where ``tags`` holds the
            text of every ``<a>`` inside ``div.tags`` that has neither an
            ``<img>`` child nor an ``onclick`` attribute. ``(None, None)`` on
            any other status or on an exception, so an error page is never
            recorded as a processed strategy.
        """
        try:
            resp = requests.get(
                f"https://strat.ninja/overview.php?strategy={strategy}",
                timeout=self.REQUEST_TIMEOUT,
            )
            if resp.status_code != 200:
                print(f"[WARN] {strategy}: overview returned HTTP {resp.status_code}")
                return None, None
            soup = BeautifulSoup(resp.text, features="html.parser")
            tags_div = soup.find("div", class_="tags")
            elements = []
            if tags_div:
                for tag in tags_div.find_all("a"):
                    if not tag.find("img") and not tag.get("onclick"):
                        elements.append(tag.get_text())
            return resp.text, elements
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None

    def download_strategy_code(self, strategy, save=False):
        """Fetch a strategy's mirrored source and optionally store it on disk.

        Args:
            strategy: Strategy name as scraped from the listing.
            save: When true and the response is HTTP 200, write the source to
                ``<file_path>/<strategy>.py``.

        Returns:
            tuple: ``(text, status_code)``, or ``(None, None)`` on an exception.
        """
        try:
            resp = requests.get(
                f'https://strat.ninja/mirror/{strategy}.py',
                timeout=self.REQUEST_TIMEOUT,
            )
            if save and resp.status_code == 200:
                base_dir = Path(self.file_path).resolve()
                target = (base_dir / f"{strategy}.py").resolve()
                # The name comes from scraped HTML; never write outside file_path
                # (e.g. a name containing "../").
                if base_dir in target.parents:
                    target.parent.mkdir(parents=True, exist_ok=True)
                    with open(target, "w", encoding="utf-8") as f:
                        f.write(resp.text)
                else:
                    print(f"[WARN] Refusing to save {strategy!r} outside {base_dir}")
            return resp.text, resp.status_code
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None

    def get_mode(self, tags):
        """Return "Spot" or "Futures" if present in ``tags``, else None."""
        return self._first_tag(tags, self._MODES)

    def get_dca(self, tags):
        """Return True when the "DCA" tag is present."""
        return bool(tags and "DCA" in tags)

    def get_timeframe(self, tags):
        """Return the first known timeframe label present in ``tags``, else None."""
        return self._first_tag(tags, self._TIMEFRAMES)

    def get_failed(self, tags):
        """Return "Failed" when that tag is present, else None."""
        return self._first_tag(tags, ("Failed",))

    def get_bias(self, tags):
        """Return the bias label present in ``tags``, else None."""
        return self._first_tag(tags, self._BIAS_LABELS)

    def get_stalled(self, tags):
        """Return the "Stalled - ..." label present in ``tags``, else None."""
        return self._first_tag(tags, self._STALLED_LABELS)

    def get_leverage(self, tags):
        """Parse the last tag as ``<int>X`` (e.g. "3X" -> 3); None otherwise."""
        if not tags:
            return None
        last = tags[-1]
        if isinstance(last, str) and last.endswith("X"):
            try:
                return int(last[:-1])
            except ValueError:
                return None
        return None

    def get_short(self, code_text):
        """Return True when the source contains ``can_short = True``."""
        if not code_text:
            return False
        return bool(re.search(r'can_short\s*=\s*True', str(code_text)))

    def get_stoploss(self, html_text):
        """Extract the stoploss magnitude from the overview HTML.

        Matches ``<b>Stoploss:</b> -<number><br>`` and returns the number
        without its minus sign as a float. Both decimal ("-0.25") and integer
        ("-1") values are accepted. Returns None when nothing matches.
        """
        if not html_text:
            return None
        m = re.search(r'<b>Stoploss:</b> -(\d+(?:\.\d+)?)<br>', str(html_text))
        return float(m.group(1)) if m else None

    def get_indicators(self, code_text):
        """List the lower-cased names used inside ``dataframe[...]`` subscripts.

        Comma-separated subscripts are split into individual names; brackets,
        quotes, slashes, backslashes and whitespace are stripped. Names
        containing ``{`` or ``}`` (f-string templates) are ignored. The result
        is de-duplicated, keeping first-occurrence order.
        """
        if not code_text:
            return []
        indicators = []
        for match in re.findall(r'dataframe\[(.*?)\]', str(code_text)):
            for part in match.split(','):
                clean = re.sub(r"[\[\]\'\"\\/\s]", "", part)
                if clean and set("{}").isdisjoint(clean):
                    indicators.append(clean.lower())
        return list(dict.fromkeys(indicators))

    def get_profit(self, html_text):
        """Average the sixth cell of every row in the overview results table.

        Looks up ``table#example``, reads its ``<tbody>`` rows and averages the
        float value of the sixth ``<th>`` cell of each row (presumably the
        cumulative profit, judging by the original variable name - confirm
        against the page). Cells that are not numeric are ignored.

        Returns:
            float | int: The mean, or 0 when the HTML is empty, contains the
            text "Failed", or has no usable table rows.
        """
        if not html_text or 'Failed' in html_text:
            return 0
        soup = BeautifulSoup(html_text, features="html.parser")
        table = soup.find('table', id='example')
        if not table:
            return 0
        tbody = table.find('tbody')
        if not tbody:
            return 0
        cum_prof = []
        for row in tbody.find_all('tr'):
            cols = row.find_all('th')
            if len(cols) >= 6:
                try:
                    cum_prof.append(float(cols[5].text))
                except ValueError:
                    pass
        return (sum(cum_prof) / len(cum_prof)) if cum_prof else 0


### Load data and fetch new public strategies

Initialize a StratNinja instance, which loads previously processed strategies from strategies_metadata.ndjson. Then fetch the list of public strategies.

In [None]:
# Build the scraper; its constructor loads any rows already stored in the
# NDJSON metadata file so they are not processed again.
sn = StratNinja(file_path="strategies", file_meta="strategies_metadata.ndjson", save=True)

# Refresh the public listing, then keep only the names not processed yet.
sn.get_public_strategies()
to_process = sn.new_publics

### Process only new strategies

If there are new public strategies found, process them (download info, code, extract tags and indicators) and append them to the NDJSON file. If there are no new ones, skip this step.

In [None]:
# Nothing to do when the listing holds no unseen public strategy.
if not to_process:
    print("No new public strategies found.")
else:
    print(f"Found {len(to_process)} new public strategies to process.")
    sn.process_strategies(to_process)

### Load, clean, and filter the dataset

Read the updated NDJSON file into a DataFrame, remove duplicates, and apply filters.

In [None]:
# Read every stored row; when a strategy was written more than once, keep the
# most recent line.
df = pd.read_json(sn.file_meta, lines=True)
df = df.drop_duplicates(subset=["strategy"], keep="last")

# Bias labels must match the tag text stored by StratNinja.get_bias exactly
# ("Bias unchecked" with a lower-case "u"); isin() compares case-sensitively,
# so a differently-cased label would silently drop every unchecked strategy.
ACCEPTED_BIAS = ["Unbiased", "Bias unchecked"]

# Keep strategies that neither failed nor stalled and have an accepted bias label.
df = df[df["failed"].isnull()]
df = df[df["stalled"].isnull()]
df = df[df["bias"].isin(ACCEPTED_BIAS)]

# Columns left without a single value after filtering carry no information.
df = df.dropna(axis=1, how="all")

df

### Correlation analysis with profit

One-hot encode all categorical columns and compute a full correlation matrix. Then we filter correlations to keep only those with an absolute value ≥ 0.5 and display the top 20 features most correlated with profit.

In [None]:
# Keys written for every strategy by StratNinja.process_strategies; any other
# column is an indicator presence flag (1 when the indicator was found).
META_COLS = [
    "strategy", "scope", "mode", "dca", "timeframe", "failed",
    "bias", "stalled", "leverage", "short", "profit", "stoploss",
]

# "strategy" is a unique identifier: one-hot encoding it would add one dummy
# column per row and flood the correlation matrix with meaningless features.
cat_cols = df.select_dtypes(include=["object", "category"]).columns.drop("strategy", errors="ignore")
num_cols = df.select_dtypes(include=["number", "bool"])

# Indicator flags are only stored when present, so "absent" arrives as NaN.
# corr() ignores NaN pairwise, which would leave each flag constant (all 1)
# over its observed rows and make every indicator correlation NaN. Absent
# therefore has to be encoded as 0. Metadata columns keep their NaN values.
indicator_cols = num_cols.columns.difference(META_COLS)
num_cols = num_cols.fillna({col: 0 for col in indicator_cols})

dummies = pd.get_dummies(df[cat_cols], drop_first=False, dtype=int)
df_numeric = pd.concat([num_cols, dummies], axis=1)

corr = df_numeric.corr()
# Keep strong relationships in either direction (|r| >= 0.5), not only positive ones.
corr_filtered = corr.where(corr.abs() >= 0.5).dropna(how="all").dropna(axis=1, how="all")

top_corr_with_profit = (corr["profit"].drop("profit").sort_values(key=np.abs, ascending=False))

print(top_corr_with_profit.head(20))

### Top correlations with profit

Sort and display the 20 strongest correlations (positive or negative) between all features and the profit column.

In [None]:
# Rank every feature by the magnitude of its correlation with profit
# (profit's own self-correlation is removed first), strongest first.
top_corr_with_profit = corr["profit"].drop("profit").sort_values(key=np.abs, ascending=False)

print(top_corr_with_profit.head(20))

### Top Feature Pairs

Show the pairs of features with the highest absolute correlation. This can reveal redundancy, dependencies, or interesting relationships between strategy attributes and indicators.

In [None]:
# Work on correlation magnitudes only.
corr_abs = corr.abs()

# Strict upper triangle: every feature pair is counted once and the diagonal
# (self-correlation) is excluded.
mask = np.triu(np.ones(corr_abs.shape, dtype=bool), k=1)
corr_pairs = corr_abs.where(mask)

# Flatten to a (feature, feature) -> |r| series, drop masked cells, strongest first.
top_pairs = corr_pairs.unstack().dropna().sort_values(ascending=False)

print(top_pairs.head(20))