In [None]:
# Uncomment to install the notebook's dependencies into the active kernel environment:
# %pip install pandas requests beautifulsoup4 seaborn

## Strat.Ninja Analysis

This notebook automates the download and analysis of public trading strategies from [Strat.Ninja](https://strat.ninja/). It extracts metadata and code features for each strategy, enabling research, comparison, and feature correlation. The analysis is designed to help you explore which strategy properties and indicators are most relevant to performance.

In [None]:
import re
import os
import csv
import json
import shutil
import pandas
import base64
import requests
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
class StratNinja:
    """Download strat.ninja strategies and extract one metadata row per strategy.

    Rows are plain dicts. They are kept in ``self.metadata`` and, when
    ``file_meta`` is set, appended to that file as newline-delimited JSON so
    that a later instance can reload them through ``load_data``.

    Attributes:
        file_path: Directory that receives downloaded ``<strategy>.py`` files.
        file_meta: Path of the NDJSON metadata file, or a falsy value to keep
            rows in memory only.
        save: Whether ``process_strategies`` stores downloaded source files.
        publics: Strategy names found on rows not mentioning "private".
        privates: Strategy names found on rows mentioning "private".
        metadata: List of row dicts loaded or produced so far.
        processed: Set of strategy names that already have a row.
    """

    BASE_URL = "https://strat.ninja"
    # Seconds to wait for the server before a request is abandoned.
    REQUEST_TIMEOUT = 30
    # Number of strategies downloaded in parallel.
    MAX_WORKERS = 3

    def __init__(self, file_path, file_meta, save):
        self.file_path = file_path
        self.file_meta = file_meta
        self.save = save

        self.publics = []
        self.privates = []
        self.metadata = []
        self.processed = set()

        self.load_data()

    def load_data(self):
        """Reload ``metadata`` and ``processed`` from the NDJSON file, if present.

        Blank lines, lines that are not valid JSON and lines that do not hold
        a JSON object are skipped with a warning instead of aborting the load.
        """
        self.metadata = []
        self.processed = set()

        if not self.file_meta or not os.path.exists(self.file_meta):
            return

        try:
            with open(self.file_meta, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except json.JSONDecodeError:
                        print(f"[WARN] Invalid line {self.file_meta}: {line[:80]}...")
                        continue
                    if not isinstance(obj, dict):
                        # A bare list/number would break the .get() below and
                        # abort loading of every following line.
                        print(f"[WARN] Invalid line {self.file_meta}: {line[:80]}...")
                        continue
                    self.metadata.append(obj)
                    strat = obj.get("strategy")

                    if strat:
                        self.processed.add(strat)

        except Exception as e:
            print(f"load_data error: {e}")

    def save_data(self, row: dict):
        """Record ``row`` in memory and append it to the metadata file.

        The file write is skipped when ``file_meta`` is falsy, mirroring how
        ``load_data`` treats a missing path.
        """
        if self.file_meta:
            with open(self.file_meta, "a", encoding="utf-8") as f:
                f.write(json.dumps(row, ensure_ascii=False, separators=(",", ":")) + "\n")

        self.metadata.append(row)

        if row.get("strategy"):
            self.processed.add(row["strategy"])

    def get_public_strategies(self):
        """Fill ``publics`` and ``privates`` from the strategy listing page.

        A name is classified as private when its HTML line contains the word
        "private" (case-insensitive). Names already present in either list
        are not added again, so calling this twice does not create duplicates.
        """
        resp = requests.request(
            'GET', f'{self.BASE_URL}/strats.php', timeout=self.REQUEST_TIMEOUT
        )
        if resp.status_code != 200:
            print(f"[WARN] Strategy list request failed with HTTP {resp.status_code}")
            return

        known = set(self.publics) | set(self.privates)
        for row in resp.text.splitlines():
            if 'target="_blank"' not in row:
                continue
            match = re.search(r'href="overview.php\?strategy=(.*?)"', row)
            if not match:
                continue
            name = match.group(1)
            if name in known:
                continue
            known.add(name)
            if 'private' in row.lower():
                self.privates.append(name)
            else:
                self.publics.append(name)

    def get_user_strategies(self):
        """Placeholder; not implemented."""
        pass

    def process_strategies(self, strategies):
        """Download, analyse and persist every not-yet-processed strategy.

        Strategies already in ``processed``, repeated in ``strategies`` or
        listed in ``privates`` are skipped. Downloads run on a small thread
        pool; rows are saved from the calling thread so that file appends and
        list/set updates never interleave. A failure in one strategy is
        reported and does not stop the others.
        """
        private_names = set(self.privates)
        public_names = set(self.publics)

        queue = []
        seen = set(self.processed)
        for strategy in strategies:
            if strategy in seen or strategy in private_names:
                continue
            seen.add(strategy)
            queue.append(strategy)
        total = len(queue)

        def process_one(position, strategy):
            print(f"[{position}/{total}] {strategy}")

            resp_info, tags = self.download_strategy_info(strategy)
            if not resp_info:
                return None

            source, status = self.download_strategy_code(strategy, self.save)
            # Anything but HTTP 200 is an error page, not strategy source.
            if not source or status != 200:
                return None

            row = {
                "strategy": strategy,
                "scope": 'Public' if strategy in public_names else 'Private',
                "mode": self.get_mode(tags),
                "dca": self.get_dca(tags),
                "timeframe": self.get_timeframe(source),
                "failed": self.get_failed(tags),
                "bias": self.get_bias(tags),
                "stalled": self.get_stalled(tags),
                "leverage": self.get_leverage(tags),
                "short": self.get_short(source),
                "profit": self.get_profit(resp_info),
                "stoploss": self.get_stoploss(resp_info),
            }
            # setdefault keeps metadata intact when a dataframe column shares
            # its name with a metadata key (e.g. dataframe['profit']).
            for indicator in self.get_indicators(source):
                row.setdefault(indicator, 1)
            return row

        with ThreadPoolExecutor(max_workers=self.MAX_WORKERS) as executor:
            futures = {
                executor.submit(process_one, position, strategy): strategy
                for position, strategy in enumerate(queue, start=1)
            }
            for future in as_completed(futures):
                strategy = futures[future]
                try:
                    row = future.result()
                except Exception as e:
                    print(f"EXCEPTION {strategy}: {e}")
                    continue
                if row is not None:
                    self.save_data(row)

    def download_strategy_info(self, strategy):
        """Fetch the overview page of ``strategy``.

        Returns:
            ``(html, tags)`` where ``tags`` is the list of link texts inside
            the ``div.tags`` element, excluding links that wrap an image or
            carry an ``onclick`` handler. ``(None, None)`` on any failure.
        """
        try:
            resp = requests.request(
                "GET",
                f"{self.BASE_URL}/overview.php?strategy={strategy}",
                timeout=self.REQUEST_TIMEOUT,
            )

            soup = BeautifulSoup(resp.text, features="html.parser")
            tags = soup.find("div", class_="tags")
            if tags is None:
                print(f"EXCEPTION {strategy}: no tags section (HTTP {resp.status_code})")
                return None, None

            elements = []
            for tag in tags.find_all("a"):
                if not tag.find("img") and not tag.get("onclick"):
                    elements.append(tag.get_text())

            return resp.text, elements
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None

    def download_strategy_code(self, strategy, save=False):
        """Fetch the mirrored source file of ``strategy``.

        When ``save`` is true and the server answered with HTTP 200, the
        source is written to ``<file_path>/<strategy>.py``; the directory is
        created on demand.

        Returns:
            ``(text, status_code)``, or ``(None, None)`` on any failure.
        """
        try:
            resp = requests.request(
                "GET",
                f'{self.BASE_URL}/mirror/{strategy}.py',
                timeout=self.REQUEST_TIMEOUT,
            )

            if save and resp.status_code == 200:
                target_dir = Path(self.file_path)
                target_dir.mkdir(parents=True, exist_ok=True)
                # The name is scraped from a web page: keep only its last path
                # component so it cannot point outside target_dir.
                file_path = target_dir / Path(f"{strategy}.py").name
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(resp.text)

            return resp.text, resp.status_code
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None

    @staticmethod
    def _first_match(candidates, tags):
        """Return the first entry of ``candidates`` contained in ``tags``, else None."""
        if not tags:
            return None
        for candidate in candidates:
            if candidate in tags:
                return candidate
        return None

    def get_mode(self, tags):
        """Return "Spot" or "Futures" if present in ``tags``, else None."""
        return self._first_match(["Spot", "Futures"], tags)

    def get_dca(self, tags):
        """Return True when ``tags`` contains "DCA"."""
        return self._first_match(["DCA"], tags) is not None

    def get_timeframe(self, tags):
        """Return the timeframe named by ``tags``, or None.

        ``tags`` may be a list of tag strings (exact membership test) or the
        strategy source code as one string. For source code the value of the
        ``timeframe = "..."`` assignment is used; plain substring search would
        report "5m" for "15m" and "2h" for "12h".
        """
        timeframes = [ "1m", "3m", "5m", "10m", "15m", "30m", "1h", "2h", "4h", "6h", "12h", "1d", "1w", ]
        if isinstance(tags, str):
            # \b rejects prefixed names such as inf_timeframe; the optional
            # group allows an annotation like `timeframe: str = "5m"`.
            match = re.search(r'\btimeframe\s*(?::\s*\w+\s*)?=\s*["\'](\w+)["\']', tags)
            if match and match.group(1) in timeframes:
                return match.group(1)
            return None
        return self._first_match(timeframes, tags)

    def get_failed(self, tags):
        """Return "Failed" if present in ``tags``, else None."""
        return self._first_match(["Failed"], tags)

    def get_bias(self, tags):
        """Return the first bias label present in ``tags``, else None."""
        biases = ["Biased (Lookahead Analysis)", "Bias unchecked", "Unbiased"]
        return self._first_match(biases, tags)

    def get_stalled(self, tags):
        """Return the first "Stalled - ..." label present in ``tags``, else None."""
        stalleds = [ "Stalled - 90 Percent Negative", "Stalled - Biased", "Stalled - Negative", ]
        return self._first_match(stalleds, tags)

    def get_leverage(self, tags):
        """Return the integer in front of a trailing "X" in the last tag.

        Returns None when ``tags`` is empty, the last tag does not end with
        "X", or the part before the "X" is not an integer.
        """
        if not tags:
            return None
        last = tags[-1]
        if not last.endswith("X"):
            return None
        try:
            return int(last[:-1])
        except ValueError:
            return None

    def get_short(self, resp):
        """Return True when the text contains a ``can_short = True`` assignment."""
        return re.search(r'can_short\s*=\s*True', str(resp)) is not None

    def get_stoploss(self, resp):
        """Return the stoploss magnitude shown on the overview page, or None.

        The minus sign in front of the value is part of the pattern, so the
        returned number is the text after it. None is also returned when that
        text is not a number.
        """
        match = re.search(r'<b>Stoploss:</b> -(.*?)<br>', str(resp))
        if not match:
            return None
        try:
            return float(match.group(1))
        except ValueError:
            return None

    def get_indicators(self, resp):
        """Return the lower-cased names used inside ``dataframe[...]`` subscripts.

        Comma-separated subscripts contribute one name each. Names containing
        braces (f-string placeholders) are dropped. The list keeps source
        order and may contain repeats.
        """
        indicators = []
        for subscript in re.findall(r'dataframe\[(.*?)\]', str(resp)):
            for part in subscript.split(','):
                clean = re.sub(r"[\[\]\'\"\\/\s]", "", part)
                if clean and "{" not in clean and "}" not in clean:
                    indicators.append(clean.lower())
        return indicators

    def get_profit(self, resp):
        """Average the sixth header cell of every row in the results table.

        Returns 0 when the page text contains "Failed" or the table has no
        usable rows, and None when the ``table#example`` element or its
        ``tbody`` is missing. Rows with fewer than six ``th`` cells or a
        non-numeric value are skipped.
        """
        if 'Failed' in resp:
            return 0

        soup = BeautifulSoup(resp, features="html.parser")

        table = soup.find('table', id='example')
        if not table:
            return None

        tbody = table.find('tbody')
        if not tbody:
            return None

        # presumably column 5 is the cumulative profit - TODO confirm against the page
        cum_prof = []
        for row in tbody.find_all('tr'):
            columns = row.find_all('th')
            if len(columns) <= 5:
                continue
            try:
                cum_prof.append(float(columns[5].text))
            except ValueError:
                continue

        if not cum_prof:
            return 0
        return sum(cum_prof) / len(cum_prof)
    

### Data Loading

Load the strategy metadata from disk if available, otherwise download and process all public strategies from Strat.Ninja. The resulting dataframe contains one row per strategy, with columns for metadata, performance, and indicator usage.

In [None]:
# On-disk cache: one JSON object per strategy, newline-delimited.
META_FILE = Path("strategies_metadata.ndjson")
# Directory that receives the downloaded strategy source files.
CODE_DIR = Path("strategies")

if META_FILE.exists():
    df = pd.read_json(META_FILE, lines=True)
else:
    # The scraper writes source files here; the directory must exist first.
    CODE_DIR.mkdir(parents=True, exist_ok=True)

    sn = StratNinja(
        file_path=str(CODE_DIR),
        file_meta=str(META_FILE),
        save=True,
    )

    sn.get_public_strategies()
    sn.process_strategies(sn.publics)

    df = pd.DataFrame(sn.metadata)
    if df.empty:
        # Do not leave an empty cache behind: it would be loaded on every
        # later run instead of retrying the download.
        print("[WARN] No strategy metadata collected; cache file not written.")
    else:
        # Rewrite the cache so every record carries the same set of columns.
        df.to_json(META_FILE, orient="records", lines=True)

df.head()

### Correlation of the Dataframe

Compute the correlation matrix for all numeric and indicator columns. This helps identify which features (such as indicators or metadata fields) are most strongly related to each other and to the profit metric.

In [None]:
# Columns written as metadata for every strategy; any other column is an
# indicator flag that is 1 when the strategy uses it and missing otherwise.
META_COLS = [
    "strategy", "scope", "mode", "dca", "timeframe", "failed", "bias",
    "stalled", "leverage", "short", "profit", "stoploss",
]

# The strategy name is a unique identifier: one-hot encoding it would add one
# meaningless dummy column per row, so it is left out of the feature frame.
features = df.drop(columns=["strategy"], errors="ignore")

# A missing indicator means "not used". Without the 0 fill each indicator
# column holds only 1/NaN, has zero variance, and correlates as NaN.
indicator_cols = features.columns.difference(META_COLS)
features = features.fillna({col: 0 for col in indicator_cols})

cat_cols = features.select_dtypes(include=["object", "category"]).columns
num_cols = features.select_dtypes(include=["number", "bool"])

dummies = pd.get_dummies(features[cat_cols], drop_first=False, dtype=int)
df_numeric = pd.concat([num_cols, dummies], axis=1)

corr = df_numeric.corr()

# Keep only rows/columns that have at least one |correlation| >= 0.5.
corr_filtered = corr.where(np.abs(corr) >= 0.5).dropna(how="all").dropna(axis=1, how="all")
corr_filtered

### Correlation with Profit

Display the features most correlated (positively or negatively) with the profit column. This highlights which strategy properties or indicators are most associated with higher or lower performance.

In [None]:
# Rank every other feature by the magnitude of its correlation with profit,
# strongest first; the sign of each coefficient is preserved in the output.
top_corr_with_profit = corr.loc[:, "profit"].drop(labels="profit").sort_values(
    key=np.abs, ascending=False
)

print(top_corr_with_profit.head(20))

### Top Feature Pairs

Show the pairs of features with the highest absolute correlation. This can reveal redundancy, dependencies, or interesting relationships between strategy attributes and indicators.

In [None]:
# Work on magnitudes and keep only the strict upper triangle, so each pair of
# features appears once and the diagonal (self-correlation) is excluded.
corr_abs = corr.abs()
mask = np.triu(np.ones(corr_abs.shape, dtype=bool), k=1)
corr_pairs = corr_abs.where(mask)

# Flatten to a (feature, feature) -> value series, strongest pairs first.
top_pairs = corr_pairs.unstack().dropna().sort_values(ascending=False)

print(top_pairs.head(20))