In [5]:
#!pip install pandas requests beautifulsoup4 seaborn

### Import modules

In [6]:
import re
import os
import csv
import json
import shutil
import pandas
import base64
import requests
import numpy as np
import seaborn as sns
from pathlib import Path
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
class StratNinja:
    def __init__(self, file_path, file_meta, save):
        self.file_path = file_path
        self.file_meta = file_meta
        self.save = save

        self.publics = []
        self.privates = []
        self.metadata = []
    
    def get_public_strategies(self):
        resp = requests.request('GET', 'https://strat.ninja/strats.php')
        if resp.status_code == 200:
            rows = resp.text.splitlines()

            for row in rows:
                if re.search(r'target="_blank"', row):
                    match = re.search(r'href="overview.php\?strategy=(.*?)"', row)
                    if match:
                        name = match.group(1)
                        if 'private' in row.lower():
                            self.privates.append(name)
                        else:
                            self.publics.append(name)
            
    def get_user_strategies(self):
        pass

    def process_strategies(self, strategies):
        def process_one(strategy):
            print(f"[{len(self.publics)}/{strategies.index(strategy) + 1}] {strategy}")

            resp_info, tags = self.download_strategy_info(strategy)
            resp_code, code = self.download_strategy_code(strategy, self.save) if strategy not in self.privates else ("", 404)

            if not resp_code or not resp_info:
                return None

            scope = 'Public' if strategy in self.publics else 'Private'
            mode = self.get_mode(tags)
            dca = self.get_dca(tags)
            failed = self.get_failed(tags)
            bias = self.get_bias(tags)
            stalled = self.get_stalled(tags)
            leverage = self.get_leverage(tags)
            profit = self.get_profit(resp_info)
            short = self.get_short(resp_code)
            stoploss = self.get_stoploss(resp_code)
            timeframe = self.get_timeframe(resp_code)
            inds_set = self.get_indicators(resp_code)

            row = {
                "strategy": strategy,
                "scope": scope,
                "mode": mode,
                "dca": dca,
                "timeframe": timeframe,
                "failed": failed,
                "bias": bias,
                "stalled": stalled,
                "leverage": leverage,
                "short": short,
                "profit": profit,
                "stoploss": stoploss,
            }
            row.update({ind: 1 for ind in inds_set})
            self.save_data(row)
            return row

        with ThreadPoolExecutor(max_workers=5) as executor:
            futures = [executor.submit(process_one, strategy) for strategy in strategies]
            for future in as_completed(futures):
                future.result()

    def download_strategy_info(self, strategy):
        try:
            resp = requests.request("GET", f"https://strat.ninja/overview.php?strategy={strategy}")

            soup = BeautifulSoup(resp.text, features="html.parser")
            tags = soup.find("div", class_="tags")

            elements = []
            for tag in tags.find_all("a"):
                if not tag.find("img") and not tag.get("onclick"):
                    elements.append(tag.get_text())

            return resp.text, elements
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None
    
    def download_strategy_code(self, strategy, save=False):
        try:
            resp = requests.request("GET", f'https://strat.ninja/mirror/{strategy}.py')

            if save:
                file_path = Path(self.file_path) / f"{strategy}.py"
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(resp.text)

            return resp.text, resp.status_code
        except Exception as e:
            print(f"EXCEPTION {strategy}: {e}")
            return None, None

    def get_mode(self, tags):
        modes = [ "Spot", "Futures"]
        for mode in modes:
            if mode in tags:
                return mode
        return None
    
    def get_dca(self, tags):
        modes = [ "DCA" ]
        for mode in modes:
            if mode in tags:
                return True
        return False

    def get_timeframe(self, tags):
        timeframes = [ "1m", "3m", "5m", "10m", "15m", "30m", "1h", "2h", "4h", "6h", "12h", "1d", "1w", ]
        for timeframe in timeframes:
            if timeframe in tags:
                return timeframe
        return None

    def get_failed(self, tags):
        fails = [ "Failed" ]
        for fail in fails:
            if fail in tags:
                return fail
        return None

    def get_bias(self, tags):
        biases = ["Biased (Lookahead Analysis)", "Bias unchecked", "Unbiased"]
        for bias in biases:
            if bias in tags:
                return bias
        return None

    def get_stalled(self, tags):
        stalleds = [ "Stalled - 90 Percent Negative", "Stalled - Biased", "Stalled - Negative", ]
        for stalled in stalleds:
            if stalled in tags:
                return stalled
        return None
        
    def get_leverage(self, tags):
        leverage = "X"
        if tags[-1].endswith(leverage):
            return int(tags[-1][:-1])
        return None
    
    def get_short(self, resp):
        can_short = re.search(r'can_short\s*=\s*True', str(resp))
        if can_short:
            return True
        return False
    
    def get_stoploss(self, resp):
        match = re.search(r'stoploss\s*=\s*(-?\d+(?:\.\d+)?)', str(resp))
        if match:
            return float(match.group(1))
        return None
    
    def get_indicators(self, resp):
        indicators = []
        matches = re.findall(r'dataframe\[(.*?)\]', str(resp))
        for match in matches:
            parts = match.split(',') if ',' in match else [match]
            for p in parts:
                clean = re.sub(r"[\[\]\'\"\\/\s]", "", p)
                if clean and set("{}").isdisjoint(clean):
                    indicators.append(clean.lower())
        return indicators
    
    def get_profit(self, resp):
        profit = 0
        cum_prof = []
        
        if 'Failed' not in resp:
            soup = BeautifulSoup(resp, features="html.parser")

            table = soup.find('table', id='example')
            if not table: return

            tbody = table.find('tbody')
            if not tbody: return

            rows = tbody.find_all('tr')
            for row in rows:
                columns = row.find_all('th')
                cum_prof.append(float(columns[5].text))

            if cum_prof:
                profit = sum(cum_prof) / len(cum_prof)

        return profit
    

### Data Loading

In [8]:
if not os.path.exists('strategies_metadata.ndjson'):
    sn = StratNinja(
        file_path = "strategies",
        file_meta = "strategies_metadata.ndjson",
        save=True
    )

    sn.get_public_strategies()
    sn.process_strategies(sn.publics)

    df = pandas.DataFrame(sn.metadata)
    df.to_json('strategies_metadata.ndjson', orient="records", lines=True)
else:
    df = pandas.read_json('strategies_metadata.ndjson', lines=True)

df = df[df["bias"] == "Unbiased"]
df = df[df["stalled"].isna()]
df

AttributeError: 'StratNinja' object has no attribute 'processed'

### Filter columns

In [None]:
df_clean = df.replace("", np.nan)
counts = df_clean.notna().sum()
counts_sorted = counts[counts > 10].sort_values(ascending=False)
included_columns = counts_sorted.index.tolist()

print("Columns to include in the analysis:\n")
for column in included_columns:
    print(column)

df = df[included_columns]

In [None]:
exclude_cols = [
    "strategy", "scope", "mode", "timeframe",
    "failed", "bias", "stalled", "leverage",
    "short", "profit"
]
cols_to_fill = [col for col in df.columns if col not in exclude_cols]
df[cols_to_fill] = df[cols_to_fill].fillna(0)

df

### Spot

In [None]:
spots = df[df["mode"] == "Spot"]

spots.sort_values("profit", ascending=False).head(25)

In [None]:
cat_cols = spots.select_dtypes(include=["object", "category"]).columns

dummies = pandas.get_dummies(spots[cat_cols], drop_first=False, dtype=int)

numeric_cols = spots.select_dtypes(include=["number", "bool"])

spots_all_numeric = pandas.concat([numeric_cols, dummies], axis=1)

corr = spots_all_numeric.corr()

sns.heatmap(corr, cmap="coolwarm", vmin=-1, vmax=1)

### Futures

In [None]:
futures = df[df["mode"] == "Futures"]

futures.sort_values("profit", ascending=False).head(25)

In [None]:
cat_cols = futures.select_dtypes(include=["object", "category"]).columns

dummies = pandas.get_dummies(futures[cat_cols], drop_first=False, dtype=int)

numeric_cols = futures.select_dtypes(include=["number", "bool"])

futures_all_numeric = pandas.concat([numeric_cols, dummies], axis=1)

corr = futures_all_numeric.corr()

sns.heatmap(corr, cmap="coolwarm", vmin=-1, vmax=1)