## Dataset generation notebook
This notebook generates a dataset of trades and portfolio results to be analyzed later.

### Dependencies

In [2]:
!pip install git+https://github.com/trsvchn/calabar.git
!pip3 install backtrader
!pip3 install pyfolio
!pip3 install tqdm
!pip3 install wheel
!pip3 install pandas
!pip3 install dask
!pip3 install graphviz

Collecting graphviz
  Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.16


### Env variables and imports

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import datetime
import os.path
import sys
import uuid
import copy
import json
import warnings

import pandas as pd
import requests
import tqdm
import backtrader as bt
import pyfolio as pf

warnings.filterwarnings("ignore")  # Avoid some noise



In [2]:
## Locations
# Alternative setup for Google Colab with Google Drive mounted (kept for reference):
# # When running in google drive
# MOUNT_LOCATION = '/content/drive'
# DATA_LOCATION = f'{MOUNT_LOCATION}/MyDrive/licenta'

# When running locally
# NOTE(review): absolute, machine-specific path - adjust it before running elsewhere.
PROJECT_LOCATION = "/home/narboom23/Projects/licenta"
# MOUNT_LOCATION ends with "/", so the paths derived below contain a double
# slash (".../mount//tickers").
MOUNT_LOCATION = f"{PROJECT_LOCATION}/mount/"
DATA_LOCATION = f"{MOUNT_LOCATION}"

# TICKERS_LOCATION holds one price-history CSV per ticker (see get_ticker_csv_path);
# RESULTS_LOCATION is where add_entry_to_csv appends the generated trade rows.
TICKERS_LOCATION = f"{DATA_LOCATION}/tickers"
RESULTS_LOCATION = f"{DATA_LOCATION}/results"

# JSON object listing the tickers that previous runs already processed; it is
# read in this cell and rewritten by the run cell at the end of the notebook.
PARSED_TICKERS_JSON = f"{PROJECT_LOCATION}/parsed_tickers.json"


## Email notifications
# NOTE(review): these two names are not referenced anywhere else in the visible
# notebook, and a personal address is hardcoded here - confirm they are still needed.
MY_EMAIL = "chira.mircea.darius@gmail.com"
NOTIFICATION_RECEIVER_LIST = [MY_EMAIL]


## Finance API
# URL template for the daily-history CSV download; {TICKER_NAME} is filled in by
# download_latest_ticker_csv. period1 / period2 are Unix timestamps (seconds):
YAHOO_FINANCE_MAX_URL = "https://query1.finance.yahoo.com/v7/finance/download/{TICKER_NAME}?period1=946688400&period2=1609459199&interval=1d&events=history&includeAdjustedClose=true"
# 1609459199 - 2020.12.31 11:59:59 PM GMT (period2, end of the window)
# 946688400 - 2000.01.01 01:00:00 AM GMT (period1, start of the window)
# 883616400 - 1998.01.01 01:00:00 AM GMT (alternative start, not used in the URL above)

## Tickers - comment out the "Full json" block below to use only the small test sample
# Test sample
TICKER_NAMES = {
    "ATVI": "Activision Blizzard, Inc.",
    "EA": "Electronic Arts Inc.",
    "NTDOY": "Nintento (traded in US)",
    "GME": "GameStop Corp.",
    "GOOG": "Alphabet Inc.",
}

# Full json (replaces the test sample defined above)
with open(f"{PROJECT_LOCATION}/ticker_list.json", "r") as f:
    TICKER_NAMES = json.load(f)


## Tickers already processed by previous runs (ticker -> name)
with open(PARSED_TICKERS_JSON, "r") as f:
    PARSED_TICKERS = json.load(f)

ALL_TICKERS = list(TICKER_NAMES.keys())
PARSED_TICKER_NAMES = list(PARSED_TICKERS.keys())
# Tickers still to be processed, in the order of TICKER_NAMES. Membership is
# tested against the dict itself (constant time) instead of the key list.
TICKERS = [x for x in ALL_TICKERS if x not in PARSED_TICKERS]
if not TICKERS:
    # Same exception type as the bare `TICKERS[0]` would raise, but with a
    # message that explains why the run stops.
    raise IndexError(
        f"No unprocessed tickers left: every ticker is already listed in {PARSED_TICKERS_JSON}"
    )
NEXT_TICKER = TICKERS[0]
print(NEXT_TICKER)  # Useful for debugging stuck cronjobs


## Strategy defaults
# These values are the default arguments of `test_strategy`.
# Date window handed to every data feed (fromdate / todate).
DEFAULT_FROM_DATE = datetime.datetime(2000, 1, 1)
DEFAULT_TO_DATE = datetime.datetime(2020, 12, 31)
# Starting broker cash (cerebro.broker.setcash).
DEFAULT_CASH = 1000.0
# Broker commission (cerebro.broker.setcommission); 0.0 means no commission.
DEFAULT_COMMISION = 0.0
# Passed as `maxcpus` to cerebro.run.
DEFAULT_CPU_COUNT = 4


## Some other configs
# Key under which each entry of the strategy configs stores its strategy class.
KLASS_KEY = "klass"
# The results CSV is named after the ticker processed by this run.
RESULTS_FILENAME = f"{NEXT_TICKER}"


### Utilities

In [3]:
def add_entry_to_csv(csv_filename, trade_info_dict):
    """Append one record as a single header-less row to a results CSV.

    The row goes to ``{RESULTS_LOCATION}/{csv_filename}.csv`` (opened in append
    mode). Values are written in the dictionary's iteration order; neither a
    header line nor an index column is emitted.
    """
    # A one-row frame: every value becomes a single-element column.
    single_row = pd.DataFrame(
        {column: [value] for column, value in trade_info_dict.items()}
    )
    single_row.to_csv(
        f"{RESULTS_LOCATION}/{csv_filename}.csv",
        mode="a",
        index=False,
        header=False,
    )

In [4]:
def get_ticker_csv_path(ticker_name):
    """Return the path of the price-history CSV stored for ``ticker_name``."""
    csv_name = f"{ticker_name}.csv"
    return f"{TICKERS_LOCATION}/{csv_name}"


def get_ticker_csv_as_df(ticker_name):
    """Load the stored CSV of ``ticker_name`` into a pandas DataFrame."""
    csv_path = get_ticker_csv_path(ticker_name)
    ticker_df = pd.read_csv(csv_path)
    return ticker_df

In [5]:
def download_latest_ticker_csv(ticker_name, timeout=60):
    """Download the daily price history of ``ticker_name`` and store it as CSV.

    The URL is built from ``YAHOO_FINANCE_MAX_URL`` and the response body is
    written, overwriting any previous file, to ``get_ticker_csv_path(ticker_name)``.

    Args:
        ticker_name: Ticker symbol substituted into the URL and the file name.
        timeout: Seconds handed to ``requests.get`` as its timeout. Without one
            a stalled connection would block the notebook indefinitely.

    Raises:
        requests.exceptions.RequestException: on connection errors or timeouts.
    """
    yahoo_url = YAHOO_FINANCE_MAX_URL.format(TICKER_NAME=ticker_name)
    file_response = requests.get(yahoo_url, timeout=timeout)

    csv_path = get_ticker_csv_path(ticker_name)

    # NOTE(review): the body is stored regardless of the HTTP status, so an
    # error page ends up in the CSV. `run_through_tickers` skips files with
    # 550 lines or fewer, which presumably filters such files out - confirm.
    with open(csv_path, "wb") as f:
        f.write(file_response.content)


def update_all_ticker_csvs():
    """Re-download the CSV of every ticker in ``TICKERS``, showing a progress bar."""
    # Imported locally under an alias: a later cell may rebind the global name
    # `tqdm` to the progress-bar class (e.g. `from tqdm.auto import tqdm`),
    # after which the attribute lookup `tqdm.tqdm` would fail.
    from tqdm import tqdm as progress_bar

    # Wrapping the list gives the bar its total and advances it once per ticker.
    for ticker_name in progress_bar(TICKERS):
        download_latest_ticker_csv(ticker_name)


# update_all_ticker_csvs()
# download_latest_ticker_csv('GOOG')

In [6]:
def bt_opt_callback(cb):
    """Advance the global progress bar ``pbar`` by one step.

    Registered through ``cerebro.optcallback`` in ``test_strategy``; the
    argument it receives is ignored. ``pbar`` must exist as a global before
    this function is called.
    """
    pbar.update(1)


def test_strategy(
    strategy_class,
    ticker_list,
    from_date=DEFAULT_FROM_DATE,
    to_date=DEFAULT_TO_DATE,
    cash=DEFAULT_CASH,
    commision=DEFAULT_COMMISION,
    cpu_count=DEFAULT_CPU_COUNT,
    **strategy_kwargs,
):
    """Run a backtrader optimization of ``strategy_class`` over the given tickers.

    Args:
        strategy_class: Strategy class registered with ``cerebro.optstrategy``.
        ticker_list: Ticker names; each one's stored CSV is added as a data feed.
        from_date: Passed to each feed as ``fromdate``.
        to_date: Passed to each feed as ``todate``.
        cash: Starting broker cash.
        commision: Broker commission.
        cpu_count: Passed to ``cerebro.run`` as ``maxcpus``.
        **strategy_kwargs: Strategy parameters forwarded to ``optstrategy``.

    Returns:
        None. The value returned by ``cerebro.run`` is discarded; the only
        feedback here is ``bt_opt_callback``, registered via ``optcallback``.
    """
    # Create a cerebro entity
    cerebro = bt.Cerebro(optdatas=False)

    # Add the strategy in optimization mode
    cerebro.optstrategy(strategy_class, **strategy_kwargs)

    for ticker_name in ticker_list:
        # Create a Data Feed from the ticker's stored CSV
        data = bt.feeds.YahooFinanceCSVData(
            dataname=get_ticker_csv_path(ticker_name),
            name=ticker_name,
            # Do not pass values before this date
            fromdate=from_date,
            # Do not pass values after this date
            todate=to_date,
            reverse=False,
        )

        # Add the Data Feed to Cerebro
        cerebro.adddata(data)

    # Set our desired cash start
    cerebro.broker.setcash(cash)

    # Add pyfolio analyzer for stats
    #     cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

    # Add a FixedSize sizer according to the stake
    cerebro.addsizer(bt.sizers.FixedSize, stake=10)

    # Set the commission
    cerebro.broker.setcommission(commission=commision)

    cerebro.optcallback(cb=bt_opt_callback)

    # Run over everything
    cerebro.run(maxcpus=cpu_count)

In [7]:
import time


def _ticker_csv_has_more_lines_than(csv_path, min_lines):
    """Return True if the file at ``csv_path`` has more than ``min_lines`` lines.

    Reading stops as soon as the threshold is exceeded, so the file is never
    loaded into memory as a whole.
    """
    with open(csv_path, "r") as f:
        for line_number, _ in enumerate(f, start=1):
            if line_number > min_lines:
                return True
    return False


def run_through_tickers(strategy_class, ticker_list, *args, **kwargs):
    """Backtest ``strategy_class`` on the tickers that have enough stored history.

    A ticker is kept only when its stored CSV has more than 550 lines; the
    remaining tickers, together with ``*args`` and ``**kwargs``, are forwarded
    to ``test_strategy``.

    Raises:
        FileNotFoundError: if the CSV of one of the tickers does not exist.
    """
    min_csv_lines = 550  # files with this many lines or fewer are skipped

    valid_tickers = [
        ticker_name
        for ticker_name in ticker_list
        if _ticker_csv_has_more_lines_than(
            get_ticker_csv_path(ticker_name), min_csv_lines
        )
    ]

    test_strategy(strategy_class, valid_tickers, *args, **kwargs)


def run_backtest_for_strategy_by_name(strategy_name, ticker_list=TICKERS):
    """Look up a named setup in ``STRATEGIES_ALL_CONFIGS`` and backtest it.

    The setup is taken from a deep copy of the registry, so removing the
    strategy class (stored under ``KLASS_KEY``) leaves the registry untouched.
    What remains of the setup is passed on as the parameter configuration.
    Returns whatever ``run_backtest_for_strategy`` returns.
    """
    print(
        f"Running `run_backtest_for_strategy_by_name` for {strategy_name} for {len(ticker_list)} tickers"
    )
    all_setups = copy.deepcopy(STRATEGIES_ALL_CONFIGS)
    selected_setup = all_setups[strategy_name]
    klass = selected_setup.pop(KLASS_KEY)
    return run_backtest_for_strategy(klass, ticker_list, selected_setup)


def list_configs(configs):
    """Translate a configuration with ranges into a list of dicts with value pairs.

    Every ``range`` value is expanded into its individual values; any other
    value is kept as a single fixed value. The result holds one dict per
    combination, with the first key of ``configs`` varying fastest.

    Args:
        configs: Mapping of parameter name to either a ``range`` or a fixed value.

    Returns:
        A list of dicts; ``[{}]`` for an empty ``configs`` and ``[]`` if any
        range is empty.
    """
    # Start from one empty combination instead of a sentinel entry, so no
    # parameter name (not even "_dummy_param") can collide with bookkeeping.
    combinations = [{}]

    for name, value in configs.items():
        candidates = value if isinstance(value, range) else (value,)
        combinations = [
            {name: candidate, **partial}
            for candidate in candidates
            for partial in combinations
        ]

    return combinations


def run_backtest_for_strategy(strategy_klass, ticker_list, configs):
    """Backtest ``strategy_klass`` on ``ticker_list`` with the given configuration.

    ``configs`` maps strategy parameter names to values and is forwarded as
    keyword arguments to ``run_through_tickers``.

    Returns:
        Whatever ``run_through_tickers`` returns.
    """
    return run_through_tickers(strategy_klass, ticker_list, **configs)


def run_backtest_for_strategy_for_all_tickers(strategy_klass, configs):
    """Backtest ``strategy_klass`` with ``configs`` on the global ``TICKERS`` list."""
    outcome = run_backtest_for_strategy(strategy_klass, TICKERS, configs)
    return outcome

### Strategy
The main strategy class used

In [8]:
class RaynerTeoStrategy(bt.Strategy):
    """
    Rayner Teo Strategy with some additional logging

    Market:
      any stock

    Define the trend:
      closing price above the `maperiod`-bar simple moving average
      (200 days in the original recipe)

    Entry:
      `rsi_open_period`-bar RSI below 30 while no position is held
      (10-period RSI in the original recipe; buy on the next day's open)

    Exit:
      once the holding counter reaches `days_ago_close_period` bars
      (sell on the next day's open). The RSI-based exit of the original
      recipe (RSI above `rsi_close_period`) is currently disabled in `next`.

    Dataset side effect:
      on every bar on which a position is held, the indicator snapshot taken
      at entry plus the current close, the holding counter and the current
      RSI value is appended as one row to the results CSV.
    """

    params = (
        # TODO: these should be in a defaults class or something maybe
        # SMA
        ("maperiod", 15),
        # RSI
        ("rsi_open_period", 10),
        ("rsi_close_period", 30),
        # ADX
        ("adx_period", 14),
        # PPO
        ("ppo_period_short", 12),
        ("ppo_period_long", 26),
        # Stochastic
        ("stochastic_period", 14),
        # Other
        ("days_ago_close_period", 10),
        ("printlog", False),
        ("ticker", "GME"),
    )

    # Look-back windows for which every indicator family is computed.
    _INDICATOR_PERIODS = (8, 16, 32, 64, 128, 256, 512)
    # Indicator families in the order their values appear in a CSV row.
    # The CSV has no header, so this order must not change.
    _SNAPSHOT_FAMILIES = ("adx", "mdi", "pdi", "ppo", "stochastic")

    def log(self, txt, dt=None, doprint=False):
        """Logging function for this strategy"""
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print("%s, %s" % (dt.isoformat(), txt))

    def __init__(self):
        """Create the per-feed indicators and bookkeeping stored in `self.inds`."""
        self.inds = dict()
        for d in self.datas:
            feed_inds = dict()
            self.inds[d] = feed_inds

            feed_inds["sma"] = bt.indicators.SimpleMovingAverage(
                d.close, period=self.params.maperiod
            )
            feed_inds["rsi"] = bt.indicators.RSI(
                d.close, period=self.params.rsi_open_period, safediv=True
            )

            # Plain loops directly inside __init__: the indicators are created
            # in the same order and from the same frame as before.
            for period in self._INDICATOR_PERIODS:
                feed_inds[f"adx{period}"] = bt.indicators.ADX(d, period=period)

            for period in self._INDICATOR_PERIODS:
                feed_inds[f"pdi{period}"] = bt.indicators.PlusDirectionalIndicator(
                    d, period=period
                )

            for period in self._INDICATOR_PERIODS:
                feed_inds[f"mdi{period}"] = bt.indicators.MinusDirectionalIndicator(
                    d, period=period
                )

            for period in self._INDICATOR_PERIODS:
                feed_inds[f"ppo{period}"] = bt.indicators.PPO(
                    d.close, period1=period, period2=self.params.maperiod
                )  # , period_signal=?)

            for period in self._INDICATOR_PERIODS:
                feed_inds[f"stochastic{period}"] = bt.indicators.Stochastic(
                    d, period=period, safediv=True
                )
            # TODO: There are other parameters here, add them all or as needed!

            # Number of bars the current position has been tracked for.
            feed_inds["order_placed_days_ago"] = 0

        self.csv_filename = RESULTS_FILENAME

    def notify_order(self, order):
        """Log completed / failed orders and clear the pending-order marker."""
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    "BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f"
                    % (order.executed.price, order.executed.value, order.executed.comm)
                )

                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:  # Sell
                self.log(
                    "SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f"
                    % (order.executed.price, order.executed.value, order.executed.comm)
                )

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log("Order Canceled/Margin/Rejected")

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit of a trade once it is closed."""
        if not trade.isclosed:
            return

        self.log("OPERATION PROFIT, GROSS %.2f, NET %.2f" % (trade.pnl, trade.pnlcomm))

    def _build_entry_snapshot(self, d):
        """Return the dict describing an entry on feed `d` at the current bar.

        Key order defines the CSV column order: identification and parameters
        first, then one value per indicator family and period.
        """
        feed_inds = self.inds[d]
        snapshot = {
            "uid": str(uuid.uuid1()),
            "ticker": d._name,
            "date": self.data.datetime.date(),
            "price_open": d.close[0],
            "maperiod": self.params.maperiod,
            "rsi_open_period": self.params.rsi_open_period,
        }
        for family in self._SNAPSHOT_FAMILIES:
            for period in self._INDICATOR_PERIODS:
                key = f"{family}{period}"
                snapshot[key] = feed_inds[key][0]
        return snapshot

    def _append_trade_snapshot_to_csv(self, d):
        """Update the open trade's record with current values and append it to the CSV."""
        feed_inds = self.inds[d]
        trade_info = feed_inds["trade_info_dict"]
        trade_info["price_sell"] = d.close[0]
        trade_info["days_ago_close_period"] = feed_inds["order_placed_days_ago"]
        # Despite its name, this column stores the current RSI value.
        trade_info["rsi_close_period"] = feed_inds["rsi"][0]
        add_entry_to_csv(
            csv_filename=self.csv_filename,
            trade_info_dict=copy.deepcopy(trade_info),
        )

    def next(self):
        """Per-bar logic: enter when flat, otherwise record the trade and exit on schedule."""
        for d in self.datas:
            feed_inds = self.inds[d]

            if not self.getposition(d).size:  # no market / no orders
                if d.close[0] > feed_inds["sma"][0] and feed_inds["rsi"][0] < 30:
                    feed_inds["trade_info_dict"] = self._build_entry_snapshot(d)

                    # CONSIDERATION: the 'checked' price vs the 'order accepted at' price

                    self.buy(data=d)
                continue

            # Only the holding-period exit is active; the RSI condition
            # (rsi > rsi_close_period) is intentionally left out for now.
            holding_period_reached = (
                feed_inds["order_placed_days_ago"]
                == self.params.days_ago_close_period
            )
            if holding_period_reached:
                self.sell(data=d)

            # A row is written on every bar of an open position, not only on
            # the closing bar, so each holding length ends up in the dataset.
            self._append_trade_snapshot_to_csv(d)

            if holding_period_reached:
                feed_inds["order_placed_days_ago"] = 0
            else:
                feed_inds["order_placed_days_ago"] += 1

    def stop(self):
        """Print the parameters of this run together with the final portfolio value."""
        self.log(
            f"(MA Period {self.params.maperiod}, "
            f"RSI open {self.params.rsi_open_period}, "
            f"RSI close {self.params.rsi_close_period}, "
            f"Close after {self.params.days_ago_close_period} days) "
            f"Ending Value {self.broker.getvalue()}",
            doprint=True,
        )

### Strategy run configs
Here are the configurations for particular strategy runs and the actual setup to run them

In [9]:
# Registry of named strategy runs. Each entry stores the strategy class under
# KLASS_KEY; every other key is a strategy parameter. A `range` value is expanded
# into one run per value (see `list_configs`), any other value stays fixed.
STRATEGIES_ALL_CONFIGS = {
    "Rayner Teo High Winrate": {
        KLASS_KEY: RaynerTeoStrategy,
        # Optimization: 6 maperiod values x 5 rsi_open_period values = 30 runs
        "maperiod": range(180, 300, 20),
        "rsi_open_period": range(6, 16, 2),
        # Disabled: sweep over the RSI exit threshold
        #         'rsi_close_period': range(26, 36),
        "days_ago_close_period": 20
        # Disabled: single-combination "dummy run" with fixed values
        #         # Dummy run
        #         'maperiod': 256,
        #         'rsi_open_period': 8,
        #         'rsi_close_period': 32,
        #         'days_ago_close_period': 16
    },
}

In [10]:
# Imported under an alias so the global name `tqdm` keeps referring to the
# module imported at the top of the notebook (`tqdm.tqdm` is used elsewhere).
from tqdm.auto import tqdm as auto_tqdm

strategy_name = "Rayner Teo High Winrate"

# Number of parameter combinations = number of optimization runs expected.
cfg_cpy = copy.deepcopy(STRATEGIES_ALL_CONFIGS[strategy_name])
cfg_cpy.pop(KLASS_KEY)
cfg_list = list_configs(cfg_cpy)
expected_number_of_tests = len(cfg_list)

# `pbar` must be a global: `bt_opt_callback` advances it after every run.
pbar = auto_tqdm(
    desc="Running backtests",
    leave=True,
    position=1,
    unit="run",
    colour="violet",
    total=expected_number_of_tests,
)
try:
    resulting_GME_df = run_backtest_for_strategy_by_name(
        strategy_name, ticker_list=[NEXT_TICKER]
    )
finally:
    # Release the progress bar even when the backtest raises.
    pbar.close()

# Reached only when the backtest finished: mark the ticker as processed so the
# next run of the notebook picks the following one.
PARSED_TICKERS[NEXT_TICKER] = TICKER_NAMES[NEXT_TICKER]

with open(PARSED_TICKERS_JSON, "w") as f:
    json.dump(PARSED_TICKERS, f)

Running backtests:   0%|          | 0/30 [00:00<?, ?run/s]

Running `run_backtest_for_strategy_by_name` for Rayner Teo High Winrate for 1 tickers
{'maperiod': range(180, 300, 20), 'rsi_open_period': range(6, 16, 2), 'days_ago_close_period': 20}


### Filesystem healthcheck commands
A couple of commands to inspect the health of the data

In [35]:
# !ls mount/results

In [36]:
# !cat mount/results/result_123.csv | wc -l

In [37]:
# !shuf -n 10 mount/results/main.csv

In [38]:
# !rm mount/results/efficient_results.csv