## Dataset generation notebook
This notebook generates a dataset of trades and portfolio results to be analyzed later.

### Dependencies

In [2]:
!pip install git+https://github.com/trsvchn/calabar.git
!pip3 install backtrader
!pip3 install pyfolio
!pip3 install tqdm
!pip3 install wheel
!pip3 install pandas
!pip3 install dask
!pip3 install graphviz

Collecting graphviz
  Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Installing collected packages: graphviz
Successfully installed graphviz-0.16


### Env variables and imports

In [42]:
from __future__ import absolute_import, division, print_function, unicode_literals

import datetime
import os.path
import sys
import uuid
import copy
import json
import warnings

import pandas as pd
import requests
import tqdm
import backtrader as bt
import pyfolio as pf

warnings.filterwarnings("ignore")  # Avoid some noise

In [43]:
# Filesystem layout: everything lives under PROJECT_LOCATION.
PROJECT_LOCATION = "/home/narboom23/Projects/licenta"
TICKERS_LOCATION = f"{PROJECT_LOCATION}/data/weekly_tickers"
RESULTS_LOCATION = f"{PROJECT_LOCATION}/dataset/weekly/results"

PARSED_TICKERS_JSON = f"{PROJECT_LOCATION}/dataset/weekly/parsed_tickers.json"
TICKERS_JSON = f"{PROJECT_LOCATION}/dataset/weekly/ticker_list.json"

## Tickers - comment out the "Full json" block below to use only the small sample
# Test sample
TICKER_NAMES = {
    "ATVI": "Activision Blizzard, Inc.",
    "EA": "Electronic Arts Inc.",
    "NTDOY": "Nintento (traded in US)",
    "GME": "GameStop Corp.",
    "GOOG": "Alphabet Inc.",
}

# Full json (replaces the test sample defined above)
with open(TICKERS_JSON, "r") as f:
    TICKER_NAMES = json.load(f)


## Tickers recorded as already parsed by earlier runs of this notebook
with open(PARSED_TICKERS_JSON, "r") as f:
    PARSED_TICKERS = json.load(f)

# Tickers that are skipped unconditionally
REMOVED_TICKERS = ['BF.B', 'VIAC', 'WRK']

ALL_TICKERS = list(TICKER_NAMES.keys())
PARSED_TICKER_NAMES = list(PARSED_TICKERS.keys())
# Remaining work: every known ticker that is neither parsed nor removed
TICKERS = [x for x in ALL_TICKERS if x not in PARSED_TICKER_NAMES and x not in REMOVED_TICKERS]

# Each run processes exactly one ticker. Fail with an explicit message (same
# exception type as the bare `TICKERS[0]` lookup) when nothing is left.
if not TICKERS:
    raise IndexError(
        f"No tickers left to process: every ticker in {TICKERS_JSON} is either "
        f"listed in {PARSED_TICKERS_JSON} or in REMOVED_TICKERS"
    )
NEXT_TICKER = TICKERS[0]
print(NEXT_TICKER)  # Useful for debugging stuck cronjobs


## Strategy defaults
DEFAULT_FROM_DATE = datetime.datetime(1998, 1, 1)
DEFAULT_TO_DATE = datetime.datetime(2020, 12, 31)
DEFAULT_CASH = 1000.0
DEFAULT_COMMISION = 0.0
DEFAULT_CPU_COUNT = 4


## Some other configs
# Key under which each strategy config stores its strategy class
KLASS_KEY = "klass"
# Results CSV is named after the ticker processed in this run
RESULTS_FILENAME = f"{NEXT_TICKER}"

AAPL


### Utilities

In [44]:
def add_entry_to_csv(csv_filename, trade_info_dict):
    """Append one record as a single row to `{RESULTS_LOCATION}/{csv_filename}.csv`.

    Each key of `trade_info_dict` becomes a column and its value the only cell
    of that column. The row is appended without a header and without an index,
    so the column order is the insertion order of the dict.
    """
    single_row = pd.DataFrame(
        {column: [value] for column, value in trade_info_dict.items()}
    )
    single_row.to_csv(
        f"{RESULTS_LOCATION}/{csv_filename}.csv",
        mode="a",
        index=False,
        header=False,
    )

In [45]:
def get_ticker_csv_path(ticker_name):
    """Return the path of the price CSV for `ticker_name` under TICKERS_LOCATION."""
    return "{}/{}.csv".format(TICKERS_LOCATION, ticker_name)


def get_ticker_csv_as_df(ticker_name):
    """Load the price CSV of `ticker_name` into a pandas DataFrame."""
    csv_path = get_ticker_csv_path(ticker_name)
    return pd.read_csv(csv_path)

In [46]:
def bt_opt_callback(cb):
    """Advance the module-level progress bar `pbar` by one step.

    Registered with `cerebro.optcallback` in `test_strategy`. The `cb`
    argument is not used. `pbar` must exist as a global before a run starts.
    """
    pbar.update(1)


def test_strategy(
    strategy_class,
    ticker_list,
    from_date=DEFAULT_FROM_DATE,
    to_date=DEFAULT_TO_DATE,
    cash=DEFAULT_CASH,
    commision=DEFAULT_COMMISION,
    cpu_count=DEFAULT_CPU_COUNT,
    **strategy_kwargs,
):
    """Run a backtrader optimization of `strategy_class` over the given tickers.

    Args:
        strategy_class: strategy class handed to `cerebro.optstrategy`.
        ticker_list: ticker names; each one is loaded from the CSV returned by
            `get_ticker_csv_path` and added as a separate data feed.
        from_date: passed to the data feed as `fromdate`.
        to_date: passed to the data feed as `todate`.
        cash: starting cash of the broker.
        commision: commission passed to `broker.setcommission`.
        cpu_count: forwarded to `cerebro.run(maxcpus=...)`. This argument used
            to be ignored (`maxcpus=None` was always passed); pass `None`
            explicitly to get that behaviour back.
        **strategy_kwargs: strategy parameters forwarded to
            `cerebro.optstrategy`.

    Returns:
        None. `bt_opt_callback` is registered as the optimization callback.
    """
    # Create a cerebro entity
    cerebro = bt.Cerebro(optdatas=False)

    # Add the strategy together with its parameters
    cerebro.optstrategy(strategy_class, **strategy_kwargs)

    for ticker_name in ticker_list:
        # Create a Data Feed
        data = bt.feeds.YahooFinanceCSVData(
            dataname=get_ticker_csv_path(ticker_name),
            name=ticker_name,
            # Do not pass values before this date
            fromdate=from_date,
            # Do not pass values after this date
            todate=to_date,
            reverse=False,
        )

        # Add the Data Feed to Cerebro
        cerebro.adddata(data)

    # Set our desired cash start
    cerebro.broker.setcash(cash)

    # Add pyfolio analyzer for stats
    #     cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')

    # Add a FixedSize sizer according to the stake
    cerebro.addsizer(bt.sizers.FixedSize, stake=10)

    # Set the commission
    cerebro.broker.setcommission(commission=commision)

    # Report progress through the registered callback
    cerebro.optcallback(cb=bt_opt_callback)

    # Run over everything, honouring the requested CPU limit
    cerebro.run(maxcpus=cpu_count)

In [47]:
import time


def run_through_tickers(strategy_class, ticker_list, *args, **kwargs):
    """Backtest `strategy_class` on the tickers whose CSV is long enough.

    A ticker is kept only when its CSV under TICKERS_LOCATION has more than
    550 lines. The surviving tickers and all extra positional and keyword
    arguments are forwarded to `test_strategy`.
    """

    def _line_count(ticker_name):
        # Count the lines of the ticker's CSV file
        with open(f"{TICKERS_LOCATION}/{ticker_name}.csv", "r") as csv_file:
            return sum(1 for _ in csv_file)

    valid_tickers = [name for name in ticker_list if _line_count(name) > 550]

    test_strategy(strategy_class, valid_tickers, *args, **kwargs)


def run_backtest_for_strategy_by_name(strategy_name, ticker_list=TICKERS):
    """Look up `strategy_name` in STRATEGIES_ALL_CONFIGS and backtest it.

    The config entry is deep-copied so that removing the strategy class
    (stored under KLASS_KEY) does not alter the shared configuration. The
    remaining entries are passed on as the strategy parameters.
    """
    print(
        f"Running `run_backtest_for_strategy_by_name` for {strategy_name} for {len(ticker_list)} tickers"
    )
    setup = copy.deepcopy(STRATEGIES_ALL_CONFIGS[strategy_name])
    klass = setup.pop(KLASS_KEY)
    return run_backtest_for_strategy(
        strategy_klass=klass, ticker_list=ticker_list, configs=setup
    )


def list_configs(configs):
    """Translate a configuration with ranges into a list of dicts with value pairs.

    Every `range` value is expanded into one entry per element; any other
    value (including lists and tuples) is treated as a single fixed value.
    The result holds one dict per combination.

    Args:
        configs: mapping of parameter name to either a `range` or a fixed value.

    Returns:
        A list of dicts, one per combination. Parameters that appear later in
        `configs` vary slowest. An empty `configs` yields `[{}]`; an empty
        range yields `[]`.
    """
    # Start from one empty combination. No sentinel key is needed, so a real
    # parameter can never collide with it and get dropped.
    combinations = [{}]

    for name, value in configs.items():
        candidates = value if isinstance(value, range) else (value,)
        combinations = [
            {name: candidate, **existing}
            for candidate in candidates
            for existing in combinations
        ]

    return combinations


def run_backtest_for_strategy(strategy_klass, ticker_list, configs):
    """Backtest `strategy_klass` on `ticker_list` with `configs` as parameters.

    Args:
        strategy_klass: strategy class to run.
        ticker_list: ticker names to backtest.
        configs: mapping of strategy parameter names to values or ranges; it
            is unpacked as keyword arguments for `run_through_tickers`.

    Returns:
        Whatever `run_through_tickers` returns.
    """
    # The combination list and job count computed here earlier were never
    # used, so they are no longer built.
    return run_through_tickers(strategy_klass, ticker_list, **configs)


def run_backtest_for_strategy_for_all_tickers(strategy_klass, configs):
    """Backtest `strategy_klass` with `configs` on the module-level TICKERS list."""
    return run_backtest_for_strategy(
        strategy_klass=strategy_klass,
        ticker_list=TICKERS,
        configs=configs,
    )

### Strategy
The main strategy class used

In [48]:
class RaynerTeoStrategy(bt.Strategy):
    """
    Rayner Teo Strategy with some additional logging

    Market:
      any stock; entry/exit state is tracked separately for every data feed

    Define the trend:
      closing price above the `maperiod` simple moving average

    Entry:
      `rsi_open_period` RSI below 30

    Exit:
      a sell is issued once the position has been tracked for
      `days_ago_close_period` bars

    NOTE: the rules this strategy is modelled on (200-day moving average,
    exit on 10-period RSI above 40 or after 10 trading days) are only partly
    implemented: there is no RSI-based exit, and the moving-average and holding
    periods come from the parameters. `rsi_close_period` is only echoed in the
    final log line, and `adx_period`, `ppo_period_short`, `ppo_period_long`,
    `stochastic_period` and `ticker` are not read by this class.

    Dataset side effect:
      on every bar with an open position one row is appended to the results
      CSV (via `add_entry_to_csv`). The row holds the indicator values
      captured at entry plus the current close, the number of bars the
      position has been tracked and the current RSI value.
    """

    params = (
        # TODO: these should be in a defaults class or something maybe
        # SMA
        ("maperiod", 15),
        # RSI
        ("rsi_open_period", 10),
        ("rsi_close_period", 30),
        # ADX
        ("adx_period", 14),
        # PPO
        ("ppo_period_short", 12),
        ("ppo_period_long", 26),
        # Stochastic
        ("stochastic_period", 14),
        # Other
        ("days_ago_close_period", 10),
        ("printlog", False),
        ("ticker", "GME"),
    )

    def log(self, txt, dt=None, doprint=False):
        """Print `txt` prefixed with a date when logging is enabled.

        Output is produced when the `printlog` parameter is set or `doprint`
        is true. `dt` defaults to the current date of the first data feed.
        """
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print("%s, %s" % (dt.isoformat(), txt))

    def __init__(self):
        """Create the per-feed indicators and bookkeeping state."""
        # Order bookkeeping written by notify_order; defined up front so the
        # attributes exist before the first notification arrives.
        self.order = None
        self.buyprice = None
        self.buycomm = None
        self.bar_executed = None

        # Per-feed state: indicators, the bar counter and (once a trade is
        # opened) the record that ends up in the CSV.
        self.inds = dict()
        for d in self.datas:
            feed_state = dict()
            self.inds[d] = feed_state

            feed_state["sma"] = bt.indicators.SimpleMovingAverage(
                d.close, period=self.params.maperiod
            )
            feed_state["rsi"] = bt.indicators.RSI(
                d.close, period=self.params.rsi_open_period, safediv=True
            )

            for period in (3, 6, 9):
                feed_state[f"adx{period}"] = bt.indicators.ADX(d, period=period)

            for period in (3, 6, 9):
                feed_state[f"ppo{period}"] = bt.indicators.PPO(
                    d.close, period1=period, period2=self.params.maperiod
                )  # , period_signal=?)

            for period in (3, 6, 9):
                feed_state[f"stochastic{period}"] = bt.indicators.Stochastic(
                    d, period=period, safediv=True
                )

            feed_state["order_placed_days_ago"] = 0

        self.csv_filename = RESULTS_FILENAME

    def notify_order(self, order):
        """Log executed or failed orders and clear the pending-order marker."""
        if order.status in [order.Submitted, order.Accepted]:
            # Nothing to do until the order reaches a final state
            return

        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    "BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f"
                    % (order.executed.price, order.executed.value, order.executed.comm)
                )

                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:  # Sell
                self.log(
                    "SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f"
                    % (order.executed.price, order.executed.value, order.executed.comm)
                )

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log("Order Canceled/Margin/Rejected")

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade is closed."""
        if not trade.isclosed:
            return

        self.log("OPERATION PROFIT, GROSS %.2f, NET %.2f" % (trade.pnl, trade.pnlcomm))

    def _record_trade_snapshot(self, d):
        """Add the current exit-side values to the feed's record and append it to the CSV."""
        feed_state = self.inds[d]
        trade_info = feed_state["trade_info_dict"]

        trade_info["price_sell"] = d.close[0]
        trade_info["days_ago_close_period"] = feed_state["order_placed_days_ago"]
        # Despite the column name, this stores the current RSI value
        trade_info["rsi_close_period"] = feed_state["rsi"][0]

        add_entry_to_csv(
            csv_filename=self.csv_filename,
            trade_info_dict=copy.deepcopy(trade_info),
        )

    def next(self):
        """Apply the entry/exit rules to every data feed for the current bar."""
        for d in self.datas:
            feed_state = self.inds[d]

            if not self.getposition(d).size:  # not in the market for this feed
                if d.close[0] > feed_state["sma"][0] and feed_state["rsi"][0] < 30:
                    # Capture the entry-side values; exit-side columns are
                    # added by _record_trade_snapshot while the position is open.
                    feed_state["trade_info_dict"] = {
                        "uid": str(uuid.uuid1()),
                        "ticker": d._name,
                        # Date of this feed's own bar, matching the prices
                        # below (this used to be read from the first feed).
                        "date": d.datetime.date(0),
                        "price_open": d.close[0],
                        "maperiod": self.params.maperiod,
                        "rsi_open_period": self.params.rsi_open_period,
                        "adx3": feed_state["adx3"][0],
                        "adx6": feed_state["adx6"][0],
                        "adx9": feed_state["adx9"][0],
                        "ppo3": feed_state["ppo3"][0],
                        "ppo6": feed_state["ppo6"][0],
                        "ppo9": feed_state["ppo9"][0],
                        "stochastic3": feed_state["stochastic3"][0],
                        "stochastic6": feed_state["stochastic6"][0],
                        "stochastic9": feed_state["stochastic9"][0],
                    }
                    self.buy(data=d)
                continue

            holding_period_reached = (
                feed_state["order_placed_days_ago"] == self.params.days_ago_close_period
            )
            if holding_period_reached:
                self.sell(data=d)

            # A row is written on every bar with an open position, not only
            # on the bar where the sell is issued.
            self._record_trade_snapshot(d)

            if holding_period_reached:
                feed_state["order_placed_days_ago"] = 0
            else:
                feed_state["order_placed_days_ago"] += 1

    def stop(self):
        """Print the parameters of the finished run and the ending portfolio value."""
        self.log(
            f"(MA Period {self.params.maperiod}, "
            f"RSI open {self.params.rsi_open_period}, "
            f"RSI close {self.params.rsi_close_period}, "
            f"Close after {self.params.days_ago_close_period} days) "
            f"Ending Value {self.broker.getvalue()}",
            doprint=True,
        )

### Strategy run configs
Here are the configurations for particular strategy runs and the actual setup to run them

In [49]:
# Registry of named strategy runs. Each entry maps a display name to a dict
# holding the strategy class (under KLASS_KEY) and its parameters. `range`
# values are expanded into one combination per element by `list_configs`;
# other values are fixed.
STRATEGIES_ALL_CONFIGS = {
    "Rayner Teo High Winrate": {
        KLASS_KEY: RaynerTeoStrategy,
        # Optimization grid: 6 maperiod values x 3 rsi_open_period values = 18 runs
        "maperiod": range(30, 60, 5),
        "rsi_open_period": range(2, 8, 2),
        "days_ago_close_period": 5
        
        #         # Dummy run (single combination) - swap with the grid above for a quick check
        #         'maperiod': 45,
        #         'rsi_open_period': 4,
        #         'days_ago_close_period': 5
    },
}

In [50]:
from tqdm.auto import tqdm

strategy_name = "Rayner Teo High Winrate"

# The progress bar total is the number of parameter combinations of the
# selected strategy (the strategy class entry is not a parameter).
cfg_cpy = copy.deepcopy(STRATEGIES_ALL_CONFIGS[strategy_name])
cfg_cpy.pop(KLASS_KEY)
cfg_list = list_configs(cfg_cpy)
expected_number_of_tests = len(cfg_list)

# `pbar` is read as a global by `bt_opt_callback`
pbar = tqdm(
    desc="Running backtests",
    leave=True,
    position=1,
    unit="run",
    colour="violet",
    total=expected_number_of_tests,
)
try:
    # Use the same `strategy_name` that sized the progress bar, so the two
    # cannot drift apart.
    resulting_GME_df = run_backtest_for_strategy_by_name(
        strategy_name, ticker_list=[NEXT_TICKER]
    )
finally:
    # Release the progress bar even when the backtest raises
    pbar.close()

# Reached only when the backtest finished without raising: record the ticker
# as parsed so the next run picks up the following one.
PARSED_TICKERS[NEXT_TICKER] = TICKER_NAMES[NEXT_TICKER]

with open(PARSED_TICKERS_JSON, "w") as f:
    json.dump(PARSED_TICKERS, f)

Running backtests:   0%|          | 0/18 [00:00<?, ?run/s]

Running `run_backtest_for_strategy_by_name` for Rayner Teo High Winrate for 1 tickers


ZeroDivisionError: float division by zero