# Notebook to evaluate lumibot backtest logs

Latest version: 2024-09-06  
Author: MvS

## Description

Notebook reads the Yahoo backtesting table coming out of Lumibot and calculates winning/losing trade data.

## Result

Dataframe containing position type, profit, trade duration

In [2]:
import yfinance as yf

from dotenv import dotenv_values
import requests
import pandas as pd
import datetime as dt
import csv

import logging
import sys

# Send log records to stdout, each prefixed with a timestamp
logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)

# Today's local date formatted as 'YYYY-MM-DD'
cur_date = dt.date.today().isoformat()

# Key/value pairs read from the .env file one directory up
env_dict = dotenv_values("../.env")

### Load file

In [None]:
# Trade log to evaluate (file name inside the local "logs/" directory).
# Earlier run, kept for reference:
#   'TrendFollowing_2024-08-31_13-42_3ziCGk_trades.csv'
filename = 'TrendFollowing_2024-09-06_11-02_SsmW56_trades.csv'

logging.info(f"Opening trade log: {filename}")

try:
    trades_df = pd.read_csv(
        f"logs/{filename}", sep=",", quotechar='"'
    )
except OSError as e:
    # Without the trade log there is nothing to evaluate. Re-raise so the
    # following cells do not run against an undefined or stale `trades_df`.
    logging.error(f"Could not open trade log: {e}")
    raise


### Data wrangling of trade sheet

1. ignoring the buy/sell orders, only looking at the fill confirmations
2. updating the row count
3. assume each opened order is followed up by an order to close the position
4. getting previous row data --- to infer profit and consistency
5. then, determine whether position was long or short
6. calculate profit
7. backfilling of profit to position opening timestamp
8. identify the source of the order closing (SL/TP/other signal)

In [None]:
# Keep only the fill confirmations and number the remaining rows 0..n-1
is_fill = trades_df['status'] == 'fill'
trades_df = trades_df.loc[is_fill].reset_index(drop=True)

# Parse the timestamp column into timezone-aware (UTC) datetimes
trades_df['time'] = pd.to_datetime(trades_df['time'], utc=True)

# Fills are assumed to alternate strictly between opening and closing a
# position (no partial fills or side effects in synthetic backtesting data).
def position_status(row):
    """Label a fill as 'open' or 'closed' based on its row number.

    Rows whose label (``row.name``) is even are tagged 'open'; all other
    rows are tagged 'closed'.
    """
    is_opening_fill = row.name % 2 == 0
    return 'open' if is_opening_fill else 'closed'

# Tag every fill as opening or closing a position
trades_df['pos_stat'] = trades_df.apply(position_status, axis=1)

# Price of the preceding fill. The first row has no predecessor, so the
# gap left by the shift is back-filled.
trades_df['p_price'] = trades_df['price'].shift(1).bfill()


def position_type(row):
    """Return 'long' or 'short' for the position a fill belongs to.

    A position counts as long when it is opened with a buy or closed with
    a sell; every other combination is reported as short.
    """
    long_combinations = {('open', 'buy'), ('closed', 'sell')}
    fill_side = row['side']
    status = row['pos_stat']
    return 'long' if (status, fill_side) in long_combinations else 'short'

# Classify each fill's position as 'long' or 'short', row by row
trades_df['pos_type'] = trades_df.apply(position_type, axis=1)

def position_profit(row):
    """Return the profit of a closed position, or None for an opening fill.

    ``row['p_price']`` is used as the opening price and ``row['price']`` as
    the closing price. The price move is multiplied by the filled quantity,
    with the sign depending on whether the position is long or short.
    ``trade_cost`` is subtracted twice -- presumably once for the opening
    and once for the closing fill, assuming both cost the same.
    """
    status, kind = row['pos_stat'], row['pos_type']
    quantity, fee = row['filled_quantity'], row['trade_cost']
    open_price, close_price = row['p_price'], row['price']

    # Only the closing fill carries a profit figure
    if status != 'closed':
        return None

    price_move = close_price - open_price if kind == 'long' else open_price - close_price
    return quantity * price_move - 2.0 * fee

# Profit is only computed on closing rows; back-filling copies each value
# onto the preceding opening row as well.
trades_df['pos_profit'] = trades_df.apply(position_profit, axis=1).bfill()

def pos_closing(row):
    """Return why a position was closed, or None for an opening fill.

    The reason is derived from the order type of the closing fill:
    'limit' -> 'TP', 'stop_limit' -> 'SL', anything else -> 'SIG'.
    """
    status = row['pos_stat']
    order_type = row['type']

    if status != 'closed':
        return None

    reason_by_order_type = {'limit': 'TP', 'stop_limit': 'SL'}
    return reason_by_order_type.get(order_type, 'SIG')

# Closing reason is only set on closing rows; back-filling copies it onto
# the preceding opening row as well.
trades_df['pos_clos'] = trades_df.apply(pos_closing, axis=1).bfill()

# Timestamp of the following fill; the last row has no successor and is
# left as a missing value by the shift
trades_df['n_time'] = trades_df['time'].shift(-1)

def pos_duration(row):
    """Return how many whole days a position stayed open.

    Computed on opening fills as the difference between ``row['n_time']``
    (the next fill) and ``row['time']``; closing fills yield None.
    """
    opened_at = row['time']
    closed_at = row['n_time']

    if row['pos_stat'] != 'open':
        return None

    return (closed_at - opened_at).days

# Duration is only computed on opening rows; forward-filling copies it
# onto the following closing row as well.
trades_df['pos_dur'] = trades_df.apply(pos_duration, axis=1).ffill()


trades_df