# Scratch work

In [1]:
import ast
import json
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Load the strategy results table; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../data/generated/fet-celr-500.csv")

In [3]:
start = time.perf_counter()


def _parse_list_cell(cell):
    """Convert one stringified list read from the CSV back into a Python list.

    Non-string values are returned unchanged, so re-running this cell on
    already-parsed columns is a no-op instead of an error.

    `eval` is deliberately avoided: it would execute any expression found in
    the file. `json.loads` handles plain numeric lists; anything that is not
    valid JSON falls back to `ast.literal_eval`, which only accepts Python
    literals.
    """
    if not isinstance(cell, str):
        return cell
    try:
        return json.loads(cell)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return ast.literal_eval(cell)


# These three columns are stored in the CSV as the text form of Python lists.
for column in ("cusum", "returns", "drawdowns"):
    df[column] = df[column].apply(_parse_list_cell)

print(f"reformatting took: {round((time.perf_counter()-start)/60, 2)} minutes")

reformatting took: 7.67 minutes


## About the dataframe:

In [4]:
# Preview the first three rows to show the column layout described below.
df.head(3)

Unnamed: 0,lookback,thres,sell_thres,cusum,returns,drawdowns
0,500,0.5,-0.25,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[-0.03508, -0.02851, 0.01315, 0.00257, 0.00144...",[42074700]
1,500,0.5,0.0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[-0.01806, -0.00843, 0.01069, 0.00495, -0.0061...",[42072360]
2,500,0.5,0.25,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[-0.01785, 0.01069, 0.00495, -0.00618, 0.00675...","[41984460, 76620, 1020]"


| Column name | description |
| --- | --- |
| lookback | metric for identifying strategy (ignore) |
| thres | metric for identifying strategy (ignore) |
| sell_thres | metric for identifying strategy (ignore) |
| cusum | cumulative realized returns starting at 0 and updated every minute |
| returns | list of realized returns, where its length == number of trades that took place |
| drawdowns | list of seconds indicating drawdown length, sorted from greatest to least |

Therefore: 
- if you take the cumulative sum of `returns` (e.g. `np.cumsum(returns)`), the graph resembles `cusum` but looks much smoother, because it has one point per trade instead of one point per minute. 
- length of `drawdowns` is how many 'dips' the strategy encountered.
- calling `.diff()` on `cusum` gives the change in realized returns from one minute to the next, i.e. the equivalent of `returns` at minute-level granularity. 

In [5]:
def get_wins_and_losses(returns):
    """Count the profitable and the losing trades in `returns`.

    A trade counts as a win when its return is strictly positive; a return of
    exactly zero is counted as a loss.

    Returns:
        tuple: (number of winning trades, number of losing trades).
    """
    n_wins = sum(1 for trade in returns if trade > 0)
    n_losses = sum(1 for trade in returns if trade <= 0)
    return n_wins, n_losses

def get_average_win_loss(returns):
    """Return the mean winning return and the mean losing return.

    Values are in decimal format (0.02 == 2%). A trade is a win when its
    return is strictly positive; zero returns are grouped with the losses.

    Args:
        returns: iterable of realized per-trade returns.

    Returns:
        tuple: (mean of winning returns, mean of losing returns). A side with
        no trades yields NaN.
    """
    # Materialize once so one-shot iterables can be scanned for both groups.
    trades = list(returns)
    winners = [trade for trade in trades if trade > 0]
    losers = [trade for trade in trades if trade <= 0]

    # np.mean([]) returns NaN but also emits a RuntimeWarning; handle the
    # empty case explicitly so the result is the same without the warning.
    avg_win = np.mean(winners) if winners else np.nan
    avg_loss = np.mean(losers) if losers else np.nan
    return avg_win, avg_loss

def get_winning_losing_streak(returns):
    """Return the longest winning streak and the longest losing streak.

    A trade extends the winning streak when its return is strictly positive;
    any other trade (including a zero return) extends the losing streak.

    Args:
        returns: iterable of realized per-trade returns, in trade order.

    Returns:
        tuple: (longest run of consecutive wins, longest run of consecutive
        losses). Both are 0 for empty input.
    """
    longest_win, longest_loss = 0, 0
    current_win, current_loss = 0, 0
    for trade in returns:
        if trade > 0:
            current_win += 1
            current_loss = 0
            # Update the record on every step so a streak that is still
            # running when the data ends is counted too.
            longest_win = max(longest_win, current_win)
        else:
            current_loss += 1
            current_win = 0
            longest_loss = max(longest_loss, current_loss)
    return longest_win, longest_loss

## Below are examples for this strategy:

In [6]:
# Take the row at integer position 5 as the worked example for the cells below.
result = df.iloc[5]
result

lookback                                                    500
thres                                                       0.5
sell_thres                                                  1.0
cusum         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
returns       [-0.03467, -0.00425, -0.00456, 0.01364, -0.000...
drawdowns     [39182160, 527460, 440700, 304800, 158940, 147...
Name: 5, dtype: object

### Get number of trades

In [7]:
# `returns` holds one entry per trade, so its length is the number of trades.
len(result.returns)

2531

### Get # of wins and # of losses

In [8]:
# Output is (wins, losses); a trade with a return of exactly 0 is counted as a loss.
get_wins_and_losses(result.returns)

(1481, 1050)

### Get mean win amount and mean loss amount (where 0.02 == 2%)

In [9]:
# Output is (mean winning return, mean losing return) in decimal format.
get_average_win_loss(result.returns)

(0.013105367994598245, -0.02793844761904762)

### Get longest winning and losing streaks, where 17 means 17 wins/losses in a row

In [10]:
# Output is (winning streak length, losing streak length), counted in consecutive trades.
get_winning_losing_streak(result.returns)

(14, 17)

### Get max drawdown length (in seconds)

In [11]:
result.drawdowns[0]  # sorted from longest to shortest, so index 0 is the longest drawdown

39182160

### Get largest loss on a single trade and largest gain on a single trade (realized)

In [12]:
# Output is (smallest, largest) realized return among this row's trades.
min(result.returns), max(result.returns)

(-0.43374, 0.12575)