In [55]:
import pandas as pd
import ta
import numpy as np
import pickle
import itertools
from datetime import datetime

from alphalens import utils, plotting, performance, tears

import mlfinlab as ml

import pyfolio

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')

%matplotlib inline

In [56]:
# Load the cleaned price history; later cells index it by ticker symbol.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load pickles that were produced by a trusted run of this project.
with open('/home/ubuntu/projects/trading/data/Prices_clean.pkl', 'rb') as prices_file:
    # The context manager guarantees the handle is closed even if unpickling fails.
    trainPrices = pickle.load(prices_file)

In [57]:
# Display the frame stored under the 'WHL.JO' key to eyeball the loaded price columns.
trainPrices['WHL.JO']

Unnamed: 0_level_0,high,low,open,close,volume,adj_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-01,6760.160156,6760.160156,6760.160156,6760.160156,0.0,5069.762207
2013-01-02,6895.359863,6586.870117,6854.419922,6679.229980,2105298.0,5009.068848
2013-01-03,6855.370117,6712.549805,6759.209961,6741.120117,3103630.0,5055.483887
2013-01-04,6878.220215,6707.790039,6735.399902,6878.220215,2293156.0,5158.302246
2013-01-07,6902.979980,6769.680176,6879.180176,6781.109863,1809340.0,5085.473145
...,...,...,...,...,...,...
2017-12-25,6293.000000,6293.000000,6293.000000,6293.000000,0.0,5665.954102
2017-12-26,6293.000000,6293.000000,6293.000000,6293.000000,0.0,5665.954102
2017-12-27,6360.000000,6242.000000,6290.000000,6329.000000,3542933.0,5698.367188
2017-12-28,6447.000000,6300.000000,6320.000000,6447.000000,2857012.0,5804.608887


In [58]:
def getLabels(datas,
              lookback = 5,
              num_days_ahead = 1,
              pt_sl = [1, 1],
              min_ret = 0.005,
              at_open = True):
    """Attach event flags and barrier-based labels to every symbol's price frame.

    For each ``symbol -> DataFrame`` pair the function calls, in order,
    ``ml.util.get_daily_vol``, ``ml.filters.cusum_filter``,
    ``ml.labeling.add_vertical_barrier``, ``ml.labeling.get_events`` and
    ``ml.labeling.get_bins``, then merges their outputs column-wise onto the
    price frame.

    Parameters
    ----------
    datas : dict
        Mapping of symbol to a DataFrame that has ``'close'`` and ``'open'``
        columns and an index named ``'Date'`` (the code resets the index and
        reads a ``'Date'`` column).
    lookback : int
        Forwarded to ``get_daily_vol``; symbols whose ``close`` series is not
        longer than this are skipped.
    num_days_ahead : int
        Forwarded to ``add_vertical_barrier`` as ``num_days``.
    pt_sl : list
        Forwarded unchanged to ``get_events`` (never mutated here).
    min_ret : float
        Forwarded to ``get_events``.
    at_open : bool
        Forwarded to ``get_bins`` together with the ``open`` series.
        NOTE(review): ``open``/``at_open`` look like additions of a local
        mlfinlab fork -- confirm against the installed version.

    Returns
    -------
    dict
        Mapping of symbol to a DataFrame indexed by ``'date'`` holding the
        original columns plus ``event``, ``vertical_barriers``, ``t1``,
        ``expected_return``, ``label``, ``profit_taking``, ``stop_loss`` and
        ``holding_period``. Rows whose ``label`` equals 0 are removed (rows
        with a missing label are kept). Symbols that are skipped or that raise
        are absent from the result; the exception message is printed.
    """
    labelled = {}
    for symbol, data in datas.items():

        try:
            close_px = data['close']
            open_px = data['open']

            # Too little history for the requested lookback window.
            if len(close_px) <= lookback:
                continue

            daily_vol = ml.util.get_daily_vol(close_px, lookback = lookback)

            # Skip symbols whose volatility series holds exactly one distinct value.
            if daily_vol.nunique() == 1:
                continue

            cusum_events = ml.filters.cusum_filter(
                close_px,
                threshold = daily_vol.mean())

            # Flag the rows whose date is one of the filtered events.
            frame = data.reset_index()
            frame['event'] = np.where(frame['Date'].isin(cusum_events), 1, 0)
            frame = frame.set_index('Date')

            vertical_barriers = ml.labeling.add_vertical_barrier(
                t_events = cusum_events,
                close = close_px,
                num_days = num_days_ahead)
            frame['vertical_barriers'] = vertical_barriers

            triple_barrier_events = ml.labeling.get_events(
                close = close_px,
                t_events = cusum_events,
                pt_sl = pt_sl,
                target = daily_vol,
                min_ret = min_ret,
                vertical_barrier_times = vertical_barriers,
                num_threads = 1,
                verbose = False)

            # 'trgt' is dropped here because it is concatenated again with the labels below.
            frame = pd.concat([frame, triple_barrier_events], axis = 1)
            frame = frame.drop(columns = 'trgt')

            labels = ml.labeling.get_bins(
                triple_barrier_events,
                close_px,
                open = open_px,
                at_open = at_open)

            frame = pd.concat([frame, labels], axis = 1).rename_axis('date')

            # Scale the barrier multipliers by the per-event target.
            frame['profit_taking'] = frame['pt'].mul(frame['trgt'])
            frame['stop_loss'] = frame['sl'].mul(frame['trgt']).mul(-1)

            # Days between the event date and the t1 timestamp.
            frame = frame.reset_index()
            elapsed = pd.to_datetime(frame['t1']).sub(pd.to_datetime(frame['date']))
            frame['holding_period'] = elapsed.map(lambda delta: delta.days)

            frame = (
                frame
                .drop(columns = ['trgt', 'pt', 'sl'])
                .set_index('date')
                .rename(columns = {'ret': 'expected_return', 'bin': 'label'})
            )

            labelled[symbol] = frame[frame['label'] != 0]

        except Exception as e:
            # Best effort: report the failing symbol and move on to the next one.
            print(f"{symbol}: {e}")
    return labelled

In [59]:
# Run the labelling on a single ticker and look at the event rows only.
data = {'MTN.JO': trainPrices['MTN.JO']}
out = getLabels(
    data,
    lookback = 14,
    num_days_ahead = 5,
    pt_sl = [0.0001, 0.0001],
    at_open = True,
)['MTN.JO']
out.loc[out['event'] == 1]

Unnamed: 0_level_0,high,low,open,close,volume,adj_close,event,vertical_barriers,t1,expected_return,label,profit_taking,stop_loss,holding_period
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2013-01-10,18090.0,17573.0,18090.0,17662.0,4091984.0,10874.135742,1,2013-01-15,2013-01-11,-0.003567,-1.0,0.000002,-0.000002,1.0
2013-01-21,17948.0,17653.0,17672.0,17850.0,3200684.0,10989.884766,1,2013-01-28,2013-01-22,0.002140,1.0,0.000001,-0.000001,1.0
2013-01-30,17675.0,17321.0,17550.0,17350.0,4330669.0,10682.043945,1,2013-02-04,2013-01-31,0.013793,1.0,0.000001,-0.000001,1.0
2013-02-04,17871.0,17643.0,17755.0,17871.0,3673888.0,11002.811523,1,2013-02-11,2013-02-05,0.029371,1.0,0.000001,-0.000001,1.0
2013-02-05,18379.0,17800.0,17875.0,18367.0,7363505.0,11308.188477,1,2013-02-11,2013-02-06,-0.032663,-1.0,0.000002,-0.000002,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-06,13142.0,12662.0,12772.0,13025.0,3968807.0,10807.081055,1,2017-12-11,2017-12-07,0.006196,1.0,0.000002,-0.000002,1.0
2017-12-11,12846.0,12499.0,12833.0,12586.0,3121684.0,10442.835938,1,2017-12-18,2017-12-12,0.022258,1.0,0.000002,-0.000002,1.0
2017-12-14,13179.0,12570.0,12570.0,13173.0,6229685.0,10929.879883,1,2017-12-19,2017-12-15,0.017241,1.0,0.000002,-0.000002,1.0
2017-12-20,13261.0,12901.0,13221.0,13046.0,4835771.0,10824.506836,1,2017-12-25,2017-12-21,0.019139,1.0,0.000002,-0.000002,1.0


In [60]:
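# Uncomment to rebuild the labels for every symbol in trainPrices; the next cell
# loads a pickled copy from disk instead of recomputing them.
# cusumTrainPrices = getLabels(trainPrices, lookback = 14, num_days_ahead = 5, pt_sl = [0.0001, 0.0001], at_open = True)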

In [61]:
# pickle.dump(cusumTrainPrices, open('/home/ubuntu/projects/trading/data/cusumTrainPrices.pkl', 'wb'))
# Load the per-symbol labelled frames from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load pickles that were produced by a trusted run of this project.
with open('/home/ubuntu/projects/trading/data/cusumTrainPrices.pkl', 'rb') as labels_file:
    # The context manager guarantees the handle is closed even if unpickling fails.
    cusumTrainPrices = pickle.load(labels_file)

In [62]:
# Display the labelled frame stored under the 'MTN.JO' key.
cusumTrainPrices['MTN.JO']

Unnamed: 0_level_0,high,low,open,close,volume,adj_close,event,vertical_barriers,t1,expected_return,label,profit_taking,stop_loss,holding_period
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2013-01-01,17760.0,17760.0,17760.0,17760.0,0.0,10934.471680,0,NaT,NaT,,,,,
2013-01-02,17974.0,17500.0,17900.0,17699.0,1727978.0,10896.917969,0,NaT,NaT,,,,,
2013-01-03,17845.0,17440.0,17700.0,17675.0,4956273.0,10882.139648,0,NaT,NaT,,,,,
2013-01-04,18109.0,17620.0,17700.0,18045.0,3895447.0,11109.942383,0,NaT,NaT,,,,,
2013-01-07,18219.0,17822.0,18025.0,17831.0,2936696.0,10978.184570,0,NaT,NaT,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-25,13065.0,13065.0,13065.0,13065.0,0.0,10840.269531,0,NaT,NaT,,,,,
2017-12-26,13065.0,13065.0,13065.0,13065.0,0.0,10840.269531,0,NaT,NaT,,,,,
2017-12-27,13570.0,13066.0,13249.0,13570.0,4275780.0,11259.278320,1,NaT,2017-12-28,-0.001548,-1.0,0.000002,-0.000002,1.0
2017-12-28,13724.0,13300.0,13570.0,13524.0,4085005.0,11221.112305,0,NaT,NaT,,,,,


In [63]:
def getTarget(cusumTrainPrices):
    """Stack the event-row labels of every symbol into one (date, asset) frame.

    Parameters
    ----------
    cusumTrainPrices : dict
        Mapping of symbol to a DataFrame whose index is named ``'date'`` and
        which has ``'event'`` and ``'label'`` columns. The input frames are
        not modified.

    Returns
    -------
    pandas.DataFrame
        A single ``'label'`` column indexed by a ``('date', 'asset')``
        MultiIndex, restricted to rows where ``event == 1`` and sorted by
        date. Missing labels are kept. An empty DataFrame is returned when the
        mapping is empty.
    """
    per_symbol = []
    for symbol, data in cusumTrainPrices.items():
        # Select rows and column in a single .loc call, then add the symbol via
        # .assign: this builds a fresh frame, so no SettingWithCopyWarning is
        # raised (assigning a column onto a column slice used to trigger it).
        event_labels = data.loc[data['event'] == 1, ['label']].assign(asset = symbol)
        per_symbol.append(event_labels.reset_index().set_index(['date', 'asset']))

    # pd.concat rejects an empty list, so keep the "empty in, empty out" behaviour.
    if not per_symbol:
        return pd.DataFrame()

    # Concatenate once at the end instead of re-copying the accumulated frame
    # on every loop iteration.
    return pd.concat(per_symbol).sort_index(level = 'date')

In [64]:
# Build the stacked (date, asset) label frame from the per-symbol labelled frames.
target = getTarget(cusumTrainPrices)
# Preview without the rows that have a missing label; `target` itself is left unchanged.
target.dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0_level_0,Unnamed: 1_level_0,label
date,asset,Unnamed: 2_level_1
2013-01-04,ACT.JO,1.0
2013-01-04,AEE.JO,1.0
2013-01-04,AFE.JO,1.0
2013-01-04,AFT.JO,1.0
2013-01-04,AFX.JO,1.0
...,...,...
2017-12-27,TMT.JO,1.0
2017-12-27,TON.JO,1.0
2017-12-27,TRU.JO,-1.0
2017-12-27,TWR.JO,1.0


In [65]:
# Count how often each label value occurs (value_counts leaves out missing values by default).
target['label'].value_counts()

 1.0    45286
-1.0    44634
Name: label, dtype: int64

In [66]:
# Persist the stacked label frame. The context manager flushes and closes the
# file deterministically, so the pickle on disk is complete when the cell ends.
with open('/home/ubuntu/projects/trading/data/binary_target_5_day.pkl', 'wb') as target_file:
    pickle.dump(target, target_file)