In [1]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

# Import MlFinLab tools
from mlfinlab.labeling import TailSetLabels

In [2]:
# Load daily adjusted-close prices for 20 stocks from Yahoo Finance.
# NOTE(review): FB and TWTR may no longer resolve on Yahoo (ticker renamed /
# delisted since this window) -- confirm before relying on a fresh download.
tickers = "AAPL MSFT COST PFE SYY F GE BABA AMD CCL ZM FB WFC JPM NVDA CVX TWTR UBER GPS KO"

# auto_adjust=False is passed explicitly: newer yfinance releases default to
# auto_adjust=True, which removes the 'Adj Close' column selected below and
# would make this cell fail on a fresh kernel.
raw_prices = yf.download(
    tickers,
    start="2019-01-20",
    end="2020-05-25",
    group_by="ticker",
    auto_adjust=False,
)

# With group_by="ticker" the columns are a (ticker, field) MultiIndex. Taking the
# 'Adj Close' cross-section on level 1 drops that level and leaves one column per
# ticker. Some tickers (e.g. UBER, ZM) have no prices at the start of the window,
# so leading NaNs in those columns are expected.
data = raw_prices.xs('Adj Close', axis=1, level=1)
data.head()

[*********************100%***********************]  20 of 20 completed


Unnamed: 0_level_0,SYY,MSFT,AMD,TWTR,KO,PFE,F,AAPL,FB,JPM,GE,GPS,WFC,CCL,COST,NVDA,CVX,UBER,ZM,BABA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-01-22,58.341274,102.378876,19.76,32.25,43.693233,35.752155,7.837517,37.205044,147.570007,94.728691,65.86393,23.316143,45.463188,51.75024,201.700134,36.959034,98.19883,,,152.149994
2019-01-23,58.55542,103.376709,19.799999,30.969999,44.196827,35.659122,7.689988,37.355515,144.300003,94.489441,66.396332,23.727493,45.700256,51.512947,201.405762,37.088215,97.368423,,,152.029999
2019-01-24,58.22023,102.882652,20.85,31.610001,43.665764,34.635696,7.929722,37.059429,145.830002,94.544647,66.776604,23.605959,45.572601,52.224846,199.715439,39.212296,99.221565,,,155.860001
2019-01-25,57.745388,103.822334,21.93,32.900002,43.372768,34.373493,8.169458,38.287457,149.009995,95.142792,69.666702,24.007961,45.709377,52.689949,198.537918,39.786171,98.968063,,,159.210007
2019-01-28,57.978161,101.797615,20.18,33.130001,43.18964,33.43465,7.985046,37.933121,147.470001,95.593704,67.917435,24.0921,45.426716,53.534737,200.152252,34.285912,98.050224,,,158.919998


In [3]:
# Build the tail-set labeler from the per-ticker price frame, using mean absolute
# deviation as the volatility adjustment. n_bins=10 corresponds to the 10% tails
# inspected in the following cells.
# NOTE(review): window=180 is presumably the lookback used for the volatility
# estimate -- confirm against the MlFinLab documentation.
labels = TailSetLabels(
    data,
    n_bins=10,
    vol_adj='mean_abs_dev',
    window=180,
)

# get_tail_sets() returns, in order: the positive set, the negative set, and the
# full label matrix.
pos_set, neg_set, matrix_set = labels.get_tail_sets()

In [4]:
# Positive tail set: for each date, the tickers whose returns fall in the top 10%.
# Only the first five dates are shown.
pos_set.head(5)

Date
2020-01-28    [MSFT, AAPL]
2020-01-29        [FB, GE]
2020-01-30      [MSFT, KO]
2020-01-31       [PFE, ZM]
2020-02-03      [MSFT, ZM]
dtype: object

In [5]:
# Negative tail set: for each date, the tickers whose returns fall in the bottom 10%.
# Only the first five dates are shown.
neg_set.head(5)

Date
2020-01-28      [KO, PFE]
2020-01-29     [AMD, PFE]
2020-01-30      [FB, CCL]
2020-01-31    [AAPL, CVX]
2020-02-03    [SYY, COST]
dtype: object

In [6]:
# Full label matrix: one row per date and one column per ticker, holding 1 for
# members of the positive set, -1 for members of the negative set, and 0 otherwise.
matrix_set.head(5)

Unnamed: 0_level_0,SYY,MSFT,AMD,TWTR,KO,PFE,F,AAPL,FB,JPM,GE,GPS,WFC,CCL,COST,NVDA,CVX,UBER,ZM,BABA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-01-28,0,1,0,0,-1,-1,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2020-01-29,0,0,-1,0,0,-1,0,0,1,0,1,0,0,0,0,0,0,0,0,0
2020-01-30,0,1,0,0,1,0,0,0,-1,0,0,0,0,-1,0,0,0,0,0,0
2020-01-31,0,0,0,0,0,1,0,-1,0,0,0,0,0,0,0,0,-1,0,1,0
2020-02-03,-1,1,0,0,0,0,0,0,0,0,0,0,0,0,-1,0,0,0,1,0


In [7]:
# Inspect the volatility-adjusted returns stored on the labeler. Rows containing
# any NaN are dropped first, then the first five remaining dates are shown.
(
    labels.vol_adj_rets
    .dropna()
    .head()
)

Unnamed: 0_level_0,SYY,MSFT,AMD,TWTR,KO,PFE,F,AAPL,FB,JPM,GE,GPS,WFC,CCL,COST,NVDA,CVX,UBER,ZM,BABA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-01-28,0.673352,2.266726,1.392376,1.390078,-1.291935,-6.03694,0.928093,2.563234,1.28333,2.127738,1.611575,1.308987,0.590566,1.983516,0.648229,2.059893,0.837846,1.009972,0.699858,1.629303
2020-01-29,-1.583906,1.788832,-3.27202,0.453511,0.0,-1.714305,0.471957,1.881798,2.306039,-0.177735,5.918342,-0.059516,-0.220517,0.0,-0.591037,-0.640164,-0.862404,-0.028533,1.319231,0.606245
2020-01-30,1.904614,3.127752,1.393518,-0.889339,4.837998,-0.408829,-0.238029,-0.133212,-5.570945,1.458486,-0.986542,0.030141,1.396283,-2.807958,-0.051907,0.072299,1.180124,-0.447485,0.602343,-1.167138
2020-01-31,-1.394166,-1.657687,-1.939863,-1.620252,-1.185756,0.537514,-0.240911,-4.004481,-3.167479,-3.04113,-1.335214,-2.150183,-2.093746,-1.997299,-1.856399,-2.509622,-4.716605,-0.571501,1.083256,-0.686757
2020-02-03,-9.993946,2.629116,1.118703,1.289931,0.468279,0.850301,1.890252,-0.245181,0.960599,0.879675,-0.92415,0.124836,0.394833,-1.301449,-1.660412,1.054595,-0.975206,1.860891,6.026169,2.188626
