In [1]:
import pandas as pd
import numpy as np

def date_parser(timestamp, format = '%Y-%m-%d %H:%M:%S'):
    import math
    from datetime import datetime      
    if isinstance(timestamp, float) and math.isnan(timestamp):
        return pd.NaT
    if isinstance(timestamp, str):
        return datetime.strptime(timestamp, format)
    return timestamp

#data from https://enigma.co/catalyst/status/
prices = pd.read_csv('prices_1T.csv', index_col=0, date_parser = date_parser)

# Resampling

In [2]:
prices = prices.resample('1H').agg(
    {'open': 'first', 
     'high': 'max', 
     'low': 'min', 
     'close': 'last'})

# All candle patterns

In [3]:
import talib as ta
from tqdm import tqdm_notebook as tqdm

CDLs = [name for name in ta.__TA_FUNCTION_NAMES__ if 'CDL' in name]

for CDL in tqdm(CDLs):
    prices[CDL] = getattr(ta, CDL)(*prices[['open','high','low','close']].values.T)/100

HBox(children=(IntProgress(value=0, max=61), HTML(value='')))




# Next candle balance plot

In [4]:
def get_accuracy(prices, CDLs):
    prices['price_up'] = (prices['close']>=prices['open']).astype(int).replace(0,-1)
    #the shift defines how far in the future we look
    prices['price_up_shifted'] = prices['price_up'].shift(-1)
    
    from sklearn.metrics import accuracy_score
    res = []
    for CDL in CDLs:
        mask = prices[CDL] != 0
        prices_with_prediction = prices.loc[mask]
        sub_df = prices_with_prediction[[CDL,'price_up_shifted']].dropna()
        accuracy = accuracy_score(sub_df.price_up_shifted, sub_df[CDL])
        re = {'name':CDL, 'accuracy':accuracy, 'samples': len(prices_with_prediction)}
        res.append(re)
    res= pd.DataFrame(res).sort_values(by=['accuracy'], ascending = True).dropna() 
    return res

### Accuracy vs sample size plot

In [5]:
def accuracy_plot(df, N, exclusion_list = ['CDLDOJI']):
    df = df[-N:]
    df = df[df.name.apply(lambda x: x not in exclusion_list) ]
    
    data = []
    layout = dict()
    fig = dict(data=data, layout=layout)

    fig['layout'] = dict(
        autosize=True,
        width=1000,
        height=600,
        title = 'Accuracy of the {} best performing candle patterns'.format(N),
        titlefont = dict(
            family='Futura',
            size=20,
            color='#7f7f7f'),
        showlegend=False,
        margin=dict(pad = 5, b = 100),
        font=dict(family='Futura', size=12, color='#7f7f7f')
    )

    fig['layout']['xaxis'] = dict(
        title = 'Accuracy',
        showgrid=True,
        showline=False,
        showticklabels=True,
        zeroline=False,
        automargin=True
        ) 

    fig['layout']['yaxis'] = dict(
        title = 'Predictions no.',
        type='log',
        #dtick = 1,
        showgrid=True,
        showline=False,
        showticklabels=True,
        zeroline=False,
        automargin=True,
        tickfont=dict(
            family="Futura",
            size=12,
            color='#7f7f7f')
        )    


    green = 'rgba(50, 171, 96, 0.6)'
    red = '#cf4817'
    neutral = 'rgba(127, 127, 127, 0.6)'
    blue = '#17becf' #'rgba(66,46,139, 0.6)'

    green_line = 'rgba(50, 171, 96, 1.0)'
    red_line = '#cf4817'
    neutral_line = 'rgba(127, 127, 127, 1.0)'
    blue_line = '#17becf' #'rgba(66,46,139, 1.0)'

    fig['data'].append(dict(type = 'scatter',
                            orientation = 'v',
                            x = df.accuracy,    
                            y = df.samples,                                
                            mode = 'markers',
                            marker = dict(
                                size = 5,
                                color = [blue if val>=0.5 else red for val in df.accuracy],
                                line = dict(
                                    color = [blue_line if val>=0.5 else red_line for val in df.accuracy],
                                    width = 2,
                                )                    
                           ),
                           ))
    from math import log

    df_ann = df[-9:]
    fig['layout']['annotations'] = [
            dict(
                x=xpos,
                y=log(ypos, 10),
                xref='x',
                yref='y',            
                text = str(txt),
                showarrow=True,
                arrowhead=3,
                ax=-70,
                ay=0,
                font=dict(family='Futura', size=10, color='#7f7f7f')
        ) for xpos, ypos, txt in list(zip(df_ann.accuracy, df_ann.samples, df_ann.name.map(lambda x: x[3:])))
        ]    
    return fig

In [6]:
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

res =  get_accuracy(prices, CDLs)
fig = accuracy_plot(res, 20, exclusion_list = ['CDLDOJI'])
iplot(fig)


Mean of empty slice.


invalid value encountered in double_scalars

