In [1]:
import pandas as pd
import numpy as np

def date_parser(timestamp, format = '%Y-%m-%d %H:%M:%S'):
    """Convert a single raw timestamp value into a datetime-like object.

    Parameters
    ----------
    timestamp : object
        The raw value to convert.
    format : str
        ``strptime`` pattern applied to string inputs.

    Returns
    -------
    object
        * ``datetime`` parsed with ``format`` when ``timestamp`` is a ``str``;
        * ``pd.NaT`` when ``timestamp`` is a float NaN;
        * ``timestamp`` itself, untouched, for every other input.

    Raises
    ------
    ValueError
        Propagated from ``datetime.strptime`` when a string does not match
        ``format``.
    """
    import math
    from datetime import datetime

    # Strings are the only inputs that actually get parsed.
    if isinstance(timestamp, str):
        return datetime.strptime(timestamp, format)

    # A float NaN marks a missing cell; everything else passes through as-is.
    is_missing = isinstance(timestamp, float) and math.isnan(timestamp)
    return pd.NaT if is_missing else timestamp

# Data source: https://enigma.co/catalyst/status/
# The first CSV column becomes the index; date_parser is handed to pandas so
# that the index values are converted to datetimes.
# NOTE(review): newer pandas releases deprecate `date_parser` in favour of
# `date_format` — confirm the installed pandas version before upgrading.
prices = pd.read_csv(
    'prices_1T.csv',
    index_col=0,
    date_parser=date_parser,
)

# Resampling

In [2]:
# Aggregate the bars into '1H' buckets, one OHLC candle per bucket:
# first open, highest high, lowest low, last close.
prices = (
    prices
    .resample('1H')
    .agg(dict(open='first', high='max', low='min', close='last'))
)

# All candle patterns

In [3]:
import talib as ta
from tqdm import tqdm_notebook as tqdm

# Select the candlestick-pattern functions by their 'CDL' *prefix*: the
# frequency cell strips exactly the first three characters of each name, so a
# plain substring match would be looser than what the rest of the notebook
# assumes.
# NOTE(review): __TA_FUNCTION_NAMES__ is a dunder attribute of talib; check
# whether the installed version offers a public function-listing API instead.
CDLs = [name for name in ta.__TA_FUNCTION_NAMES__ if name.startswith('CDL')]

# The OHLC inputs are identical for every pattern, so slice and transpose them
# once instead of rebuilding the array on each loop iteration.
ohlc_arrays = prices[['open','high','low','close']].values.T

for CDL in tqdm(CDLs):
    # Each pattern function is called as f(open, high, low, close); its output
    # is divided by 100 and stored as a new column named after the function.
    # NOTE(review): presumably this rescales +/-100-style signals to +/-1 —
    # confirm against the TA-Lib documentation.
    prices[CDL] = getattr(ta, CDL)(*ohlc_arrays)/100

HBox(children=(IntProgress(value=0, max=61), HTML(value='')))




### Frequency plot

In [4]:
# Frequency of each pattern = share of rows whose signal is non-zero,
# computed as 1 - (share of rows equal to 0).
# Counting the zeros directly replaces the earlier value_counts pivot, which
#   * relied on the top-level pd.value_counts helper (deprecated in newer pandas),
#   * cast the signal values to int column labels, so any fractional signal
#     would collide with the 0 column, and
#   * produced NaN for a pattern with no zero rows, or a KeyError on column 0
#     when no pattern had any.
CDL_freq = (1 - (prices[CDLs] == 0).mean()).rename('freq')

# Drop the three-character 'CDL' prefix so only the pattern name remains.
CDL_freq.index = CDL_freq.index.map(lambda x: x[3:])

# Ascending order puts the most frequent patterns at the end of the series.
CDL_freq = CDL_freq.sort_values(ascending = True)

In [5]:
from plotly.offline import init_notebook_mode, iplot
# Set up plotly's offline notebook mode before any iplot() call below.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

def get_colors(ixs, green, red, neutral):
    """Map each label to a colour according to the keyword it contains.

    Parameters
    ----------
    ixs : iterable of str
        Labels to classify.
    green, red, neutral : object
        Values returned for labels containing 'BULL', labels containing
        'BEAR', and all remaining labels, respectively.

    Returns
    -------
    list
        One colour per label, in input order. 'BULL' is checked first, so a
        label containing both keywords maps to ``green``.
    """
    def pick(label):
        # 'BULL' takes precedence over 'BEAR'.
        if 'BULL' in label:
            return green
        return red if 'BEAR' in label else neutral

    return [pick(label) for label in ixs]

def freq_plot(df, N):
    """Build a plotly figure dict: horizontal bars for the last ``N`` entries of ``df``.

    Parameters
    ----------
    df : pandas.Series
        Frequencies as fractions (they are multiplied by 100 for the
        percentage labels), indexed by the bar labels. The *last* ``N``
        entries are plotted, so the series should be sorted ascending for the
        chart to show the most frequent entries.
    N : int
        Number of trailing entries to plot. ``N <= 0`` yields a figure with no
        bars and no annotations.

    Returns
    -------
    dict
        ``{'data': [bar_trace], 'layout': {...}}`` ready to be passed to
        ``iplot``.
    """
    # A plain ``df[-N:]`` returns the *whole* series for N == 0 (because
    # -0 == 0) and drops the first |N| rows for negative N. Treat any
    # non-positive N as "plot nothing" instead.
    top = df.iloc[-N:] if N > 0 else df.iloc[:0]

    green = 'rgba(50, 171, 96, 0.6)'
    red = 'rgba(171, 62, 50, 0.6)'
    green_line = 'rgba(50, 171, 96, 1.0)'
    red_line = 'rgba(171, 62, 50, 1.0)'

    # Labels containing neither 'BULL' nor 'BEAR' are drawn green: green is
    # passed as get_colors' neutral argument for both the fill and the outline.
    # NOTE(review): a grey neutral palette used to be defined here but was
    # never used — confirm that green is the intended fallback.
    bars = dict(
        type='bar',
        orientation='h',
        x=top.values,
        y=top.index,
        marker=dict(
            color=get_colors(top.index, green, red, green),
            line=dict(
                color=get_colors(top.index, green_line, red_line, green_line),
                width=2),
        ),
    )

    # NOTE(review): `titlefont` is the legacy spelling of the title font
    # setting; check the installed plotly version before switching to the
    # nested title=dict(text=..., font=...) form.
    layout = dict(
        autosize=True,
        width=1000,
        height=800,
        title='Top {} frequent candle patterns'.format(N),
        titlefont=dict(
            family='Futura',
            size=20,
            color='#7f7f7f'),
        showlegend=False,
        margin=dict(l=10, pad=5),
        font=dict(family='Futura', size=12, color='#7f7f7f'),
        # The x axis is hidden entirely; each bar gets a percentage label
        # from the annotations below.
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            automargin=True),
        # Only the tick labels of the y axis are shown.
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=True,
            zeroline=False,
            automargin=True,
            tickfont=dict(
                family="Futura",
                size=12,
                color='#7f7f7f')),
    )

    # One percentage label per bar, placed 0.011 x-axis units past the bar's end.
    layout['annotations'] = [
        dict(
            x=xpos+0.011,
            y=ypos,
            xref='x',
            yref='y',
            text=str(round(xpos*100,2))+'%',
            showarrow=False,
        )
        for xpos, ypos in zip(top.values, top.index)
    ]

    return dict(data=[bars], layout=layout)

In [6]:
# Render the figure for the last 30 entries of CDL_freq (sorted ascending
# above, so these are the 30 most frequent patterns).
iplot(freq_plot(CDL_freq, 30))