In [1]:
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
from collections import defaultdict
import warnings
import time, requests
import matplotlib.pyplot as plt
import matplotlib, json
from IPython.core.display import display, HTML
display(HTML(""))
%config InlineBackend.figure_format = 'retina'
%matplotlib tk
from tqdm import tqdm
import mplfinance as mpf
from datetime import timedelta

matplotlib.rcParams['figure.figsize'] = (8, 5)
def get_data(symbol, lookback):
    all_data = pd.DataFrame()
    for x in range(lookback):
        if x == 0:
            data = requests.get(f"https://fapi.binance.com/fapi/v1/klines?symbol={symbol}&interval=1m&limit=1500")
            data = pd.DataFrame(json.loads(data.text), columns=['Open time','Open','High','Low','Close',\
                                                                'Volume','Close time','Quote asset volume',\
                                                                'Number of trades','Taker buy base asset volume',\
                                                                'Taker buy quote asset volume','Ignore'])
        else:
            start = data['Open time'].min()
            data = requests.get(f"https://fapi.binance.com/fapi/v1/klines?symbol={symbol}&interval=1m&limit=1500&endTime={start}")
            data = pd.DataFrame(json.loads(data.text), columns=['Open time','Open','High','Low','Close',\
                                                                'Volume','Close time','Quote asset volume',\
                                                                'Number of trades','Taker buy base asset volume',\
                                                                'Taker buy quote asset volume','Ignore'])

        data = data.astype({'Open':'float','High':'float','Low':'float','Close':'float'})
        all_data = pd.concat([data, all_data], axis=0)
    all_data.drop(columns=['Volume','Close time','Quote asset volume',\
                           'Number of trades','Taker buy base asset volume',\
                           'Taker buy quote asset volume','Ignore'], inplace=True)
    all_data['Open time'] = pd.to_datetime(all_data['Open time']*1000000)
    all_data = all_data.set_index('Open time') 
    all_data.replace(0, method='bfill', inplace=True)
    
    return all_data
        
def get_stock_data(stocklist, lookback):
    stock_data = {}
    for stock in tqdm(stocklist, desc='Getting stock data'):
        try:
            stock_data[stock] = get_data(stock, lookback)
        except Exception as e:
            print('Exception {} {}'.format(stock, e))
    return stock_data

def resample(stock_data, rate):
    for stock in stock_data:
        stock_data[stock] = stock_data[stock].resample(rate, closed='right', label='right').agg({'Open': 'first',
                                                                                                 'High': 'max',
                                                                                                 'Low': 'min',
                                                                                                 'Close': 'last'}).dropna()
    return stock_data

In [10]:
def get_max_min(prices):

    local_max_idx = argrelextrema(prices['High'].values, np.greater, order=1)[0]
    local_min_idx = argrelextrema(prices['Low'].values, np.less)[0]
    
    local_max_draw = list()
    local_min_draw = list()
    
    for idx, value in enumerate(prices['High']):
        if idx in local_max_idx:
            local_max_draw.append(prices['High'].iloc[idx])
        else:
            local_max_draw.append(np.nan)
            
        if idx in local_min_idx:
            local_min_draw.append(prices['Low'].iloc[idx])
        else:
            local_min_draw.append(np.nan)
    print(local_max_draw)
    ap = [mpf.make_addplot(local_max_draw, type='scatter', marker='^', markersize=50, color='g'),
          mpf.make_addplot(local_min_draw, type='scatter', marker='*', markersize=50, color='r'),
     ]
    mpf.plot(prices, type='candle', addplot=ap)
    
#     price_local_max_dt = []
#     window_range = 3
#     for i in local_max_idx:
#         if (i>window_range) and (i<len(prices)-window_range):
# #             print(prices.iloc[i-window_range:i+window_range]['Close'])
# #             print(pd.to_numeric(prices.iloc[i-window_range:i+window_range]['Close']).dtype)
#             price_local_max_dt.append(pd.to_numeric(prices.iloc[i-window_range:i+window_range]['Close']).idxmax())
#     print(price_local_max_dt)
    
#     price_local_min_dt = []
#     for i in local_min_idx:
#         if (i>window_range) and (i<len(prices)-window_range):
#             price_local_min_dt.append(pd.to_numeric(prices.iloc[i-window_range:i+window_range]['Close']).idxmin()) 
            
#     maxima = pd.DataFrame(prices.loc[price_local_max_dt])
#     minima = pd.DataFrame(prices.loc[price_local_min_dt])
#     max_min = pd.concat([maxima, minima]).sort_index()
#     max_min.index.name = 'date'
#     max_min = max_min.reset_index()
#     max_min = max_min[~max_min.date.duplicated()]
#     p = prices.reset_index()   
#     print("p: ", p)
#     max_min['day_num'] = p[p['Open time'].isin(max_min.date)].index.values
#     max_min = max_min.set_index('day_num')['Close']
#     print("max_min: ", max_min)
    return local_max_draw, local_min_draw

def find_patterns(timeframe, stock, prices, local_max_draw, local_min_draw):  
    patterns = defaultdict(list)
    length = len(local_max_draw)
    ap = []
    color = ['blue', 'orange', 'red', 'yellow']
    color_idx = 0
    ratio = 0.02
    start = time.time()
    local_max_draw = [(idx, value) for idx, value in enumerate(local_max_draw) if str(value) != 'nan']
    local_min_draw = [(idx, value) for idx, value in enumerate(local_min_draw) if str(value) != 'nan']
    for i in range(length):  
#         print("i:", i)
        if local_max_draw[i] is np.nan:
            continue
        for j in range(i+1, length):
            if local_min_draw[j] is np.nan:
                continue
            for k in range(j+1, length):
                if local_max_draw[k] is np.nan:
                    continue
                for l in range(k+1, length):
                    if local_min_draw[l] is np.nan:
                        continue
                    for m in range(l+1, length):
                        if local_max_draw[m] is np.nan:
                            continue
#                         print(i, j, k, l, m)
                        first_high = local_max_draw[i]
                        first_low = local_min_draw[j]
                        second_high = local_max_draw[k]
                        second_low = local_min_draw[l]
                        third_high = local_max_draw[m]
        
        
                        if  (m - i < 20 and first_high > first_low and 
                             second_high > first_high and
                             second_high > second_low and
                             second_high > third_high and
                             third_high > first_low and
                             third_high > second_low and
                            abs(first_high - third_high) <= np.mean([first_high, third_high]) * ratio and
                            abs(first_low - second_low) <= np.mean([first_low, second_low]) * ratio):
                                print(first_high,first_low,second_high,second_low,third_high)
                                head_shoulder = list()
                                for idx, value in enumerate(prices['High']):
                                    if idx in [i, k, m]:
                                        head_shoulder.append(prices['High'].iloc[idx])
                                    elif idx in [j, l]:
                                        head_shoulder.append(prices['Low'].iloc[idx])
                                    else:
                                        head_shoulder.append(np.nan)
                                ap = []
                                ap.append(mpf.make_addplot(head_shoulder, type='scatter', marker='o', alpha=.3, markersize=50, color=color[color_idx]),
                                 )
#                                 color_idx += 1
#                                 color_idx %= len(color)
#                                 i = m
#                                 j = k = l = m = 0
                                mpf.plot(prices, title=f"{stock}_{timeframe}", type='candle', addplot=ap, savefig=f'./head_shoulder/{stock}_{timeframe}_{i}_{j}_{k}_{l}_{m}.png')
    
#                                 patterns['IHS'].append()
    end = time.time()
    print("Time cost: ", end - start)
    return patterns
    
def plot_minmax_patterns(prices, max_min, patterns, stock):
    
    incr = str((prices.index[1] - prices.index[0]).seconds/60)
    if len(patterns) == 0:
        pass
    else:
        num_pat = len([x for x in patterns.items()][0][1])
        f, axes = plt.subplots(1, 2, figsize=(16, 5))
        axes = axes.flatten()
        prices['Close'] = pd.to_numeric(prices['Close'])
        prices_ = prices.reset_index()['Close']
        axes[0].plot(prices_)
        axes[0].scatter(max_min.index, max_min, s=100, alpha=.3, color='orange')
        axes[1].plot(prices_)
        for name, end_day_nums in patterns.items():
            for i, tup in enumerate(end_day_nums):
                sd = tup[0]
                ed = tup[1]
                axes[1].scatter(max_min.loc[sd:ed].index,
                              max_min.loc[sd:ed].values,
                              s=200, alpha=.3)
                plt.yticks([])
        plt.tight_layout()
        plt.title('{}: {}:  ({} patterns)'.format(stock, incr, num_pat))

def get_results(prices, max_min, pat, stock):
    
    incr = str((prices.index[1] - prices.index[0]).seconds/60)
    
    #fw_list = [1, 12, 24, 36] 
    fw_list = [1, 2, 3]
    results = []
    if len(pat.items()) > 0:
        end_dates = [v for k, v in pat.items()][0]      
        for date in end_dates:  
            param_res = {'stock': stock,
                         'increment': incr, 
                         'date': date}
            for x in fw_list:
#                 print("fk: ", pd.to_numeric(prices['Close']).pct_change(x))
                returns = (pd.to_numeric(prices['Close']).pct_change(x).shift(-x).reset_index(drop=True).dropna())
                try:
                    param_res['fw_ret_{}'.format(x)] = returns.loc[date[1]]   
                except Exception as e:
                    param_res['fw_ret_{}'.format(x)] = e
            results.append(param_res)  
    else:
        param_res = {'stock': stock,
                     'increment': incr,
                     'ema': ema_,
                     'window': window_,
                     'date': None}
        for x in fw_list:
            param_res['fw_ret_{}'.format(x)] = None   
        results.append(param_res)
    return pd.DataFrame(results)

def screener(timeframe, stock_data, plot, results):
    
    all_results = pd.DataFrame()
    
    for stock in stock_data:
        prices = stock_data[stock]
        
        local_max_draw, local_min_draw = get_max_min(prices)
        pat = find_patterns(timeframe, stock, prices, local_max_draw, local_min_draw)

#         if plot == True:
#             plot_minmax_patterns(prices, max_min, pat, stock)

#         if results == True:
#             all_results = pd.concat([all_results, get_results(prices, max_min, pat, stock)], axis=0)
                
    if results == True:
        return all_results.reset_index(drop=True)


# Run different timeframes here without requesting new data. '5T' = 5 minutes, '60T' = 1 hour, '120T' = 2 hours, etc.
for timeframe in ['120T']:
    resampled_stock_data = resample(stock_data, timeframe)
    results = screener(timeframe, resampled_stock_data, plot=True, results=True)

[nan, nan, nan, nan, nan, nan, nan, nan, nan, 1330.45, nan, nan, nan, 1359.8, nan, nan, 1366.0, nan, nan, 1370.0, nan, nan, nan, nan, 1358.0, nan, nan, nan, nan, nan, 1364.75, nan, nan, nan, 1383.29, nan, nan, nan, nan, 1380.0, nan, nan, nan, nan, nan, 1361.21, nan, nan, nan, 1360.65, nan, 1359.0, nan, nan, nan, nan, nan, 1337.19, nan, nan, 1333.0, nan, 1330.09, nan, nan, 1329.95, nan, nan, nan, nan, nan, nan, nan, nan, nan, 1328.72, nan, 1327.0, nan, nan, nan, 1337.93, nan, nan, nan, nan, nan, 1319.19, nan, nan, nan, nan, nan, nan, nan, nan, nan, 1288.45, nan, nan, nan, 1297.7, nan, nan, nan, 1288.6, nan, nan, nan, 1306.6, nan, 1305.5, nan, nan, nan, 1302.47, nan, nan, nan, nan, nan, nan, nan, 1284.0, nan, nan, nan, 1301.0, nan, nan, 1343.36, nan, nan, nan, nan, 1343.0, nan, nan, nan, nan, nan, nan, 1301.32, nan, nan, nan, nan, nan, 1291.4, nan, nan, nan, nan, 1287.0, nan, 1288.78, nan, 1289.9, nan, nan, 1287.48, nan, nan, 1315.0, nan, nan, nan, nan, nan, nan, nan, 1337.48, nan, nan, 



            POSSIBLE TO SEE DETAILS (Candles, Ohlc-Bars, Etc.)
   For more information see:
   - https://github.com/matplotlib/mplfinance/wiki/Plotting-Too-Much-Data
   
   OR set kwarg `warn_too_much_data=N` where N is an integer 
   LARGER than the number of data points you want to plot.



IndexError: list index out of range

NameError: name 'end' is not defined

In [5]:
# stocklist = [MSFT','MMM',
#              'S', 'V', 'VZ', 'AAPL', 
#              'AMZN', 'FB', 'NFLX', 
#              'GOOG', 'GS', 'LNKD',
#              'TLRY']

# stocklist = ['btcusdt', 'ethusdt', 'bnbusdt']
stocklist = ['ethusdt']

stock_data = get_stock_data(stocklist, 50)

Getting stock data: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.94s/it]


In [None]:
# Run different timeframes here without requesting new data. '5T' = 5 minutes, '60T' = 1 hour, '120T' = 2 hours, etc.
resampled_stock_data = resample(stock_data, '1T')

In [None]:
# Run the screener on ema_list and window_list, plotting results.

ema_list = [3, 10, 20, 30, ]
window_list = [3, 10, 20, 30, ]

results = screener(resampled_stock_data, ema_list, window_list, plot=True, results=True)

In [None]:
# Examples of reading the results.

for x in [x for x in results.columns if x not in ['stock', 'date']]:
    results[x] = pd.to_numeric(results[x], errors='coerce')
    
print('Average results by window param')
display(round(results.groupby('window').mean(), 3).sort_values(by=[x for x in results.columns if 'fw' in x], ascending=False))
print('\n', 'Average results by ema param')
display(round(results.groupby('ema').mean(), 3).sort_values(by=[x for x in results.columns if 'fw' in x], ascending=False))
print('\n', 'Average results by stock')
display(round(results.groupby('stock').mean(), 3).sort_values(by=[x for x in results.columns if 'fw' in x], ascending=False))
# print('\n', 'Results sorted by return')
# display(round(results.sort_values(by=[x for x in results.columns if 'fw' in x], ascending=False), 3))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D


points = np.ones(3)  # Draw 3 points for each line
text_style = dict(horizontalalignment='right', verticalalignment='center',
                  fontsize=12, fontdict={'family': 'monospace'})
marker_style = dict(linestyle=':', color='0.8', markersize=10,
                    mfc="C0", mec="C0")


def format_axes(ax):
    ax.margins(0.2)
    ax.set_axis_off()
    ax.invert_yaxis()


def split_list(a_list):
    i_half = len(a_list) // 2
    return (a_list[:i_half], a_list[i_half:])
fig, axes = plt.subplots(ncols=2)
fig.suptitle('un-filled markers', fontsize=14)

# Filter out filled markers and marker settings that do nothing.
unfilled_markers = [m for m, func in Line2D.markers.items()
                    if func != 'nothing' and m not in Line2D.filled_markers]
print(unfilled_markers)
for ax, markers in zip(axes, split_list(unfilled_markers)):
    for y, marker in enumerate(markers):
        ax.text(-0.5, y, repr(marker), **text_style)
        ax.plot(y * points, marker=marker, **marker_style)
    format_axes(ax)

plt.show()