# Scratch Notebook for Pattern Recognition function


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.signal import argrelextrema, savgol_filter
from fbm import FBM
from collections import defaultdict
from pymongo import MongoClient
from datetime import datetime
import quandl
import re
import string
from sklearn.preprocessing import LabelEncoder
import time
%matplotlib inline

# Placeholder API token; it is not referenced by any other visible cell.
# NOTE(review): presumably intended for the `quandl` client imported above - confirm,
# and never commit a real credential here.
token = 'authtoken'

In [None]:
# NOTE(review): `cols` is not defined in any visible cell; presumably a pymongo
# collection (MongoClient is imported above) - confirm before running this cell.
cols.find_one()

In [None]:
def normalize(series):
    """Rescale `series` linearly so its minimum maps to -1 and its maximum to +1."""
    lo = min(series)
    span = max(series) - lo
    return 2 * (series - lo) / span - 1

In [None]:
def normalize_windows(window):
    """Min-max scale `window` so its smallest value becomes 0 and its largest 1."""
    floor = np.min(window)
    return (window - floor) / (np.max(window) - floor)

In [None]:
def standardize(series):
    """Express every value of `series` as a fractional change from its first value.

    Args:
        series: a pandas Series or a NumPy array (anything supporting
            element-wise division).

    Returns:
        `series / first - 1`, with the same type and shape as the input.
    """
    # On a pandas Series `series[0]` is a *label* lookup, which fails for a
    # slice whose index no longer contains 0; take the first value by position.
    first = series.iloc[0] if hasattr(series, "iloc") else series[0]
    return (series / first) - 1

In [None]:
def transform(x):
    """Map a 1-based position `x` onto the price scale of the current window.

    Reads the module-level `spread` and `max_min`; position 1 maps to
    `max_min.min()` and each further step adds `spread / 39`.
    """
    step = spread / (40 - 1)
    return step * (x - 1) + max_min.min()

In [None]:
def find_max_min(prices, smooth=0.2):
    """Locate the local maxima and minima of a price series.

    The prices are smoothed with a radial-basis-function fit, the turning
    points of the smoothed curve are located, and each turning point is then
    snapped to the highest (or lowest) actual price among its immediate
    neighbours.

    Args:
        prices: 1-D sequence of prices. It is re-indexed to the float
            positions 1.0 .. len(prices).
        smooth: smoothing factor handed to `scipy.interpolate.Rbf`.

    Returns:
        A one-column DataFrame holding the extreme prices, indexed by their
        1-based (float) position and sorted by that index.
    """
    # Imported locally: the notebook's import cell does not bring Rbf into scope.
    from scipy.interpolate import Rbf

    prices = pd.Series(prices)
    prices.index = np.linspace(1., len(prices), len(prices))
    rbf = Rbf(prices.index, prices, smooth=smooth)
    smooth_prices = pd.Series(rbf(prices.index), index=prices.index)

    local_max = argrelextrema(smooth_prices.values, np.greater)[0]
    local_min = argrelextrema(smooth_prices.values, np.less)[0]

    # Turning points too close to either end lack a full neighbourhood; skip them.
    last_allowed = len(prices) - 2

    # idxmax/idxmin return index *labels*, which is what `.loc` below expects.
    # (argmax/argmin return positions inside the 3-point slice in current pandas.)
    price_local_max_dt = [
        prices.iloc[i - 1:i + 2].idxmax()
        for i in local_max
        if 1 < i < last_allowed
    ]
    price_local_min_dt = [
        prices.iloc[i - 1:i + 2].idxmin()
        for i in local_min
        if 1 < i < last_allowed
    ]

    maxima = pd.DataFrame(prices.loc[price_local_max_dt])
    minima = pd.DataFrame(prices.loc[price_local_min_dt])

    max_min = pd.concat([maxima, minima]).sort_index()

    return max_min

In [None]:
def _extends_trendline(reference_slope, slope, on_peak_line, zero,
                       inner_slope_threshold, outer_slope_threshold):
    """Return True when `slope` lies inside the tolerance band around `reference_slope`."""
    # Both slopes negligible: the line is flat and the new point keeps it flat.
    if abs(reference_slope) < zero and abs(slope) < zero:
        return True

    # The band is asymmetric; which threshold applies below/above the
    # reference slope depends on which of the two trendlines is extended.
    if on_peak_line:
        below_tol, above_tol = inner_slope_threshold, outer_slope_threshold
    else:
        below_tol, above_tol = outer_slope_threshold, inner_slope_threshold

    if reference_slope >= zero:
        lower_bound = reference_slope * (1 - below_tol)
        upper_bound = reference_slope * (1 + above_tol)
    elif reference_slope <= -zero:
        lower_bound = reference_slope * (1 + below_tol)
        upper_bound = reference_slope * (1 - above_tol)
    else:
        # Flat reference line but a clearly sloped candidate: no band applies.
        return False

    return lower_bound <= slope <= upper_bound


def _classify_trendlines(slope_odds, slope_evens, direction, zero):
    """Name the pattern formed by the odd/even trendline slopes, or return None."""
    bottom = direction == 'bottom'

    # Right angles (reversals): the even-numbered extrema form a flat line.
    if abs(slope_evens) <= zero:
        if bottom:
            if slope_odds >= zero:
                return 'right_tri_bottom'
            if slope_odds <= -zero:
                return 'right_broad_bottom'
        else:
            if slope_odds <= -zero:
                return 'right_tri_top'
            if slope_odds >= zero:
                return 'right_broad_top'
        return None

    # NOTE(review): the two branches below ignore `direction` when deciding
    # broadening vs. wedge; for direction == 'top' the odd extrema are peaks,
    # so falling odds with rising evens converge rather than broaden. Labels
    # are kept as they were - confirm the intended naming.

    # Broadening (reversals)
    if slope_odds <= -zero and slope_evens >= zero:
        return 'broad_bottom' if bottom else 'broad_top'

    # Wedges / triangles (reversals)
    if slope_odds >= zero and slope_evens <= -zero:
        return 'wedge_bottom' if bottom else 'wedge_top'

    # Continuation triangles: the odd-numbered extrema form a flat line.
    if abs(slope_odds) <= zero:
        if bottom and slope_evens <= -zero:
            return 'desc_tri'
        if not bottom and slope_evens >= zero:
            return 'asc_tri'
        return None

    # Channel up: both lines rise.
    if slope_odds >= zero and slope_evens >= zero:
        return 'bearish_channel_up' if bottom else 'bullish_channel_up'

    # Channel down: both lines fall.
    if slope_odds <= -zero and slope_evens <= -zero:
        return 'bearish_channel_down' if bottom else 'bullish_channel_down'

    return None


def find_patterns(max_min, linear_threshold=.03, outer_slope_threshold=.05, inner_slope_threshold=.2, minimum_range=.1, window_size=40):
    """Scan a sequence of alternating extrema for the first recognisable chart pattern.

    A five-extrema window slides over `max_min`. Each window is first tested
    against the fixed five-point shapes (head and shoulders, inverse head and
    shoulders, triple top/bottom). Otherwise two trendlines are fitted - one
    through the odd-numbered extrema (1st, 3rd, ...) and one through the
    even-numbered ones - and extended over later extrema for as long as their
    slopes stay inside a tolerance band; the resulting slopes decide the name.

    Args:
        max_min: pandas Series of extreme prices indexed by position (a
            one-column DataFrame is also accepted).
        linear_threshold: fraction of the overall price spread below which two
            levels (or a slope) count as equal / flat.
        outer_slope_threshold: relative slope tolerance on one side of a trendline.
        inner_slope_threshold: relative slope tolerance on the other side.
        minimum_range: windows whose price range is below this fraction of the
            overall spread are skipped.
        window_size: number of positions used to rescale the index axis onto
            the price axis when computing slopes.

    Returns:
        `(pattern_name, indices)` for the first match, where `indices` are the
        index labels of the extrema forming the pattern, or None when nothing
        matches. Progress messages ('too small', 'added') are printed.
    """
    # Work on the column itself when handed a one-column DataFrame, so the
    # scalar conversions below never have to call float() on a Series.
    if isinstance(max_min, pd.DataFrame) and max_min.shape[1] == 1:
        max_min = max_min.iloc[:, 0]

    if len(max_min) < 5:
        return None

    spread = float(max_min.max() - max_min.min())
    if spread == 0:
        # A perfectly flat series has no patterns and would divide by zero below.
        return None
    zero = float(linear_threshold * spread)

    def transform(x):
        # Rescale an index position onto the price axis so slopes are comparable.
        return (spread / (window_size - 1)) * (x - 1) + max_min.min()

    for i in range(len(max_min) - 4):
        window = max_min.iloc[i:i+5]
        if float((window.max() - window.min()) / spread) < minimum_range:
            print('too small')
            continue

        e1, e2, e3, e4, e5 = (float(v) for v in window.values)
        # Labels of the five extrema; returned for the fixed five-point shapes.
        window_indices = list(window.index)

        shoulders_level = abs(e1 - e5) <= zero
        necks_level = abs(e2 - e4) <= zero

        # Head and Shoulders (Bearish Reversal)
        if (e1 > e2) and (e3 > e1) and (e3 > e5) and (e5 > e4) and \
                shoulders_level and necks_level:
            return 'head_shoulders', window_indices

        # Inverse Head and Shoulders (Bullish Reversal)
        if (e1 < e2) and (e3 < e1) and (e3 < e5) and (e5 < e4) and \
                shoulders_level and necks_level:
            return 'inverse_head_shoulders', window_indices

        # First, third and fifth extrema on one level: only triple tops/bottoms apply.
        if (abs(e1 - e3) <= zero) and shoulders_level and (abs(e3 - e5) <= zero):
            # Triple Bottom (Bullish Reversal)
            if (e1 < e2) and (e3 < e4) and (e5 < e4) and necks_level:
                return 'trip_bottom', window_indices
            # Triple Top (Bearish Reversal)
            if (e1 > e2) and (e3 > e4) and (e5 > e4) and necks_level:
                return 'trip_top', window_indices
            continue

        # Trendline patterns: start from the first four extrema and try to extend.
        extrema = [e1, e2, e3, e4]
        indicies = window_indices[:4]

        slope_odds = float((e3 - e1) / (transform(indicies[2]) - transform(indicies[0])))
        slope_evens = float((e4 - e2) / (transform(indicies[3]) - transform(indicies[1])))
        base_slopes = [slope_odds, slope_evens]  # [0 = odds, 1 = evens]

        direction = 'top' if e1 > e2 else 'bottom'

        for k in range(i + 4, len(max_min)):
            e = float(max_min.iloc[k])
            index = max_min.index[k]

            # Parity relative to the window start (0 = odds, 1 = evens);
            # the absolute position `k` is misaligned whenever `i` is odd.
            sign = (k - i) % 2
            slope = float((e - extrema[sign]) / (transform(index) - transform(indicies[sign])))

            # direction == 'top' means the first extremum is a peak, so the
            # odd line runs through peaks; for 'bottom' the even line does.
            on_peak_line = (direction == 'top') == (sign == 0)

            if not _extends_trendline(base_slopes[sign], slope, on_peak_line, zero,
                                      inner_slope_threshold, outer_slope_threshold):
                break

            extrema.append(e)
            indicies.append(index)
            print('added')

        # Must have at least 5 extrema and can't contain the final extrema
        # (a check on whether the pattern is completed)
        if len(extrema) < 5 or max_min.index[-1] in indicies:
            continue

        name = _classify_trendlines(slope_odds, slope_evens, direction, zero)
        if name is not None:
            return name, indicies

    return None

        

In [None]:
# Debug cell: walks a price series in 40-row windows, prints the slope through
# the first/last extrema next to the tolerance band from `slopes`, and plots
# the window with its first five extrema marked.
# NOTE(review): `aapl` is not defined in any visible cell - presumably a price series; confirm.
# NOTE(review): `slopes` is defined in a later cell, so that cell has to be run first.
for j in range(0,1200,40):
    max_min = find_max_min(aapl[j:j+40])
    # NOTE(review): find_patterns as defined in this notebook returns
    # `(name, indices)` or None, so this four-way unpack does not match it;
    # the cell looks like it was written against an earlier version.
    zero,extrema,indicies,base_slopes = find_patterns(max_min)
    # `spread` and `max_min` are module-level here; the global `transform` reads them.
    spread = max_min.max() - max_min.min()
    odds, evens = base_slopes[0], base_slopes[1]
    slope = float((extrema[-1] - extrema[0]) / (transform(indicies[-1]) - transform(indicies[0])))
    reference_slope = odds
    print('Point 1:', extrema[0], indicies[0], float(transform(indicies[0])))
    print('Point 3:', extrema[2], indicies[2], float(transform(indicies[2])))
    print('Point 5:', extrema[-1], indicies[-1], float(transform(indicies[-1])))
    print('Slope from 1 to 3:', reference_slope)
    print('Slope from 1 to 5:', slope)
    print('Zero:', zero)
    # 'top' when the first extremum is higher than the second, otherwise 'bottom'.
    if extrema[0] > extrema[1]:
        direction = 'top'
    else:
        direction = 'bottom'
    # `zero` and `sign` are not passed, so `slopes` uses its own defaults for them.
    lower_bound, upper_bound = slopes(reference_slope, slope, direction, inner_slope_threshold=.2, outer_slope_threshold=.05)
    print('Slope band:', [lower_bound, upper_bound])

    print(direction)

    if lower_bound and upper_bound:
        if lower_bound == 'flat' and upper_bound =='flat':
            print('added')

        elif lower_bound <= slope and slope <= upper_bound:
            print('added')

        else:
            # NOTE(review): bare string expression - evaluates and discards, prints nothing.
            'bust'

    else:
        # NOTE(review): bare string expression - evaluates and discards, prints nothing.
        'wtf'

    plt.plot(aapl[j:j+40])
    # Index labels are 1-based positions, hence the -1 to line up with the plotted window.
    for i in range(5):
        plt.plot(indicies[i]-1,extrema[i], 'ro')
    plt.show();

In [None]:
def slopes(reference_slope, slope, direction, zero=None, sign=0, outer_slope_threshold=.03, inner_slope_threshold=.1):
    """Compute the tolerance band a new trendline slope must fall into.

    Args:
        reference_slope: slope of the established trendline.
        slope: slope from the trendline's anchor to the candidate point.
        direction: 'top' or 'bottom'.
        zero: magnitude below which a slope counts as flat. When omitted, the
            module-level `zero` is looked up at call time.
        sign: 0 for the odd-numbered extrema line, 1 for the even-numbered one.
        outer_slope_threshold: relative tolerance on one side of the reference slope.
        inner_slope_threshold: relative tolerance on the other side.

    Returns:
        `('flat', 'flat')` when both slopes are below `zero` in magnitude,
        `(lower_bound, upper_bound)` when a band applies, and `(None, None)`
        when no rule matches.

    Raises:
        NameError: if `zero` is omitted and no module-level `zero` exists.
    """
    if zero is None:
        # Resolve at call time. A default of `zero=zero` would freeze whatever
        # value existed when this cell ran, or fail outright if none did.
        try:
            zero = globals()['zero']
        except KeyError:
            raise NameError(
                "slopes() needs `zero`: pass it explicitly or define a module-level `zero`"
            ) from None

    if abs(reference_slope) < zero and abs(slope) < zero:
        return 'flat', 'flat'

    # Assuming direction == 'top' means the first extremum is a peak (that is how
    # the callers in this notebook set it): with 'top' the odd line (sign 0) runs
    # through peaks, with 'bottom' the even line (sign 1) does.
    peak_line = (direction == 'top' and sign == 0) or (direction == 'bottom' and sign == 1)
    trough_line = (direction == 'top' and sign == 1) or (direction == 'bottom' and sign == 0)

    if peak_line:
        below_tol, above_tol = inner_slope_threshold, outer_slope_threshold
    elif trough_line:
        below_tol, above_tol = outer_slope_threshold, inner_slope_threshold
    else:
        # Unknown direction/sign combination: no band.
        return None, None

    if reference_slope >= zero:
        return reference_slope * (1 - below_tol), reference_slope * (1 + above_tol)
    if reference_slope <= -zero:
        return reference_slope * (1 + below_tol), reference_slope * (1 - above_tol)

    # Flat reference line but a clearly sloped candidate: report "no band"
    # instead of leaving the bounds unassigned.
    return None, None

# Random pattern generation (FBM) and older code

In [None]:
def upsample(df, start, end, base=120, out_sample=False):
    """Re-index a window of rows onto a denser grid and fill the gaps linearly.

    Args:
        df: the rows to stretch (sliced to `start:end` first when `out_sample` is set).
        start, end: bounds of the window; only `end - start` is used unless
            `out_sample` is set.
        base: target number of rows; a window that already spans `base` rows
            is returned untouched.
        out_sample: slice `df[start:end]` before processing.

    Returns:
        `df` itself when the window already spans `base` rows, otherwise a
        DataFrame (old index kept as a column) on the new float index with
        missing rows linearly interpolated.
    """
    frame = df[start:end] if out_sample else df
    span = end - start

    if span == base:
        return frame

    positions = np.arange(0, span, 1.)

    if span > base / 2:
        # More than half the target already: keep every original row and
        # insert extra half-step positions at a regular spacing.
        spacing = span / (base - span)
        inserted = np.arange(0.5, span, spacing)
        indexed = frame.reset_index().set_index(positions)
        target = pd.Index(np.sort(np.append(positions, inserted)))
    else:
        # At most half the target: lay out an evenly spaced grid over the span.
        indexed = frame.reset_index().set_index(positions)
        target = pd.Index(np.arange(0, span, span / base))

    return indexed.reindex(target).interpolate(method='linear')

In [None]:
def sliding_window(df, duration, stride):
    """Plot every `duration`-row window of `df`, advancing `stride` rows at a time.

    Each window is passed through `upsample` and plotted in its own figure.

    Args:
        df: the rows to slide over.
        duration: number of rows per window.
        stride: number of rows to advance between windows; must be non-zero
            (`range` raises ValueError for a zero step).
    """
    # `end` is an exclusive slice bound, so the last valid window starts at
    # len(df) - duration; stopping one earlier would drop the window that ends
    # exactly at the last row.
    for start in range(0, len(df) - duration + 1, stride):
        end = start + duration
        window = df.iloc[start:end]
        prices = upsample(window, start, end)
        prices.plot()
        plt.show()

In [None]:
# Two independent, initially empty frames: raw paths and their smoothed versions.
raw_df, smooth_df = pd.DataFrame(), pd.DataFrame()

In [None]:
# Generate 10,000 normalised fractional-Brownian-motion paths plus a
# Savitzky-Golay-smoothed copy of each, one column per path.
# The columns are collected in dicts and the frames built once at the end:
# inserting columns one at a time fragments a DataFrame and gets slower as it grows.
raw_columns = {}
smooth_columns = {}
for i in range(10000):
    f = FBM(n=119, hurst=0.75, length=1, method='daviesharte')
    ts = normalize(f.fbm())
    raw_columns[i] = ts
    smooth_columns[i] = savgol_filter(ts,9,3)
raw_df = pd.DataFrame(raw_columns)
smooth_df = pd.DataFrame(smooth_columns)
#plt.plot(f.times(), ts)
#plt.plot(f.times(), savgol_filter(ts, 9, 3))

In [None]:
# Draw 1000 random fractional-Brownian-motion paths, run the extrema and
# pattern detectors on each, and plot every path on which a pattern is found.
# (Earlier experiments varied the path length over range(40, 140, 20) and
# derived `slices` from it; both are fixed here.)
n = 39
slices = 5
for count in range(1, 1001):
    hurst = np.random.uniform(.6, .7)
    f = FBM(n=n, hurst=hurst, length=n, method='daviesharte')
    ts = pd.Series(normalize(f.fbm()))

    max_min = find_max_min(ts, slices)
    patterns = find_patterns(max_min, .03, .05, 0)
    if not patterns:
        continue

    print(count, patterns)
    times = f.times()
    plt.plot(times, ts)
    plt.plot(times, savgol_filter(ts, slices, 3))
    max_min.plot(style='ro')
    plt.show()

In [None]:
# NOTE(review): non-runnable fragment kept for reference. The cell opens with
# `elif` and has no enclosing `def`/`if`, and e0..e5, slope3/slope4/slope5,
# slope_threshold, linear_threshold, `patterns` and `window` are not defined here.
# It appears to be an earlier rule set that appended (start, end) index pairs
# to a `patterns` mapping instead of returning a single pattern name.
elif abs(slope5 - slope3) <= slope_threshold*abs(slope3):
            
            #Right angle
            if (abs(e2 - e4) <= linear_threshold*np.mean([e2,e4])):
        
                # Right Triangle Bottom (Bullish Reversal)
                if (e1 < e2) and (e3 < e4) and (e1 < e3) and (e3 < e5) and (e0 > e2):
                    patterns['right_tri_bottom'].append((window.index[1], window.index[-1]))

                # Right Triangle Top (Bearish Reversal)
                elif (e1 > e2) and (e3 > e4) and (e1 > e3) and (e3 > e5) and (e0 < e2):
                    patterns['right_tri_top'].append((window.index[1], window.index[-1]))

                
                # Right Broad Bottom (Bullish Reversal)
                elif (e1 < e2) and (e3 < e4) and (e1 > e3) and (e3 > e5) and (e0 > e2):
                    patterns['right_broad_bottom'].append((window.index[1], window.index[-1]))
                    
                # Right Broad Top (Bearish Reversal)
                elif (e1 > e2) and (e3 > e4) and (e1 < e3) and (e3 < e5) and (e0 < e2):
                    patterns['right_broad_top'].append((window.index[1], window.index[-1]))

                
            # Broad Bottom (Bullish Reversal)
            elif (e1 > e3) and (e3 > e5) and (e2 > e1) and (e4 > e2) and (e0 > e2):
                patterns['broad_bottom'].append((window.index[1], window.index[-1]))

            # Broad Top (Bearish Reversal)
            elif (e1 < e3) and (e3 < e5) and (e2 < e1) and (e4 < e2) and (e0 < e2):
                patterns['broad_top'].append((window.index[1], window.index[-1]))

            
            # Wedge Bottom (Bearish Reversal)
            elif (e1 < e3) and (e3 < e5) and (e1 < e2) and (e4 < e2) and (e0 > e2) and \
                (abs(abs(slope5) - abs(slope4)) <= 0.03*np.mean([abs(slope5),abs(slope4)])):
                patterns['wedge_bottom'].append((window.index[1], window.index[-1]))

            # Wedge Top (Bearish Reversal)
            elif (e1 > e3) and (e3 > e5) and (e1 > e2) and (e4 > e2) and (e0 < e2) and \
                (abs(abs(slope5) - abs(slope4)) <= 0.03*np.mean([abs(slope5),abs(slope4)])):
                patterns['wedge_top'].append((window.index[1], window.index[-1]))
                

            # Channel Down (Bearish Continuation)
            #elif (e1 < e2) and (e3 < e4) and (e1 > e3) and (e3 > e5) and (e2 > e4):
                #patterns['ch_down'].append((window.index[0],window.index[-1]))

            # Channel Up (Bullish Continuation)
            #elif (e1 > e2) and (e3 > e4) and (e1 < e3) and (e3 < e5) and (e2 < e4):
                #patterns['ch_up'].append((window.index[0],window.index[-1]))
                
        #Horizontal patterns
        # All three tolerances below are relative to the mean of e1 and e3.
        elif (abs(e1 - e3) <= linear_threshold*np.mean([e1,e3])) and \
            (abs(e1 - e5) <= linear_threshold*np.mean([e1,e3])) and \
            (abs(e3 - e5) <= linear_threshold*np.mean([e1,e3])):


            # NOTE(review): this branch body starts with `elif` - the leading `if` is missing.
            # Ascending Triangle (Bullish Continuation)
            elif (e1 > e2) and (e3 > e4) and (e2 < e4) and (e5 > e4) and (e0 < e2):
                patterns['asc_tri'].append((window.index[1], window.index[-1]))

            # Descending Triangle (Bearish Continuation)
            elif (e1 < e2) and (e3 < e4) and (e2 > e4) and (e5 < e4) and (e0 > e2):
                patterns['desc_tri'].append((window.index[1], window.index[-1]))
                
        if patterns:
            return patterns

In [None]:
# NOTE(review): non-runnable fragment kept for reference. The `elif` branches
# have no preceding `if`, and e1..e5, `patterns` and `window` are not defined
# in this cell.
# Mean level of the odd-numbered extrema and of the even-numbered extrema.
rtop_g1 = np.mean([e1,e3,e5])
rtop_g2 = np.mean([e2,e4])

# Rectangle Top
# Every extremum must sit within 0.75% of its group mean, and the lowest odd
# extremum must be above the highest even one.
elif (e1 > e2) and (abs(e1-rtop_g1)/rtop_g1 < 0.0075) and \
    (abs(e3-rtop_g1)/rtop_g1 < 0.0075) and (abs(e5-rtop_g1)/rtop_g1 < 0.0075) and \
    (abs(e2-rtop_g2)/rtop_g2 < 0.0075) and (abs(e4-rtop_g2)/rtop_g2 < 0.0075) and \
    (min(e1, e3, e5) > max(e2, e4)):
        patterns['RTOP'].append((window.index[0], window.index[-1]))

# Rectangle Bottom
# NOTE(review): with e1 < e2 the mirror image of the Rectangle Top test would be
# `max(e1, e3, e5) < min(e2, e4)`; the `>` on the last condition looks unintended - confirm.
elif (e1 < e2) and (abs(e1-rtop_g1)/rtop_g1 < 0.0075) and \
    (abs(e3-rtop_g1)/rtop_g1 < 0.0075) and (abs(e5-rtop_g1)/rtop_g1 < 0.0075) and \
    (abs(e2-rtop_g2)/rtop_g2 < 0.0075) and (abs(e4-rtop_g2)/rtop_g2 < 0.0075) and \
    (max(e1, e3, e5) > min(e2, e4)):
        patterns['RBOT'].append((window.index[0], window.index[-1]))

In [None]:
# NOTE(review): non-runnable fragments kept for reference. They appear to be
# earlier drafts of the slope-band test that decides whether a new extremum `e`
# is appended to `extrema`/`indicies`; none of the names used (direction,
# reference_slope, slope, e, index, extrema, indicies, k, sign, the thresholds)
# are defined in this cell.

# Draft 1: band around `reference_slope`, with the thresholds swapped by direction.
if direction == 'top':
    if reference_slope > 0:
        if reference_slope*(1-outer_slope_threshold) < slope and slope < reference_slope*(1+inner_slope_threshold):
            extrema.append(e)
            indicies.append(index)
    elif reference_slope < 0:
        if reference_slope*(1+inner_slope_threshold) < slope and slope < reference_slope*(1-outer_slope_threshold):
            extrema.append(e)
            indicies.append(index)


else:
    if reference_slope > 0:
        if reference_slope*(1-inner_slope_threshold) < slope and slope < reference_slope*(1+outer_slope_threshold):
            extrema.append(e)
            indicies.append(index)
    elif reference_slope < 0:
        if reference_slope*(1+outer_slope_threshold) < slope and slope < reference_slope*(1-inner_slope_threshold):
            extrema.append(e)
            indicies.append(index)

# Exactly flat reference line: accept the point when its own slope is within linear_threshold.
if reference_slope == 0:
    if abs(slope) <= linear_threshold:
        extrema.append(e)
        indicies.append(index)





# Draft 2: the same test written against `slope_1_3`.
slope = ((e - extrema[sign]) / (transform(index) - transform(indicies[sign])))
if direction == 'top':
    if slope_1_3 > 0:
        if slope_1_3*(1-outer_slope_threshold) < slope and slope < slope_1_3*(1+inner_slope_threshold):
            extrema.append(e)
            indicies.append(index)
    elif slope_1_3 < 0:
        if slope_1_3*(1+inner_slope_threshold) < slope and slope < slope_1_3*(1-outer_slope_threshold):
            extrema.append(e)
            indicies.append(index)


else:
    if slope_1_3 > 0:
        if slope_1_3*(1-inner_slope_threshold) < slope and slope < slope_1_3*(1+outer_slope_threshold):
            extrema.append(e)
            indicies.append(index)
    elif slope_1_3 < 0:
        if slope_1_3*(1+outer_slope_threshold) < slope and slope < slope_1_3*(1-inner_slope_threshold):
            extrema.append(e)
            indicies.append(index)

if slope_1_3 == 0:
    if abs(slope) <= linear_threshold:
        extrema.append(e)
        indicies.append(index)

# Even extrema
# NOTE(review): dangling `elif` with an unindented body - not valid Python as written.
elif k % 2 == 1:
# NOTE(review): subtracts the whole `extrema` list rather than a single element
# (compare `extrema[sign]` in the draft above).
slope = ((e - extrema) / (transform(index) - transform(indicies[1])))

if slope_2_4 > 0:
    if slope_2_4*(1-outer_slope_threshold) < slope and slope < slope_2_4*(1+inner_slope_threshold):
        extrema.append(e)
        indicies.append(index)
elif slope_2_4 < 0:
    if slope_2_4*(1+inner_slope_threshold) < slope and slope < slope_2_4*(1-outer_slope_threshold):
        extrema.append(e)
        indicies.append(index)
else:
    if abs(slope) <= linear_threshold:
        extrema.append(e)
        indicies.append(index)

# NOTE(review): `else:` with an unindented `break` and no enclosing loop in this cell.
else:
break