In [1]:
! pip install cbpro
! pip install pandas
! pip install numpy
! pip install plotly



In [2]:
import talib
import cbpro
import pandas as pd
import requests
import numpy as np

In [59]:
# The next few cells are different ways we can collect bitcoin candlestick info

# Strategy 1: cryptocompare.com
# we can get data from 01-10-2016 to today but only in 1 day candlesticks or more
# to get more granular candles, you need to get an enterprise account
link = "https://min-api.cryptocompare.com/data/histoday?fsym=BTC&tsym=USD&limit=2000&aggregate=1"
historical_get = requests.get(link)
historical_json = historical_get.json()
historical_dict = historical_json['Data']
df = pd.DataFrame(historical_dict,
                             columns=['close', 'high', 'low', 'open', 'time', 'volumefrom', 'volumeto'],
                             dtype='float64')
posix_time = pd.to_datetime(df['time'], unit='s')
df.insert(0, "Date", posix_time)
df.drop("time", axis = 1, inplace = True)
df

Unnamed: 0,Date,close,high,low,open,volumefrom,volumeto
0,2016-01-14,429.13,433.74,427.01,432.18,46172.51,1.994873e+07
1,2016-01-15,372.26,430.15,364.71,429.13,268141.73,1.070606e+08
2,2016-01-16,385.04,386.72,350.39,372.26,183962.03,6.829711e+07
3,2016-01-17,382.47,390.72,377.71,385.04,66089.34,2.546438e+07
4,2016-01-18,384.40,387.29,373.21,382.47,69301.33,2.654647e+07
...,...,...,...,...,...,...,...
1996,2021-07-02,33804.54,33968.47,32715.18,33543.11,33082.40,1.102040e+09
1997,2021-07-03,34682.84,34944.91,33333.05,33804.54,17319.23,5.951172e+08
1998,2021-07-04,35287.97,35945.36,34390.95,34682.84,15950.93,5.638916e+08
1999,2021-07-05,33702.79,35290.80,33151.51,35287.97,31526.39,1.072690e+09


In [84]:
# Strategy 2: coinbase public API
# we can only get 300 cells of data at a time, but we can specify the granularity and the start and end time
# so to get a bunch of info, we can use a for loop
public_client = cbpro.PublicClient()
hist_rates = public_client.get_product_historic_rates('BTC-USD', granularity=300, start='2021-07-05', end='2021-07-06')
# create dataframe with candlestick data, convert epoch time column into something useful
df = pd.DataFrame(hist_rates, columns=['time', 'low', 'high', 'open', 'close', 'volume'])
posix_time = pd.to_datetime(df['time'], unit='s')
df.insert(0, "Date", posix_time)
df.drop("time", axis = 1, inplace = True)
df


Unnamed: 0,Date,low,high,open,close,volume
0,2021-07-06 00:00:00,33655.15,33824.68,33697.78,33744.48,19.201527
1,2021-07-05 23:55:00,33683.11,33819.32,33772.28,33697.78,21.662950
2,2021-07-05 23:50:00,33728.04,33928.03,33915.72,33764.32,37.385043
3,2021-07-05 23:45:00,33845.65,33938.44,33845.65,33917.78,18.122880
4,2021-07-05 23:40:00,33844.81,33984.16,33984.16,33850.20,20.738017
...,...,...,...,...,...,...
284,2021-07-05 00:20:00,34910.82,35018.34,34918.91,34997.87,50.736483
285,2021-07-05 00:15:00,34882.05,35004.27,34967.49,34918.91,24.857063
286,2021-07-05 00:10:00,34938.88,35022.94,34942.67,34970.98,34.058740
287,2021-07-05 00:05:00,34914.41,35047.70,34973.49,34942.66,85.480285


In [130]:
from datetime import date
from datetime import time
from datetime import datetime
from datetime import timedelta

historic_rates = []
end_date = datetime.now()
start_date = end_date - timedelta(days=1)
for i in range(0, 100):
    historic_rates = historic_rates + public_client.get_product_historic_rates('BTC-USD', granularity=3600, start=start_date, end=end_date)
    end_date = start_date
    start_date = start_date - timedelta(days=1)
historic_rates


df = pd.DataFrame(historic_rates, columns=['time', 'low', 'high', 'open', 'close', 'volume'])
posix_time = pd.to_datetime(df['time'], unit='s')
df.insert(0, "Date", posix_time)
df.drop("time", axis = 1, inplace = True)
df



Unnamed: 0,Date,low,high,open,close,volume
0,2021-07-06 19:00:00,33767.00,34177.00,33848.30,34032.84,471.600212
1,2021-07-06 18:00:00,33736.84,34100.00,33948.65,33848.31,471.760907
2,2021-07-06 17:00:00,33526.05,34058.71,34058.71,33943.62,717.703997
3,2021-07-06 16:00:00,33761.01,34150.00,34051.33,34058.69,455.980338
4,2021-07-06 15:00:00,33874.80,34173.01,34141.96,34047.00,499.605355
...,...,...,...,...,...,...
2395,2021-03-29 00:00:00,55290.00,55908.93,55781.22,55389.00,452.041489
2396,2021-03-28 23:00:00,55505.61,55876.81,55596.01,55778.82,392.760232
2397,2021-03-28 22:00:00,54998.52,55647.70,55125.01,55596.01,436.329319
2398,2021-03-28 21:00:00,54807.87,55332.88,54839.22,55125.01,361.650380


In [131]:
# extract Open, high, low, close
op = df['open']
hi = df['high']
lo = df['low']
cl = df['close']

In [132]:
candle_names = talib.get_function_groups()['Pattern Recognition']

In [133]:
# create columns for each pattern
pd.get_option("display.max_columns")
pd.set_option("display.max_columns", None)
for candle in candle_names:
    # below is same as;
    # df["CDL3LINESTRIKE"] = talib.CDL3LINESTRIKE(op, hi, lo, cl)
    df[candle] = getattr(talib, candle)(op, hi, lo, cl)
    

In [134]:
# add new columns 
# in order to develop an ML algorithm, we need to understand how the crypto performs after a certain
# indicator hits. To do this, we will plot what the high price is 1, 2, 3, 5, 8, 13, and 21 blocks after
# a given block
# with this information we will be able to develop a regression model of some sort
df['growth_1'] = np.nan
df['growth_2'] = np.nan
df['growth_3'] = np.nan
df['growth_5'] = np.nan
df['growth_8'] = np.nan
df['growth_13'] = np.nan
df['growth_21'] = np.nan


# this adds the data mentioned above
for index, row in df.iterrows():
    close_price = df.loc[index, 'close']
    prices = []
    try:
        prices.append(df.loc[index+1, 'high'])
        df.loc[index, 'growth_1'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+2, 'high'])
        prices.sort()
        df.loc[index, 'growth_2'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+3, 'high'])
        prices.sort()
        df.loc[index, 'growth_3'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+5, 'high'])
        prices.sort()
        df.loc[index, 'growth_5'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+8, 'high'])
        prices.sort()
        df.loc[index, 'growth_8'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+13, 'high'])
        prices.sort()
        df.loc[index, 'growth_13'] = (prices[-1] - close_price)/close_price
    except:
        pass
    try:
        prices.append(df.loc[index+21, 'high'])
        prices.sort()
        df.loc[index, 'growth_21'] = (prices[-1] - close_price)/close_price
    except:
        pass
    
df

Unnamed: 0,Date,low,high,open,close,volume,CDL2CROWS,CDL3BLACKCROWS,CDL3INSIDE,CDL3LINESTRIKE,CDL3OUTSIDE,CDL3STARSINSOUTH,CDL3WHITESOLDIERS,CDLABANDONEDBABY,CDLADVANCEBLOCK,CDLBELTHOLD,CDLBREAKAWAY,CDLCLOSINGMARUBOZU,CDLCONCEALBABYSWALL,CDLCOUNTERATTACK,CDLDARKCLOUDCOVER,CDLDOJI,CDLDOJISTAR,CDLDRAGONFLYDOJI,CDLENGULFING,CDLEVENINGDOJISTAR,CDLEVENINGSTAR,CDLGAPSIDESIDEWHITE,CDLGRAVESTONEDOJI,CDLHAMMER,CDLHANGINGMAN,CDLHARAMI,CDLHARAMICROSS,CDLHIGHWAVE,CDLHIKKAKE,CDLHIKKAKEMOD,CDLHOMINGPIGEON,CDLIDENTICAL3CROWS,CDLINNECK,CDLINVERTEDHAMMER,CDLKICKING,CDLKICKINGBYLENGTH,CDLLADDERBOTTOM,CDLLONGLEGGEDDOJI,CDLLONGLINE,CDLMARUBOZU,CDLMATCHINGLOW,CDLMATHOLD,CDLMORNINGDOJISTAR,CDLMORNINGSTAR,CDLONNECK,CDLPIERCING,CDLRICKSHAWMAN,CDLRISEFALL3METHODS,CDLSEPARATINGLINES,CDLSHOOTINGSTAR,CDLSHORTLINE,CDLSPINNINGTOP,CDLSTALLEDPATTERN,CDLSTICKSANDWICH,CDLTAKURI,CDLTASUKIGAP,CDLTHRUSTING,CDLTRISTAR,CDLUNIQUE3RIVER,CDLUPSIDEGAP2CROWS,CDLXSIDEGAP3METHODS,growth_1,growth_2,growth_3,growth_5,growth_8,growth_13,growth_21
0,2021-07-06 19:00:00,33767.00,34177.00,33848.30,34032.84,471.600212,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.001973,0.001973,0.003443,0.006250,0.007294,0.023059,0.023059
1,2021-07-06 18:00:00,33736.84,34100.00,33948.65,33848.31,471.760907,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.006216,0.008913,0.009593,0.019884,0.019884,0.029749,0.029749
2,2021-07-06 17:00:00,33526.05,34058.71,34058.71,33943.62,717.703997,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.006080,0.006758,0.008895,0.016391,0.031661,0.031661,0.031661
3,2021-07-06 16:00:00,33761.01,34150.00,34051.33,34058.69,455.980338,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.003357,0.005486,0.013584,0.013584,0.030574,0.030574,0.030574
4,2021-07-06 15:00:00,33874.80,34173.01,34141.96,34047.00,499.605355,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.005831,0.013933,0.013933,0.013933,0.026467,0.026467,0.026467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2395,2021-03-29 00:00:00,55290.00,55908.93,55781.22,55389.00,452.041489,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.008807,0.008807,0.008807,,,,
2396,2021-03-28 23:00:00,55505.61,55876.81,55596.01,55778.82,392.760232,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,-0.002351,-0.002351,-0.002351,,,,
2397,2021-03-28 22:00:00,54998.52,55647.70,55125.01,55596.01,436.329319,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.004733,-0.004733,,,,,
2398,2021-03-28 21:00:00,54807.87,55332.88,54839.22,55125.01,361.650380,0,0,0,0,0,0,0,0,0,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.001847,,,,,,


In [135]:
# this dictionary will contain statistical info about each candlestick
frame = df
candle_stats = {}
for candle_name in candle_names:
    candles = df[['Date', candle_name, 'growth_1', 'growth_2', 'growth_3', 'growth_5', 'growth_8', 'growth_13', 'growth_21']]
    
    candle_occurences = candles[candles[candle_name] == 100]
    # removes all rows with nan values in cells
    candle_occurences = candle_occurences.dropna()
    
    
    if candle_occurences.size > 100:
        sum_1_growth = 0
        sum_2_growth = 0
        sum_3_growth = 0
        sum_5_growth = 0
        sum_8_growth = 0
        sum_13_growth = 0
        sum_21_growth = 0
        count = 0
        for index, row in candle_occurences.iterrows():
            sum_1_growth += candle_occurences.loc[index, 'growth_1']
            sum_2_growth += candle_occurences.loc[index, 'growth_2']
            sum_3_growth += candle_occurences.loc[index, 'growth_3']
            sum_5_growth += candle_occurences.loc[index, 'growth_5']
            sum_8_growth += candle_occurences.loc[index, 'growth_8']
            sum_13_growth += candle_occurences.loc[index, 'growth_13']
            sum_21_growth += candle_occurences.loc[index, 'growth_21']
            count += 1
        candle_stats[candle_name] = [count, 
                                     sum_1_growth/count, 
                                     sum_2_growth/count, 
                                     sum_3_growth/count,
                                     sum_5_growth/count,
                                     sum_8_growth/count,
                                     sum_13_growth/count,
                                     sum_21_growth/count]

candle_stats
        

    
    

{'CDLBELTHOLD': [164,
  -0.005628922781306829,
  -0.0033829473547387946,
  -0.0006570101411802079,
  0.002213458886144424,
  0.005347931368930603,
  0.009130204175435086,
  0.01881585914669624],
 'CDLCLOSINGMARUBOZU': [128,
  -0.004075659339415659,
  -0.0014094099437396898,
  0.0008313601700849606,
  0.004453552160668835,
  0.007841328817658073,
  0.012610625140600292,
  0.01857258673132287],
 'CDLDOJI': [331,
  0.00688350608178967,
  0.01060644714656855,
  0.012927793098065576,
  0.01637416228084587,
  0.020705694419663766,
  0.02517482480597589,
  0.03198106384650725],
 'CDLDRAGONFLYDOJI': [31,
  0.003993117087581507,
  0.006395177360964171,
  0.008276561798631938,
  0.01218862705486218,
  0.015616026834280414,
  0.02083940110022715,
  0.03308391888994296],
 'CDLENGULFING': [105,
  -0.001632499905117702,
  0.0007615259031742495,
  0.00308959533458174,
  0.007378518428838818,
  0.011395096460230661,
  0.01894608211052548,
  0.026637335014889404],
 'CDLGRAVESTONEDOJI': [24,
  0.0073326

In [138]:
# now, we will rank how well bitcoin performs after a certain candlestick pattern is detected.
# we will do this by taking the average percent gain from the close price to the high price
# in the 1st, 2nd, 3rd, 5th, 8th, 13th, and 21st and ranking their performance over those intervals
fib = [1, 2, 3, 5, 8, 13, 21]
ranks_to_show = 11
for index in range(1, 8):
    print("Showing AVG Gain Rankings " + str(fib[index-1]) + " candles after the candlestick pattern was detected")
    sorted_candles = sorted(candle_stats.items(), key=lambda e: e[1][index])
    for index2 in range(1, ranks_to_show):
        print(str(index2) + ". " + sorted_candles[-index2][0] +  " up " + str(sorted_candles[-index2][1][index]) + " % on AVG, Occured " + str(sorted_candles[-index2][1][0]) + " times")
    
    


        
        

Showing AVG Gain Rankings 1 candles after the candlestick pattern was detected
1. CDLMATCHINGLOW up 0.012451812423452497 % on AVG, Occured 41 times
2. CDLHIGHWAVE up 0.007536399369733611 % on AVG, Occured 160 times
3. CDLGRAVESTONEDOJI up 0.007332663972578278 % on AVG, Occured 24 times
4. CDLLONGLEGGEDDOJI up 0.00688350608178967 % on AVG, Occured 331 times
5. CDLDOJI up 0.00688350608178967 % on AVG, Occured 331 times
6. CDLRICKSHAWMAN up 0.006767701141668271 % on AVG, Occured 247 times
7. CDLSPINNINGTOP up 0.006473331619614726 % on AVG, Occured 269 times
8. CDLTAKURI up 0.004216150810403202 % on AVG, Occured 32 times
9. CDLDRAGONFLYDOJI up 0.003993117087581507 % on AVG, Occured 31 times
10. CDLHAMMER up 0.003755286522361486 % on AVG, Occured 65 times
Showing AVG Gain Rankings 2 candles after the candlestick pattern was detected
1. CDLMATCHINGLOW up 0.014191914711012218 % on AVG, Occured 41 times
2. CDLHIGHWAVE up 0.012813728599266046 % on AVG, Occured 160 times
3. CDLSPINNINGTOP up 0.0