# Prepare Candlestick Data to CNN

In [1]:
import pandas as pd
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import logging
import shutil

## Set the logger

In [2]:
def setup_logger(logger_name, log_file, level=logging.INFO):
    l = logging.getLogger(logger_name)
    formatter = logging.Formatter('%(name)s|%(asctime)s|%(levelname)s|[%(filename)s:%(lineno)s - %(funcName)20s() ]|%(message)s')
    fileHandler = logging.FileHandler(log_file, mode='w')
    fileHandler.setFormatter(formatter)
    streamHandler = logging.StreamHandler()
    streamHandler.setFormatter(formatter)

    l.setLevel(level)
    l.addHandler(fileHandler)
    l.addHandler(streamHandler)

In [3]:
setup_logger('root', 'logs/prepare_candlesticks_to_CNN.log')

log_results = logging.getLogger('root')

## Get the data

In [4]:
input_data_path = '../data'
input_data_filename = 'binance_BTCUSDT_1m_from_2020_01_01_to_2021_12_31_candlesticks_signals_processed'
input_data_extension = ".csv"
full_path_input_data = os.path.join(input_data_path, input_data_filename + input_data_extension)

In [5]:
df = pd.read_csv(full_path_input_data)

In [6]:
df.head(10)

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,CDLINVERTEDHAMMER_NEW,CDLHAMMER_NEW,CDLPIERCING_NEW,CDLMORNINGSTAR_NEW,CDLENGULFINGBULLISH_NEW,CDLSHOOTINGSTAR_NEW,CDLHANGINGMAN_NEW,CDLDARKCLOUDCOVER_NEW,CDLEVENINGSTAR_NEW,CDLENGULFINGBEARISH_NEW
0,1577836800000,7195.24,7196.25,7183.14,7186.68,51.642812,1577836859999,371233.518355,493,19.59823,...,0,0,0,0,0,0,0,0,0,0
1,1577836860000,7187.67,7188.06,7182.2,7184.03,7.248148,1577836919999,52080.127788,135,2.031772,...,0,0,0,0,0,0,0,0,0,0
2,1577836920000,7184.41,7184.71,7180.26,7182.43,11.681677,1577836979999,83903.741635,202,5.479244,...,0,0,0,0,0,0,0,0,0,0
3,1577836980000,7183.83,7188.94,7182.49,7185.94,10.025391,1577837039999,72033.226649,136,3.294966,...,0,0,0,0,0,0,0,0,0,0
4,1577837040000,7185.54,7185.54,7178.64,7179.78,14.911105,1577837099999,107066.521825,161,2.369033,...,0,0,0,0,0,0,0,0,0,0
5,1577837100000,7179.76,7182.51,7178.2,7179.99,12.463243,1577837159999,89484.363535,143,3.652785,...,0,0,0,0,0,0,0,0,0,0
6,1577837160000,7180.0,7182.0,7179.99,7182.0,3.573774,1577837219999,25662.34791,72,1.678855,...,0,0,0,0,0,0,0,0,0,0
7,1577837220000,7181.7,7183.77,7180.91,7183.66,14.470782,1577837279999,103928.870431,147,7.166713,...,0,0,0,0,0,0,0,0,0,0
8,1577837280000,7183.9,7187.74,7183.45,7187.68,12.842443,1577837339999,92277.914467,134,4.996467,...,0,0,0,0,0,0,0,0,0,0
9,1577837340000,7187.68,7191.77,7186.02,7191.07,16.014983,1577837399999,115127.764021,135,7.271693,...,0,0,0,0,0,0,0,0,0,0


In [7]:
df.tail()

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volumne,number_of_trades,taker_buy_base_asset_volume,...,CDLINVERTEDHAMMER_NEW,CDLHAMMER_NEW,CDLPIERCING_NEW,CDLMORNINGSTAR_NEW,CDLENGULFINGBULLISH_NEW,CDLSHOOTINGSTAR_NEW,CDLHANGINGMAN_NEW,CDLDARKCLOUDCOVER_NEW,CDLEVENINGSTAR_NEW,CDLENGULFINGBEARISH_NEW
1048951,1640908560000,47098.19,47147.14,47098.18,47134.56,6.49057,1640908619999,305861.626894,387,4.02327,...,0,0,0,0,0,0,0,0,0,0
1048952,1640908620000,47134.56,47161.69,47129.96,47144.56,4.78281,1640908679999,225491.372765,324,1.69518,...,0,0,0,0,0,0,0,0,0,0
1048953,1640908680000,47144.56,47147.89,47122.96,47139.98,4.15772,1640908739999,195959.991784,249,1.43431,...,0,0,0,0,0,0,0,0,0,0
1048954,1640908740000,47139.98,47141.73,47116.71,47120.87,3.97618,1640908799999,187407.572887,336,0.88827,...,0,0,0,0,0,0,0,0,0,0
1048955,1640908800000,47120.88,47121.3,47080.0,47091.29,14.37694,1640908859999,677226.182537,514,3.85955,...,0,0,0,0,0,0,0,0,0,0


In [8]:
df.columns

Index(['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time',
       'quote_asset_volumne', 'number_of_trades',
       'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore',
       'formatted_open_time', 'formatted_close_time', 'upper_shadow',
       'lower_shadow', 'real_body', 'CDLINVERTEDHAMMER', 'CDLHAMMER',
       'CDLPIERCING', 'CDLMORNINGSTAR', 'CDLSHOOTINGSTAR', 'CDLHANGINGMAN',
       'CDLDARKCLOUDCOVER', 'CDLEVENINGSTAR', 'CDLENGULFING',
       'CDLENGULFINGBULLISH', 'CDLENGULFINGBEARISH', 'CDLINVERTEDHAMMER_NEW',
       'CDLHAMMER_NEW', 'CDLPIERCING_NEW', 'CDLMORNINGSTAR_NEW',
       'CDLENGULFINGBULLISH_NEW', 'CDLSHOOTINGSTAR_NEW', 'CDLHANGINGMAN_NEW',
       'CDLDARKCLOUDCOVER_NEW', 'CDLEVENINGSTAR_NEW',
       'CDLENGULFINGBEARISH_NEW'],
      dtype='object')

In [9]:
df.shape[0]

1048956

In [10]:
df.index = pd.DatetimeIndex(df['formatted_open_time'])

In [11]:
df_selected_columns = df[['open_time', 'close', 'upper_shadow', 'lower_shadow', 'real_body', 
'CDLINVERTEDHAMMER_NEW', 'CDLHAMMER_NEW', 'CDLMORNINGSTAR_NEW', 'CDLSHOOTINGSTAR_NEW', 
'CDLHANGINGMAN_NEW', 'CDLEVENINGSTAR_NEW', 'CDLENGULFINGBULLISH_NEW', 'CDLENGULFINGBEARISH_NEW']]

In [12]:
df_selected_columns.head(10)

Unnamed: 0_level_0,open_time,close,upper_shadow,lower_shadow,real_body,CDLINVERTEDHAMMER_NEW,CDLHAMMER_NEW,CDLMORNINGSTAR_NEW,CDLSHOOTINGSTAR_NEW,CDLHANGINGMAN_NEW,CDLEVENINGSTAR_NEW,CDLENGULFINGBULLISH_NEW,CDLENGULFINGBEARISH_NEW
formatted_open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01 00:00:00,1577836800000,7186.68,1.01,3.54,8.56,0,0,0,0,0,0,0,0
2020-01-01 00:01:00,1577836860000,7184.03,0.39,1.83,3.64,0,0,0,0,0,0,0,0
2020-01-01 00:02:00,1577836920000,7182.43,0.3,2.17,1.98,0,0,0,0,0,0,0,0
2020-01-01 00:03:00,1577836980000,7185.94,3.0,1.34,2.11,0,0,0,0,0,0,0,0
2020-01-01 00:04:00,1577837040000,7179.78,0.0,1.14,5.76,0,0,0,0,0,0,0,0
2020-01-01 00:05:00,1577837100000,7179.99,2.52,1.56,0.23,0,0,0,0,0,0,0,0
2020-01-01 00:06:00,1577837160000,7182.0,0.0,0.01,2.0,0,0,0,0,0,0,0,0
2020-01-01 00:07:00,1577837220000,7183.66,0.11,0.79,1.96,0,0,0,0,0,0,0,0
2020-01-01 00:08:00,1577837280000,7187.68,0.06,0.45,3.78,0,0,0,0,0,0,0,0
2020-01-01 00:09:00,1577837340000,7191.07,0.7,1.66,3.39,0,0,0,0,0,0,0,0


In [13]:
df_selected_columns.rename(columns = {'CDLINVERTEDHAMMER_NEW':'inverted_hammer', 'CDLHAMMER_NEW':'hammer', 'CDLMORNINGSTAR_NEW':'morning_star', 'CDLSHOOTINGSTAR_NEW':'shooting_star',
                                        'CDLHANGINGMAN_NEW':'hanging_man', 'CDLEVENINGSTAR_NEW':'evening_star', 'CDLENGULFINGBULLISH_NEW':'bullish_engulfing', 'CDLENGULFINGBEARISH_NEW':'bearish_engulfing'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_selected_columns.rename(columns = {'CDLINVERTEDHAMMER_NEW':'inverted_hammer', 'CDLHAMMER_NEW':'hammer', 'CDLMORNINGSTAR_NEW':'morning_star', 'CDLSHOOTINGSTAR_NEW':'shooting_star',


In [14]:
df_selected_columns.head(10)

Unnamed: 0_level_0,open_time,close,upper_shadow,lower_shadow,real_body,inverted_hammer,hammer,morning_star,shooting_star,hanging_man,evening_star,bullish_engulfing,bearish_engulfing
formatted_open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01 00:00:00,1577836800000,7186.68,1.01,3.54,8.56,0,0,0,0,0,0,0,0
2020-01-01 00:01:00,1577836860000,7184.03,0.39,1.83,3.64,0,0,0,0,0,0,0,0
2020-01-01 00:02:00,1577836920000,7182.43,0.3,2.17,1.98,0,0,0,0,0,0,0,0
2020-01-01 00:03:00,1577836980000,7185.94,3.0,1.34,2.11,0,0,0,0,0,0,0,0
2020-01-01 00:04:00,1577837040000,7179.78,0.0,1.14,5.76,0,0,0,0,0,0,0,0
2020-01-01 00:05:00,1577837100000,7179.99,2.52,1.56,0.23,0,0,0,0,0,0,0,0
2020-01-01 00:06:00,1577837160000,7182.0,0.0,0.01,2.0,0,0,0,0,0,0,0,0
2020-01-01 00:07:00,1577837220000,7183.66,0.11,0.79,1.96,0,0,0,0,0,0,0,0
2020-01-01 00:08:00,1577837280000,7187.68,0.06,0.45,3.78,0,0,0,0,0,0,0,0
2020-01-01 00:09:00,1577837340000,7191.07,0.7,1.66,3.39,0,0,0,0,0,0,0,0


In [15]:
df_selected_columns.describe()

Unnamed: 0,open_time,close,upper_shadow,lower_shadow,real_body,inverted_hammer,hammer,morning_star,shooting_star,hanging_man,evening_star,bullish_engulfing,bearish_engulfing
count,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0,1048956.0
mean,1609381000000.0,29166.95,9.66368,9.79552,22.24297,0.0002602588,0.001430947,0.0002640721,-0.004060228,-0.01237707,-0.002392855,0.001899031,-0.01831345
std,18207550000.0,19660.29,19.41676,18.89997,35.62057,0.01613045,0.0378008,0.01624816,0.06359045,0.1105617,0.04885828,0.0435365,0.1340824
min,1577837000000.0,3810.78,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0
25%,1593625000000.0,9686.18,0.51,0.54,2.92,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1609381000000.0,28706.91,2.93,3.01,9.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1625151000000.0,47886.76,11.14,11.18,28.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1640909000000.0,69000.0,2698.84,1398.27,2157.91,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0


### Check lines with at least one non-zero value

In [16]:
list_candlestick_columns = ['inverted_hammer', 'hammer', 'morning_star', 'shooting_star',
            'hanging_man', 'evening_star', 'bullish_engulfing', 'bearish_engulfing']

In [17]:
abs(df_selected_columns[list_candlestick_columns].sum())

inverted_hammer        273
hammer                1501
morning_star           277
shooting_star         4259
hanging_man          12983
evening_star          2510
bullish_engulfing     1992
bearish_engulfing    19210
dtype: int64

In [18]:
abs(df_selected_columns[list_candlestick_columns].sum()).sum()

43005

In [19]:
filter_pos = ((df_selected_columns[list_candlestick_columns] == 1)).any(axis=1)
df_selected_columns[filter_pos]

Unnamed: 0_level_0,open_time,close,upper_shadow,lower_shadow,real_body,inverted_hammer,hammer,morning_star,shooting_star,hanging_man,evening_star,bullish_engulfing,bearish_engulfing
formatted_open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01 08:20:00,1577866800000,7190.86,0.25,0.00,3.53,0,0,0,0,0,0,1,0
2020-01-01 23:17:00,1577920620000,7182.69,0.89,1.06,2.68,0,0,0,0,0,0,1,0
2020-01-02 01:37:00,1577929020000,7191.80,0.18,0.26,3.56,0,0,0,0,0,0,1,0
2020-01-02 01:50:00,1577929800000,7189.88,1.41,2.22,1.33,0,0,0,0,0,0,1,0
2020-01-02 20:23:00,1577996580000,6957.43,0.08,7.43,0.97,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-30 13:17:00,1640870220000,47470.84,9.65,3.58,40.86,0,0,0,0,0,0,1,0
2021-12-30 16:05:00,1640880300000,47187.96,12.04,0.00,12.26,0,0,0,0,0,0,1,0
2021-12-30 20:07:00,1640894820000,47532.84,10.76,0.13,32.44,0,0,0,0,0,0,1,0
2021-12-30 21:05:00,1640898300000,47042.41,0.01,122.97,13.67,0,1,0,0,0,0,0,0


In [20]:
filter_neg = ((df_selected_columns[list_candlestick_columns] == -1)).any(axis=1)
df_selected_columns[filter_neg]

Unnamed: 0_level_0,open_time,close,upper_shadow,lower_shadow,real_body,inverted_hammer,hammer,morning_star,shooting_star,hanging_man,evening_star,bullish_engulfing,bearish_engulfing
formatted_open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-01 00:50:00,1577839800000,7183.92,0.06,1.87,0.98,0,0,0,0,-1,0,0,0
2020-01-01 01:35:00,1577842500000,7211.61,0.26,0.00,7.50,0,0,0,0,0,0,0,-1
2020-01-01 01:41:00,1577842860000,7212.68,0.00,0.52,5.60,0,0,0,0,0,0,0,-1
2020-01-01 01:55:00,1577843700000,7209.03,0.62,0.82,0.72,0,0,0,0,0,0,0,-1
2020-01-01 02:06:00,1577844360000,7219.03,2.29,0.00,0.43,0,0,0,-1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-30 22:47:00,1640904420000,46967.01,19.43,19.27,66.00,0,0,0,0,0,0,0,-1
2021-12-30 22:49:00,1640904540000,46993.72,2.67,56.23,3.59,0,0,0,0,-1,0,0,0
2021-12-30 23:15:00,1640906100000,47133.76,6.08,44.86,23.40,0,0,0,0,-1,0,0,0
2021-12-30 23:53:00,1640908380000,47159.84,0.16,48.37,4.21,0,0,0,0,-1,0,0,0


In [21]:
# list_new_column_name_pos = ['CDLINVERTEDHAMMER_NEW', 'CDLHAMMER_NEW', 'CDLMORNINGSTAR_NEW', 'CDLENGULFINGBULLISH_NEW']
# list_new_column_name_neg = ['CDLSHOOTINGSTAR_NEW', 'CDLHANGINGMAN_NEW', 'CDLEVENINGSTAR_NEW', 'CDLENGULFINGBEARISH_NEW']

In [22]:
def normalize_array(array):
    '''
    Receives an numpy array and returns a normalized one
    
    Input:
        array:
            type: numpy array
            example: array([7186.68, 7184.03, 7182.43, 7185.94, 7179.78, 7179.99, 7182., 7183.66, 7187.68, 7191.07])
    Output:
        normalized_array:
            type: numpy array
            example: array([0.61116032, 0.37643933, 0.23472099, 0.54561559, 0., 0.01860053, 0.19663419, 0.34366696, 0.69973428, 1.])
    '''
    try:
        min_array = array.min()
        max_array = array.max()
        normalize = lambda x: (x - min_array) / (max_array - min_array)

        normalized_array = [normalize(x) for x in array]

        return np.array(normalized_array)

    except Exception as e:
        log_results.error(f'array:{array}|min_array:{min_array}|max_array:{max_array}|normalized_array:{normalized_array}')
        print(e)    

In [23]:
def get_arc_cosine(normalized_array):
    '''
    Get an normalised array and calculate the arc cosine
    
    Input:
        normalized_array:
            type: numpy array
            example: array([0.61116032, 0.37643933, 0.23472099, 0.54561559, 0., 0.01860053, 0.19663419, 0.34366696, 0.69973428, 1.])
    Output:
        normalized_arccos_array
            type: numpy array
            example: array([0.9132706 , 1.18484643, 1.3338648 , 0.99367283, 1.57079633, 1.55219472, 1.37287243, 1.21997741, 0.79577085, 0.])    
    '''
    try:
        normalized_arccos_array = np.arccos(normalized_array)
        return normalized_arccos_array
    except Exception as e:
        log_results.error(f'normalized_array:{normalized_array}')
        print(e)

In [24]:
# array_test2 = np.array([1,10])

# array_sum = np.empty(0)
# for elem1 in np.nditer(array_test2):
#     for elem2 in np.nditer(array_test2):
#         array_sum = np.append(array_sum, elem1 + elem2)

# reshaped_array_sum = array_sum.reshape((array_test2.size,array_test2.size))
# print(reshaped_array_sum)

In [25]:
def get_GAF(array_timeseries):
    '''
    Receives a historical series and calculate the Gramian Angular Field Matrix
    
    Input:
        array_timeseries:
            type: numpy array
            example: array([7186.68, 7184.03, 7182.43, 7185.94, 7179.78, 7179.99, 7182., 7183.66, 7187.68, 7191.07])
    Output:
        gaf_matrix:
            type: numpy 2d array --> shape (array_timeseries.size, array_timeseries.size)
            example: array([[-2.52966129e-01, -5.03219818e-01, -6.25942178e-01,
                            -3.29852287e-01, -7.91506832e-01, -7.80001990e-01,
                            -6.55879168e-01, -5.33261801e-01, -1.37805193e-01,
                            6.11160319e-01],
                        [-5.03219818e-01, -7.16586866e-01, -8.12200871e-01,
                            -5.70999583e-01, -9.26441273e-01, -9.19279023e-01,
                            -8.34333447e-01, -7.40643451e-01, -3.98445115e-01,
                            3.76439327e-01],
                            .
                            .
                            .
                        [ 6.11160319e-01,  3.76439327e-01,  2.34720992e-01,
                            5.45615589e-01,  6.12323400e-17,  1.86005314e-02,
                            1.96634190e-01,  3.43666962e-01,  6.99734278e-01,
                            1.00000000e+00]])
    
    '''
    try:
        
        array_timeseries_normalized = normalize_array(array_timeseries)
        array_timeseries_normalized_arccos = get_arc_cosine(array_timeseries_normalized)
        
        array_sum = np.empty(0)
        for elem1 in np.nditer(array_timeseries_normalized_arccos):
            for elem2 in np.nditer(array_timeseries_normalized_arccos):
                array_sum = np.append(array_sum, elem1 + elem2)
        
        reshaped_array_sum = array_sum.reshape((array_timeseries.size, array_timeseries.size))
        
        gaf_matrix = np.cos(reshaped_array_sum)
        
        return gaf_matrix

    except Exception as e:
        log_results.error(f'reshaped_array_sum:{reshaped_array_sum}')
        print(e)

In [26]:
slope_size = 10

In [27]:
#shutil.rmtree('../data') 

In [28]:
try: 
    shutil.rmtree('../data/dataset') 
except Exception as error: 
    pass

dict_count_dataset_status = {
    'training': {
            'bearish_engulfing': 0,
            'bullish_engulfing': 0,
            'evening_star': 0,
            'hammer': 0,
            'hanging_man': 0,
            'inverted_hammer': 0,
            'morning_star': 0,
            'shooting_star': 0
    },
    'validation': {
            'bearish_engulfing': 0,
            'bullish_engulfing': 0,
            'evening_star': 0,
            'hammer': 0,
            'hanging_man': 0,
            'inverted_hammer': 0,
            'morning_star': 0,
            'shooting_star': 0
    }
}

log_results.info(f'total lines to be processed: {df.shape[0]}')

for i in range(0,df.shape[0]-slope_size,1): # first 1 replace by df.shape[0]
    
    try:

        # Get the data base on the slope size
        df_selected_columns_slope_temp = df_selected_columns.iloc[i:i+slope_size]

        # Get the timeseries columns into an array
        array_close = df_selected_columns_slope_temp['close'].to_numpy()
        array_upper_shadow = df_selected_columns_slope_temp['upper_shadow'].to_numpy()
        array_lower_shadow = df_selected_columns_slope_temp['lower_shadow'].to_numpy()
        array_real_body = df_selected_columns_slope_temp['real_body'].to_numpy()

        # Get the GAF matrix for the selected timeseries
        gaf_close = get_GAF(array_close)
        gaf_upper_shadow = get_GAF(array_upper_shadow)
        gaf_lower_shadow = get_GAF(array_lower_shadow)
        gaf_real_body = get_GAF(array_real_body)

        concatenated_gaf = np.concatenate(([gaf_close], [gaf_upper_shadow], [gaf_lower_shadow], [gaf_real_body]), axis=0).reshape(slope_size,slope_size,4)

        # Get the last line of the previous df
        df_selected_columns_slope_temp_last_line = df_selected_columns_slope_temp.iloc[-1:]

        # Identify with columns has positive (pos) or negative (neg) candle patterns
        list_candle_patterns_pos = (np.where(df_selected_columns_slope_temp_last_line[list_candlestick_columns].eq(1))[1]).tolist()
        list_columns_candle_patterns_pos = df_selected_columns_slope_temp_last_line[list_candlestick_columns].iloc[:,list_candle_patterns_pos].columns.to_list()

        #log_results.info(f'open_time: {df_selected_columns_slope_temp_last_line["open_time"].values}|list_columns_candle_patterns_pos: {list_columns_candle_patterns_pos}')

        list_candle_patterns_neg = (np.where(df_selected_columns_slope_temp_last_line[list_candlestick_columns].eq(-1))[1]).tolist()
        list_columns_candle_patterns_neg = df_selected_columns_slope_temp_last_line[list_candlestick_columns].iloc[:,list_candle_patterns_neg].columns.to_list()

        #log_results.info(f'open_time: {df_selected_columns_slope_temp_last_line["open_time"].values}|list_columns_candle_patterns_neg: {list_columns_candle_patterns_neg}')

        list_candle_patterns_neutral = (np.where(df_selected_columns_slope_temp_last_line[list_candlestick_columns].eq(0))[1]).tolist()
        list_columns_candle_patterns_neutral = df_selected_columns_slope_temp_last_line[list_candlestick_columns].iloc[:,list_candle_patterns_neutral].columns.to_list()

        #log_results.info(f'open_time: {df_selected_columns_slope_temp_last_line["open_time"].values}|list_columns_candle_patterns_neutral: {list_columns_candle_patterns_neutral}')

        root_data_path = '../data/dataset'

        # first case: others pattern
        if(len(list_columns_candle_patterns_neutral) > 0):
            pass
        
        # second case: bullish patterns
        if(len(list_columns_candle_patterns_pos) > 0):
            for element in list_columns_candle_patterns_pos:
                
                # first 70% going to training set; the other part, go to validation
                if(i<=round(df.shape[0]*0.7,0)):
                    dataset_type_path = 'training' #'training_set'
                    label_path = str(element)
                    file_name = str(df_selected_columns_slope_temp_last_line['open_time'].values[0])
                    full_path = os.path.join(root_data_path, dataset_type_path, label_path, file_name)

                    try: 
                        os.makedirs(os.path.join(root_data_path, dataset_type_path, label_path)) 
                    except OSError as error: 
                        pass

                    #np.save(full_path, concatenated_gaf)

                    plt.imshow(concatenated_gaf, cmap='hot', interpolation='nearest')
                    plt.axis('off') 
                    plt.savefig(full_path, bbox_inches='tight', pad_inches=0, transparent=True)
                    plt.clf()

                    dict_count_dataset_status[dataset_type_path][label_path] = dict_count_dataset_status[dataset_type_path][label_path] + 1

                else:
                    dataset_type_path = 'validation' #'test_set'
                    label_path = str(element)
                    file_name = str(df_selected_columns_slope_temp_last_line['open_time'].values[0])
                    full_path = os.path.join(root_data_path, dataset_type_path, label_path, file_name)

                    try: 
                        os.makedirs(os.path.join(root_data_path, dataset_type_path, label_path)) 
                    except OSError as error: 
                        pass

                    #np.save(full_path, concatenated_gaf)

                    plt.imshow(concatenated_gaf, cmap='hot', interpolation='nearest')
                    plt.axis('off') 
                    plt.savefig(full_path, bbox_inches='tight', pad_inches=0, transparent=True)
                    plt.clf()

                    dict_count_dataset_status[dataset_type_path][label_path] = dict_count_dataset_status[dataset_type_path][label_path] + 1
        
        # third case: bearish patterns
        if(len(list_columns_candle_patterns_neg) > 0):
            for element in list_columns_candle_patterns_neg:
                
                # first 70% going to training set; the other part, go to validation
                if(i<=round(df.shape[0]*0.7,0)):
                    dataset_type_path = 'training' #'training_set'
                    label_path = str(element)
                    file_name = str(df_selected_columns_slope_temp_last_line['open_time'].values[0])
                    full_path = os.path.join(root_data_path, dataset_type_path, label_path, file_name)

                    try: 
                        os.makedirs(os.path.join(root_data_path, dataset_type_path, label_path)) 
                    except OSError as error: 
                        pass

                    #np.save(full_path, concatenated_gaf)

                    plt.imshow(concatenated_gaf, cmap='hot', interpolation='nearest')
                    plt.axis('off') 
                    plt.savefig(full_path, bbox_inches='tight', pad_inches=0, transparent=True)
                    plt.clf()

                    dict_count_dataset_status[dataset_type_path][label_path] = dict_count_dataset_status[dataset_type_path][label_path] + 1

                else:
                    dataset_type_path = 'validation' #'test_set'
                    label_path = str(element)
                    file_name = str(df_selected_columns_slope_temp_last_line['open_time'].values[0])
                    full_path = os.path.join(root_data_path, dataset_type_path, label_path, file_name)

                    try: 
                        os.makedirs(os.path.join(root_data_path, dataset_type_path, label_path)) 
                    except OSError as error: 
                        pass

                    #np.save(full_path, concatenated_gaf)

                    plt.imshow(concatenated_gaf, cmap='hot', interpolation='nearest')
                    plt.axis('off') 
                    plt.savefig(full_path, bbox_inches='tight', pad_inches=0, transparent=True)
                    plt.clf()

                    dict_count_dataset_status[dataset_type_path][label_path] = dict_count_dataset_status[dataset_type_path][label_path] + 1


        if(i % 10000 == 0):
            log_results.info(f'number of iteractions completed: {i}')
            log_results.info(f'count dataset status: {dict_count_dataset_status}')

    except Exception as error: 
        #log_results.error(f'i: {i}|open_time: {df_selected_columns_slope_temp_last_line["open_time"].values}')
        pass

log_results.info(f'final count dataset status: {dict_count_dataset_status}')
    

root|2022-07-18 10:56:25,774|INFO|[1915253130.py:29 -      <cell line: 29>() ]|total lines to be processed: 1048956
root|2022-07-18 10:56:25,779|INFO|[1915253130.py:169 -      <cell line: 31>() ]|number of iteractions completed: 0
root|2022-07-18 10:56:25,779|INFO|[1915253130.py:170 -      <cell line: 31>() ]|count dataset status: {'training': {'bearish_engulfing': 0, 'bullish_engulfing': 0, 'evening_star': 0, 'hammer': 0, 'hanging_man': 0, 'inverted_hammer': 0, 'morning_star': 0, 'shooting_star': 0}, 'validation': {'bearish_engulfing': 0, 'bullish_engulfing': 0, 'evening_star': 0, 'hammer': 0, 'hanging_man': 0, 'inverted_hammer': 0, 'morning_star': 0, 'shooting_star': 0}}
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for

<Figure size 432x288 with 0 Axes>

In [39]:
print(dict_count_dataset_status['training']['inverted_hammer'])
print(dict_count_dataset_status['training']['hammer'])
print(dict_count_dataset_status['training']['morning_star'])
print(dict_count_dataset_status['training']['shooting_star'])
print(dict_count_dataset_status['training']['hanging_man'])
print(dict_count_dataset_status['training']['evening_star'])
print(dict_count_dataset_status['training']['bullish_engulfing'])
print(dict_count_dataset_status['training']['bearish_engulfing'])
print(f'total:{sum(dict_count_dataset_status["training"].values())}')

177
947
191
3192
9341
1820
1306
14151
total:31125


In [38]:
print(dict_count_dataset_status['validation']['inverted_hammer'])
print(dict_count_dataset_status['validation']['hammer'])
print(dict_count_dataset_status['validation']['morning_star'])
print(dict_count_dataset_status['validation']['shooting_star'])
print(dict_count_dataset_status['validation']['hanging_man'])
print(dict_count_dataset_status['validation']['evening_star'])
print(dict_count_dataset_status['validation']['bullish_engulfing'])
print(dict_count_dataset_status['validation']['bearish_engulfing'])
print(f'total:{sum(dict_count_dataset_status["validation"].values())}')

96
554
86
1067
3642
690
686
5059
total:11880


In [None]:
print(dict_count_dataset_status['training']['inverted_hammer'] + dict_count_dataset_status['validation']['inverted_hammer'])
print(dict_count_dataset_status['training']['hammer'] + dict_count_dataset_status['validation']['hammer'])
print(dict_count_dataset_status['training']['morning_star'] + dict_count_dataset_status['validation']['morning_star'])
print(dict_count_dataset_status['training']['shooting_star'] + dict_count_dataset_status['validation']['shooting_star'])
print(dict_count_dataset_status['training']['hanging_man'] + dict_count_dataset_status['validation']['hanging_man'])
print(dict_count_dataset_status['training']['evening_star'] + dict_count_dataset_status['validation']['evening_star'])
print(dict_count_dataset_status['training']['bullish_engulfing'] + dict_count_dataset_status['validation']['bullish_engulfing'])
print(dict_count_dataset_status['training']['bearish_engulfing'] + dict_count_dataset_status['validation']['bearish_engulfing'])

273
1501
277
4259
12983
2510
1992
19210


```
inverted_hammer        273
hammer                1501
morning_star           277
shooting_star         4259
hanging_man          12983
evening_star          2510
bullish_engulfing     1992
bearish_engulfing    19210
```