In [1]:
# # ML Trader
import numpy as np
import pandas as pd

import collections

from tqdm import tqdm

from IPython import display
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from IPython.display import display

import datetime as dt

import warnings
warnings.filterwarnings('ignore')

In [2]:
def before_sema(data):   
    data['sema_list'].append(data['tick'])    
    return(data)

def after_sema(data):     
    data['sema_list'].popleft()
    data['sema_list'].append(data['tick'])
    data['sema'] = list(pd.DataFrame(list(data['sema_list'])).ewm(span=data['sema_len']).mean()[0])[-1]
    return(data)

def before_lema(data):   
    data['lema_list'].append(data['tick'])    
    return(data)

def after_lema(data):     
    data['lema_list'].popleft()
    data['lema_list'].append(data['tick'])
    data['lema'] = list(pd.DataFrame(list(data['lema_list'])).ewm(span=data['lema_len']).mean()[0])[-1]
    return(data)

In [3]:
def print_report(data):
    display.clear_output(wait = True)
    print('--------------------------')
    print(f'Total PL : {sum(data["pl_list"])}')
    print('-----------')
    print(data['pl_list'])
    print('--------------------------')
    return()

In [4]:
def read_data(data):
    source_file_path = f'..\\data\\yearly_tick_data\\{data["year"]}.csv'

    if data['input_rows'] is None:
        data["df"] = pd.read_csv(source_file_path)
    else:
        data["df"] = pd.read_csv(source_file_path, nrows=data['input_rows'])
        
    #data["df"] = data["df"][data["df"]['DateTime'].str.contains('|'.join(data['date_val']))]
    data["df"] = data["df"][data["df"]['DateTime'].str.contains(data['date_val'])]
    
    data["df"]['tick'] = np.float()
    data["df"]['sema'] = np.float()
    data["df"]['lema'] = np.float()
    
    return(data["df"])

In [5]:
def get_dir(data):
    
    if data['sema'] > data['lema']:
        data['position'] = 1
        
    elif data['sema'] < data['lema']:
        data['position'] = -1
        
    return(data)

In [6]:
def after_dir(data):   
    
    data['dir_list'].popleft()
    data['dir_list'].append(data['position'])   
    
    if sum(data['dir_list']) != 0:
        data['dir_change'] = False

    elif sum(data['dir_list']) == 0:
        data['dir_change'] = True
        #print(data['dir_list'])

    return(data)

In [7]:
def make_order(data):
    if not data['open_order']:
        if data['dir_change']:
            if data['position'] > 0:
                data['order_ask_price'] = data['ask']
                data['open_order'] = True
                
            elif data['position'] < 0:
                data['order_bid_price'] = data['bid']
                data['open_order'] = True
    return(data)

In [8]:
def close_order(data):
    if data['open_order']:
        if data['dir_change']:
            if data['position'] < 0:
                data['close_bid_price'] = data['bid']
                data['pl'] = data['close_bid_price'] - data['order_ask_price']
                data['pl_list'].append(data['pl'])
                data['open_order'] = False
                #print(f'order_ask_price:{data["order_ask_price"]}')
                #print(f'close_bid_price:{data["close_bid_price"]}')
                #print(f'pl:{data["pl"]}')
                
                data['order_bid_price'] = data['bid']
                data['open_order'] = True
                
                
            elif data['position'] > 0:
                data['close_ask_price'] = data['ask']
                data['pl'] = data['order_bid_price'] - data['close_ask_price']
                data['pl_list'].append(data['pl'])
                data['open_order'] = False
                #print(f'order_bid_price:{data["order_bid_price"]}')
                #print(f'close_ask_price:{data["close_ask_price"]}')
                #print(f'pl:{data["pl"]}')
                
                data['order_ask_price'] = data['ask']
                data['open_order'] = True
    return(data)

In [9]:
def get_date_list(data):
    
    data['start_date'] = dt.datetime(year=data['start_date']['year'],
                      month=data['start_date']['month'],
                      day=data['start_date']['date'])

    data['end_date'] = dt.datetime(year=data['end_date']['year'],
                      month=data['end_date']['month'],
                      day=data['end_date']['date'])

    
    date_list = list(pd.date_range(data['start_date'],data['end_date'],freq='d').values)
    data['date_list'] = [str(x).split('T')[0].replace('-','') for x in date_list]
    return(data)

In [10]:
def get_run_duration(data):
    run_time = len(data["date_list"]) * 6
    hours = run_time//60
    mins = run_time%60
    print(f'Approx run time : {hours}-hours : {mins}-mins')

In [11]:
def plot_graph(data):
    
    figure(num=None, figsize=(50, 10), dpi=80, facecolor='w', edgecolor='k')

    data["df"]['tick'].plot(label = 'tick')
    data["df"]['sema'].plot(label = 'sema')
    data["df"]['lema'].plot(label = 'lema')
    plt.legend(loc="upper left")

In [12]:
def print_report(data):
    pl_list = list(data['pl_list'])
    net_pl = round(np.sum(pl_list),5)

    positive_pls = [i for i in pl_list if i > 0]
    negative_pls = [i for i in pl_list if i < 0]

    num_positive = len(positive_pls)
    num_negative = len(negative_pls)

    sum_positive = round(np.sum(positive_pls),5)
    sum_negative = round(np.sum(negative_pls),5)
    sum_total    = round(sum_positive + abs(sum_negative),5) 

    avg_positive = round(np.mean(positive_pls),5)
    avg_negative = round(np.mean(negative_pls),5)

    
    print('==============================')
    print(f'date_val          : {data["date_val"]}')    
    print(f'Total PL : {sum(data["pl_list"])}')
    print('-------------')
    print(f'net_pl            : {net_pl}/{sum_total}')  
    print('-------------')
    print(f'+ve               : num= {num_positive}    sum= +{sum_positive}    avg= +{avg_positive}')
    print(f'-ve               : num= {num_negative}    sum= {sum_negative}    avg= {avg_negative}')
    print('==============================')
    
#     data["pl_list"] = pd.DataFrame(pl_list, columns = ['pls'])
#     values = list(data["df"]['pls'].value_counts().index)
#     counts = list(data["df"]['pls'].value_counts().values)
#     data["df_report"] = pd.DataFrame({'values':values,'counts':counts})
#     data["df_report"]['act_val'] = data["df_report"]['values'] * data["df_report"]['counts']
#     data["df_report"] = data["df_report"].sort_values(by='values')
#     data["df_report"] = data["df_report"].reset_index(drop = True)    
#     display(data["df_report"])

In [13]:
def run_engine(data):
    for i in tqdm(range(0, len(data["df"]))):
        data['ask'] = data["df"]['Ask'][i]
        data['bid'] = data["df"]['Bid'][i]
        data['tick'] = (data['ask'] + data['bid'])/2    
        data["df"]['tick'][i] = data['tick']

        # sema before after loops --------------------------------
        if len(data['sema_list']) < data['sema_len']:
            data =  before_sema(data)
            continue

        if len(data['sema_list']) == data['sema_len']:
            data = after_sema(data)     
        # ----------------------------------------------------------


        # lema before after loops --------------------------------
        if len(data['lema_list']) < data['lema_len']:
            data = before_lema(data)        
            continue

        if len(data['lema_list']) == data['lema_len']:
            data = after_lema(data)        
        # ----------------------------------------------------------


        data["df"]['sema'][i] = data['sema']
        data["df"]['lema'][i] = data['lema']

        data = get_dir(data)

        # lema before after loops --------------------------------
        if len(data['dir_list']) < 2:
            data['dir_list'].append(data['position'])   
            continue
        elif len(data['dir_list']) == 2:
            data = after_dir(data)
        # ----------------------------------------------------------

        data = close_order(data)
        data = make_order(data)    

    data["df"] = data["df"][data["df"]['sema'] != 0]
    data["df"] = data["df"][data["df"]['lema'] != 0]
    data["df"] = data["df"].reset_index(drop = True)
    return(data)

In [14]:
data = {}
data["year"] = 2021
data['total_df'] = pd.DataFrame()
data['sema_list'] = collections.deque()
data['lema_list'] = collections.deque()
data['dir_list'] = collections.deque()
data['pl_list'] =  collections.deque()

In [15]:
data['input_rows'] = None

data['start_date'] = {'year':2021, 'month':3, 'date':1}
data['end_date']   = {'year':2021, 'month':3, 'date':5}

data['sema_len'] = 1500
data['lema_len'] = 15000

In [16]:
data = get_date_list(data)

In [17]:
for data['date_val'] in data['date_list']:
    read_data(data)
#     data["df"] = data["df"][0:100]
    print(f'Record num : {len(data["df"])}')    
    data["df"] = data["df"].reset_index(drop = True)
    
    data['open_order'] = False    
    data = run_engine(data)
    
    #plot_graph(data)
    print_report(data)
    temp_df = pd.DataFrame({'date':data['date_val'], 'pls': data['pl_list']})
    data['total_df'] = data['total_df'].append(temp_df)
    data["total_df"] = data["total_df"].reset_index(drop = True)
    data['total_df'].to_csv('total_df.csv')
    
data["total_df"] = data["total_df"].reset_index(drop = True)
data['file_name'] = f'{data["start_date"].year}-{data["start_date"].month}-({data["start_date"].day}-{data["end_date"].day}).csv'
data['total_df'].to_csv(data['file_name'], index = False) 

  1%|▍                                     | 995/88994 [00:00<00:08, 9926.18it/s]

Record num : 88994


100%|█████████████████████████████████████| 88994/88994 [07:29<00:00, 198.01it/s]


Total PL : -0.0032500000000004192
-------------
date_val          : 20210301
-------------
net_pl            : -0.00325/0.01223
-------------
+ve               : num= 5    sum= +0.00449    avg= +0.0009
-ve               : num= 17    sum= -0.00774    avg= -0.00046
-------------


  0%|                                        | 16/91013 [00:00<09:32, 158.83it/s]

Record num : 91013


100%|█████████████████████████████████████| 91013/91013 [08:58<00:00, 169.05it/s]


Total PL : 0.0021500000000000963
-------------
date_val          : 20210302
-------------
net_pl            : 0.00215/0.02593
-------------
+ve               : num= 10    sum= +0.01404    avg= +0.0014
-ve               : num= 26    sum= -0.01189    avg= -0.00046
-------------


  0%|                                        | 17/97956 [00:00<10:09, 160.80it/s]

Record num : 97956


100%|█████████████████████████████████████| 97956/97956 [09:31<00:00, 171.40it/s]


Total PL : 0.002460000000001017
-------------
date_val          : 20210303
-------------
net_pl            : 0.00246/0.03492
-------------
+ve               : num= 13    sum= +0.01869    avg= +0.00144
-ve               : num= 38    sum= -0.01623    avg= -0.00043
-------------


  0%|                                       | 16/125449 [00:00<13:33, 154.22it/s]

Record num : 125449


100%|███████████████████████████████████| 125449/125449 [11:54<00:00, 175.60it/s]


Total PL : 0.0007100000000015427
-------------
date_val          : 20210304
-------------
net_pl            : 0.00071/0.05281
-------------
+ve               : num= 15    sum= +0.02676    avg= +0.00178
-ve               : num= 57    sum= -0.02605    avg= -0.00046
-------------


  0%|                                       | 20/130957 [00:00<12:26, 175.34it/s]

Record num : 130957


100%|███████████████████████████████████| 130957/130957 [11:58<00:00, 182.25it/s]

Total PL : -0.012629999999998143
-------------
date_val          : 20210305
-------------
net_pl            : -0.01263/0.07285
-------------
+ve               : num= 19    sum= +0.03011    avg= +0.00158
-ve               : num= 88    sum= -0.04274    avg= -0.00049
-------------





In [38]:
data['total_df'].groupby(['date']).sum()

Unnamed: 0_level_0,pls
date,Unnamed: 1_level_1
20210301,-0.00325
20210302,0.00215
20210303,0.00246
20210304,0.00071
20210305,-0.01263
