In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import winsound
import datetime as dt

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the minute-level price file; only the timestamp and closing price are used below.
# A forward slash is used as the separator: it resolves on every OS, whereas the
# previous 'data\M1_2019.csv' relied on the invalid string escape '\M' and on Windows.
raw_df = pd.read_csv('data/M1_2019.csv')
df = raw_df[['TS', 'Close']]
df.head()

Unnamed: 0,TS,Close
0,2019-01-01 17:02,1.14598
1,2019-01-01 17:03,1.14607
2,2019-01-01 17:04,1.14606
3,2019-01-01 17:05,1.14621
4,2019-01-01 17:06,1.14665


In [3]:
# Keep every min_bar-th row of the source data when downsampling.
min_bar = 5
# Number of consecutive Close values gathered into each rolled row.
roll_len = 61
# Threshold on the difference between the last two values of a window;
# moves within +/- diff_pip are labelled 'same'.
diff_pip = 1e-4

In [4]:
# Keep every min_bar-th row. The selection starts from raw_df rather than from df so
# that re-running this cell does not downsample data that was already downsampled.
# (For a quick trial run, slice raw_df first, e.g. raw_df[:10000].)
df = raw_df[['TS', 'Close']].iloc[::min_bar]
print(f'DF len : {len(df)}')
print(df.head())  # printed before the index reset, so the source row numbers are visible
df = df.reset_index(drop=True)

DF len : 74506
                  TS    Close
0   2019-01-01 17:02  1.14598
5   2019-01-01 17:07  1.14607
10  2019-01-01 17:12  1.14608
15  2019-01-01 17:17  1.14607
20  2019-01-01 17:22  1.14606


In [5]:
def new_df(roll_len):
    """Return the column labels for a rolled frame.

    The result is the integers ``0 .. roll_len - 1`` followed by the string
    ``'TS'``, as a list.
    """
    return [*range(roll_len), 'TS']

In [6]:
def reorder_df(df, roll_len):
    """Turn a price series into overlapping windows, one window per row.

    Row ``k`` of the result holds the ``roll_len`` consecutive ``Close`` values
    starting at position ``k`` of ``df`` (columns ``0 .. roll_len - 1``) plus the
    ``TS`` value of the last element of that window (column ``'TS'``).

    Rows are addressed by position, so ``df`` does not need a 0..n-1 index, and
    the column labels are derived from ``roll_len`` itself rather than from any
    module-level variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'Close'`` and ``'TS'``.
    roll_len : int
        Window length; must be at least 1.

    Returns
    -------
    pandas.DataFrame
        ``max(len(df) - roll_len + 1, 0)`` rows with columns
        ``0 .. roll_len - 1`` followed by ``'TS'``.

    Raises
    ------
    ValueError
        If ``roll_len`` is smaller than 1.
    """
    if roll_len < 1:
        raise ValueError('roll_len must be at least 1')

    closes = df['Close'].tolist()
    stamps = df['TS'].tolist()

    # Only positions that can start a complete window are visited.
    n_windows = max(len(closes) - roll_len + 1, 0)

    rows = []
    for start in tqdm(range(n_windows)):
        stop = start + roll_len
        rows.append(closes[start:stop] + [stamps[stop - 1]])

    columns = list(range(roll_len)) + ['TS']
    return pd.DataFrame(rows, columns=columns)

In [7]:
def get_year(row):
    """Return the year (int) of ``row['TS']``, a '%Y-%m-%d %H:%M' string."""
    parsed = dt.datetime.strptime(row['TS'], '%Y-%m-%d %H:%M')
    return parsed.year

def get_month(row):
    """Return the month number (1-12) of ``row['TS']``, a '%Y-%m-%d %H:%M' string."""
    parsed = dt.datetime.strptime(row['TS'], '%Y-%m-%d %H:%M')
    return parsed.month

def get_day(row):
    """Return the day of the month of ``row['TS']``, a '%Y-%m-%d %H:%M' string."""
    parsed = dt.datetime.strptime(row['TS'], '%Y-%m-%d %H:%M')
    return parsed.day

def get_hour(row):
    """Return the hour (0-23) of ``row['TS']``, a '%Y-%m-%d %H:%M' string."""
    parsed = dt.datetime.strptime(row['TS'], '%Y-%m-%d %H:%M')
    return parsed.hour

def get_min(row):
    """Return the minute (0-59) of ``row['TS']``, a '%Y-%m-%d %H:%M' string."""
    parsed = dt.datetime.strptime(row['TS'], '%Y-%m-%d %H:%M')
    return parsed.minute

def get_dir(row):
    """Label the move between the last two values of a window.

    Reads ``roll_len`` and ``diff_pip`` from module scope and compares
    ``row[roll_len - 1] - row[roll_len - 2]`` against ``diff_pip``.

    Returns ``'increase'`` when the difference exceeds ``diff_pip``,
    ``'decrease'`` when it is below ``-diff_pip`` and ``'same'`` otherwise.
    """
    step = row[roll_len - 1] - row[roll_len - 2]

    if step > diff_pip:
        return 'increase'
    if step < -diff_pip:
        return 'decrease'
    return 'same'

def split_ts(df1, diff_pip, roll_len=None):
    """Replace the 'TS' column with calendar fields and add a 'direction' label.

    Parameters
    ----------
    df1 : pandas.DataFrame
        Rolled frame with a ``'TS'`` column of ``'%Y-%m-%d %H:%M'`` strings and
        value columns labelled ``0 .. roll_len - 1``. It is left unmodified.
    diff_pip : float
        Threshold applied to ``df1[roll_len - 1] - df1[roll_len - 2]``: above
        ``diff_pip`` is ``'increase'``, below ``-diff_pip`` is ``'decrease'``,
        anything else is ``'same'``.
    roll_len : int, optional
        Window length. When omitted it is taken to be the number of
        integer-labelled columns in ``df1``.

    Returns
    -------
    pandas.DataFrame
        New frame whose columns are ``month, day, hour, min``, then every column
        of ``df1`` except ``'TS'`` in its existing order, then ``direction``.

    Raises
    ------
    ValueError
        If fewer than two value columns are available, or if a ``'TS'`` entry
        does not match the expected format.
    """
    if roll_len is None:
        roll_len = sum(
            isinstance(label, (int, np.integer)) and not isinstance(label, bool)
            for label in df1.columns
        )
    if roll_len < 2:
        raise ValueError('split_ts needs at least two value columns to label direction')

    # Parse the timestamps once for the whole column instead of once per field per row.
    stamps = pd.to_datetime(df1['TS'], format='%Y-%m-%d %H:%M')
    # No 'year' column is produced.
    calendar = pd.DataFrame(
        {
            'month': stamps.dt.month,
            'day': stamps.dt.day,
            'hour': stamps.dt.hour,
            'min': stamps.dt.minute,
        },
        index=df1.index,
    )

    # Uses the diff_pip argument, not a module-level value.
    last_move = df1[roll_len - 1] - df1[roll_len - 2]
    direction = np.select(
        [last_move > diff_pip, last_move < -diff_pip],
        ['increase', 'decrease'],
        default='same',
    )

    # drop() returns a new frame, so the caller's df1 keeps its columns.
    result = pd.concat([calendar, df1.drop(columns='TS')], axis=1)
    result['direction'] = direction
    return result

In [8]:
# Column labels for the rolled frame: 0 .. roll_len-1 followed by 'TS'.
col_list = new_df(roll_len)
# One row per complete window of roll_len consecutive Close values.
rolled_df = reorder_df(df, roll_len)

74506it [00:13, 5405.99it/s]


In [9]:
# Put 'TS' first in the column order; the remaining columns keep their order.
cols = list(rolled_df.columns)
cols.remove('TS')
cols = ['TS'] + cols
rolled_df = rolled_df[cols]

In [10]:
# Swap 'TS' for month/day/hour/min columns and add the 'direction' label.
# NOTE: this cell cannot be re-run on its own output, because the returned
# frame no longer has a 'TS' column.
rolled_df = split_ts(rolled_df, diff_pip)
rolled_df.head()

Unnamed: 0,month,day,hour,min,0,1,2,3,4,5,...,52,53,54,55,56,57,58,59,60,direction
0,1,1,22,2,1.14598,1.14607,1.14608,1.14607,1.14606,1.14606,...,1.14497,1.14481,1.14492,1.14447,1.14452,1.14433,1.14443,1.14448,1.14458,same
1,1,1,22,7,1.14607,1.14608,1.14607,1.14606,1.14606,1.14607,...,1.14481,1.14492,1.14447,1.14452,1.14433,1.14443,1.14448,1.14458,1.14452,same
2,1,1,22,12,1.14608,1.14607,1.14606,1.14606,1.14607,1.14607,...,1.14492,1.14447,1.14452,1.14433,1.14443,1.14448,1.14458,1.14452,1.14452,same
3,1,1,22,17,1.14607,1.14606,1.14606,1.14607,1.14607,1.14625,...,1.14447,1.14452,1.14433,1.14443,1.14448,1.14458,1.14452,1.14452,1.14464,increase
4,1,1,22,22,1.14606,1.14606,1.14607,1.14607,1.14625,1.14628,...,1.14452,1.14433,1.14443,1.14448,1.14458,1.14452,1.14452,1.14464,1.14465,same


In [11]:
# Class balance of the 'direction' label: absolute counts first, then proportions.
print(rolled_df['direction'].value_counts())
print('--------------------------')
print(rolled_df['direction'].value_counts(normalize=True))

same        40584
increase    17053
decrease    16809
Name: direction, dtype: int64
--------------------------
same        0.545147
increase    0.229065
decrease    0.225788
Name: direction, dtype: float64


In [12]:
# Remove the final window value (column roll_len-1) before saving; the
# 'direction' label was derived from this column and the one before it.
# NOTE: the deletion is in place, so re-running this cell raises a KeyError.
del rolled_df[roll_len-1]
rolled_df.to_csv('data/rolled_df.csv', index = False)
# Play a completion sound. winsound is a Windows-only standard-library module.
winsound.PlaySound('C:\\Windows\\Media\\tada.wav', winsound.SND_ASYNC)