In [1]:
import pandas as pd
import numpy as np
import time

In [139]:
def create_X(filename, path='data', timeframe):
    """[Changes the csv file to X_csv file to display only the changes values like high-low or open-close
        instead of the actuals. Result file will be stored in the path folder]
        
    Arguments:
        filename {[string]} -- [the file name with path to csv file.]
        path {[string]} -- [default = data, set the dirctory/folder where the file is stored]
    
    Returns:
        [string] -- [Filename of generated fiels]
    """
    # Create X
    df = pd.read_csv(path +'/'+ filename)
    X = pd.DataFrame()
    X['high_low'] = df.High - df.Low
    X['high_open'] = df.High - df.Open
    X['high_close'] = df.High - df.Close
    X['low_open'] = df.Low - df.Open
    X['low_close'] = df.Low - df.Close
    X['close_open'] = df.Close - df.Open
    X['volume'] = df['Volume ']
    X[:-30].to_csv(path + '/X_' + filename, index=None)
    
    return 'X_' + filename

In [131]:
def create_y(filename, path='data', timeframe=30, boarder=0.2, loss_ratio=2):
    """[This can take a while :-), we are looping over the hole csv file to create new files.
        Created will be X_filename and y_filename.
        X_filename: ]
        
    Arguments:
        filename {[string]} -- [the file name with path to csv file.]
        path {[string]} -- [default = data, set the dirctory where the file is stored]
        timeframe {[int]} -- [default is set to 30, means that we check 30 minutes in the future if we reach the
                               set boarder. E.g. Dataframe[i:i+30] are checked if the boarder is hit]
        boarder {[float]} -- [boarder describes how many pips we want to go in minus befor hiting our target.
                                It is to define if the trade would be successfull]
        loss_ratio {[int]} -- [Loss_ratio describes how many times * we want to get out of the trade. If
                                the boarder is set to 0.2 and the loss ratio = 2 than we are looking for a 0.4
                                take profit and a 0.2 stop loss. Is loss ratio = 3 than take porfit = 0.6 and
                                stop loss = 0.2. If loss_ratio is set to 10 we will run over 10-2 stimes. 
                                Therefore it creates then 8 entries for y]

    Returns:
        [dict] -- [Filename of generated fiels]
    """
    # read file
    df = pd.read_csv(path +'/'+ filename)
    df_y = pd.DataFrame()
    # creates y
    for l in range(2,loss_ratio):
        y_ = []
        for k in range(df.shape[0] - timeframe):
            y = 0
            # open value for the next bar current start timeframe (i.open)
            close = df.iloc[k].Close
            # minimum / maximum value for the next timeframe e.g. k + 1(next bar) + 30 minutes
            low = df.iloc[k+1:k+1+timeframe].Low.min()
            high = df.iloc[k+1:k+1+timeframe].High.max()
            lsr = boarder * l
            if((close - low) > lsr or (high - close) > lsr):
                for i in range(1, timeframe):
                    # get current bar / bar range
                    bar = df.iloc[k+1:k+i]
                    # get los and high from start (closeing course fist bar) to check if we hitting the boarder
                    open_low = close - bar.Low.min()
                    high_open = bar.High.max() - close
                    if (high_open > lsr and open_low < boarder):
                        y = 1
                        i = timeframe+1
                    if (open_low > lsr and high_open < boarder):
                        # open - low is bigger than X pips and the boarder was not hit before
                        y = 2
                        i = timeframe+1
            # append y to our array y_ to store it later in pandas dataframe
            y_.append(y)
        print(y_)
        # store y_ in the pandas dataframe
        df_y[l] = pd.DataFrame(y_)[0]
    # save y file
    df_y.to_csv(path +'/y_'+filename, index=None)
    
    return 'y_' + filename

In [132]:
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
create_y(filename='PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv', path='data', loss_ratio=3)
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



'y_PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv'

In [142]:
create_X(filename='PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv', path='data')

'X_PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv'

In [135]:
pd.read_csv('data/y_PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv').shape

(1322728, 1)

In [143]:
pd.read_csv('data/X_PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv').shape

(1322758, 7)

In [145]:
ds = pd.read_csv('data/X_PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv')
ds[:-30]

Unnamed: 0,high_low,high_open,high_close,low_open,low_close,close_open,volume
0,0.384,0.275,0.354,-0.109,-0.030,-0.079,0.00330
1,0.578,0.540,0.054,-0.038,-0.524,0.486,0.01623
2,0.344,0.093,0.276,-0.251,-0.068,-0.183,0.06653
3,0.084,0.075,0.074,-0.009,-0.010,0.001,0.00276
4,0.139,0.104,0.039,-0.035,-0.100,0.065,0.01012
5,0.144,0.133,0.000,-0.011,-0.144,0.133,0.00747
6,0.277,0.105,0.000,-0.172,-0.277,0.105,0.01951
7,0.321,0.321,0.206,0.000,-0.115,0.115,0.00917
8,0.119,0.075,0.033,-0.044,-0.086,0.042,0.00211
9,0.129,0.112,0.098,-0.017,-0.031,0.014,0.00728


In [2]:
# df = pd.read_csv('data/result.csv', header=None)
df_orig = pd.read_csv('data/PAH3DEEUR_1 Min_Bid_2008.10.21_2018.10.27.csv')

In [3]:
# timeframe from orginal model, how many bars aare pased in
timeframe = 60
# t_range how many steps in the future it has to fullfill the breakout or target 
t_range = 30
# boarder are 20 pips and x Loss_ratio for win so 0.20 * 3 = 0.60 or 60 pips win
boarder = 0.20
loss_ratio = 2

In [4]:
df_orig = df_orig[timeframe:].reset_index(drop=True)

In [5]:
for p in range(0, 10):
    # just for printing
    multi = 1
    # create target y_ 
    y_ = []
    # set loss ratio
    loss_ratio = loss_ratio + p
    field_name = str(loss_ratio) + '_' + str(boarder)
    print(loss_ratio)
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
    for k in range(df_orig.shape[0] - t_range):
        y = 0
        start = df_orig.iloc[k]
        low = df_orig.iloc[k+1:k+t_range].Low.min()
        high = df_orig.iloc[k+1:k+t_range].High.max()
        if(((start.Open - low) > (boarder*loss_ratio)) or 
           ((high - start.Open) > (boarder*loss_ratio))):
            for i in range(1, t_range):
                current = df_orig.iloc[k+i]
                li = start.Open - current.Low
                hi = current.High - start.Open
                if(hi > (boarder*loss_ratio) and li < boarder):
                    y = 1
                    break
                if(hi < boarder and li > (boarder*loss_ratio)):
                    y = 2
                    break
        y_.append(y)
        if((k / (200000*multi)) == 1):
            multi += 1
            print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
            print(k)
    df_orig[field_name] = pd.DataFrame(y_).astype(int)
    
df_orig = df_orig[:-(t_range + timerange)]
df_orig.to_csv('data/result_y_30mins_from_2-23_boarder_0.20.csv', header=None)

2
2019-03-09 21:12:04
2019-03-09 21:16:49
200000
2019-03-09 21:21:01
400000
2019-03-09 21:24:35
600000
2019-03-09 21:27:51
800000
2019-03-09 21:44:05
1000000
2019-03-09 21:47:54
1200000
3
2019-03-09 21:50:08
2019-03-09 21:55:04
200000
2019-03-09 21:59:46
400000
2019-03-09 22:04:33
600000
2019-03-09 22:08:53
800000
2019-03-09 22:13:09
1000000
2019-03-09 22:17:40
1200000
5
2019-03-09 22:20:36
2019-03-09 22:25:19
200000
2019-03-09 22:29:54
400000
2019-03-09 22:34:24
600000
2019-03-09 22:38:50
800000
2019-03-09 22:43:18
1000000
2019-03-09 22:47:49
1200000
8
2019-03-09 22:50:34
2019-03-09 22:55:18
200000
2019-03-09 22:59:58
400000
2019-03-09 23:04:39
600000
2019-03-09 23:09:17
800000
2019-03-09 23:13:57
1000000
2019-03-09 23:18:38
1200000
12
2019-03-09 23:21:30
2019-03-09 23:26:25
200000
2019-03-09 23:31:19
400000
2019-03-09 23:36:12
600000
2019-03-09 23:41:13
800000
2019-03-09 23:46:10
1000000
2019-03-09 23:51:06
1200000
17
2019-03-09 23:54:08
2019-03-09 23:59:19
200000
2019-03-10 00:04:28

In [32]:
df_orig = df_orig[:-t_range]
df_orig.to_csv('data/result_y_30mins_from_2-23_boarder_0.20.csv')

In [10]:
unique, counts = np.unique(y_, return_counts=True)
dict(zip(unique, counts))

{0: 1322659, 2: 9}

In [None]:
df_orig.drop(['30_0.2', '38_0.2', '47_0.2'], axis=1, inplace=True)

In [28]:
y_ = df_orig['23_0.2']

In [29]:
unique, counts = np.unique(y_, return_counts=True)
dict(zip(unique, counts))

{0.0: 1322556, 2.0: 112}

In [30]:
# df_y = pd.DataFrame(y_)
df_orig.to_csv('data/result_y_30mins_from_2-23_boarder_0.20.csv', header=None)

In [9]:
# schneide die letzten timelines weg 
df_orig = df_orig[:-t_range]
df = df[:-t_range]
df_orig['nn_result'] = df[1]
df_orig['y'] = df_y[0].astype(int)

NameError: name 'df' is not defined

In [None]:
df_orig.to_csv('data/result_nn_y_boarder_20_ratio_3_200_20_in_30mins.csv')

In [None]:
df = pd.read_csv('data/result_nn_y_boarder_20_ratio_3_60_20.csv')

In [None]:
field_name = '999_'

df_orig[field_name] = pd.DataFrame(y_).astype(int)
df_orig

In [None]:
for i in range(1,30):
    print i