In [1]:
import pandas as pd
import os 
import numpy as np

In [2]:
def column_name_converter(string):
    return string.lower().replace("\n", "_").replace(" ", "_")

In [124]:
def change_column_names(df):
    column_name0 = list(df.columns)
    column_name1 =[column_name_converter(column) for column in column_name0]
    column_name2 = list(df.iloc[0])
    assert len(column_name1) == len(column_name2)
    
    prefix = ''
    for i, name in enumerate(column_name2):
        if isinstance(name, str):
            if prefix != column_name1[i] and 'unnamed:' not in column_name1[i]:
                prefix = column_name1[i]
            new_name = prefix + "_" + column_name_converter(name)
            column_name2[i] = new_name
        else:
            column_name2[i] = None

    column_name3 = []
    for i, name in enumerate(column_name2):
        if name is not None:
            column_name3.append(name)
        else:
            column_name3.append(column_name1[i])            
    
    df = df.rename(columns={column_name0[i]: column_name1[i] for i in range(len(column_name1))})
    df = df.drop(0)
    df = df.sort_values(by='date').reset_index()
    del df['index']
    return df

In [125]:
def handle_neural_index(df):
    key = 'neural_index'
    if key in list(df.keys()):
        for i in df.index:
            df.loc[i, key] = 0.0 if df.loc[i, key] == 'down' else 1.0
    print(df[key])

In [126]:
def rule_closeup(df):
    """
        returns a DataFrame column that is true (1) if the current dates 
        close_price is higher than the close_price of the previous date
    """
    assert 'close_price' in list(df.columns)
    assert 'date' in list(df.columns)
    
    closeup = [None]
    for i in range(1, len(df.index)):
        previous_close_price = df.iloc[i-1]['close_price']
        current_close_price = df.iloc[i]['close_price']        
        closeup.append(current_close_price > previous_close_price)
    assert len(closeup) == len(df.index)
    return closeup
        

In [127]:
df = pd.read_excel('res/Euro.xls')
df = change_column_names(df)
closeup = rule_closeup(df)
closeup_df = pd.DataFrame(data=closeup, columns=['rule_closeup'])
df = pd.concat([df, closeup_df], axis=1)

In [128]:
test_df = df.iloc[1:20]
handle_neural_index(test_df)

1     1
2     0
3     0
4     0
5     0
6     0
7     1
8     1
9     0
10    1
11    1
12    1
13    1
14    1
15    0
16    0
17    0
18    1
19    1
Name: neural_index, dtype: object


In [20]:
test_df.head()

Unnamed: 0,date,short_term_difference,medium_term_difference,long_term_difference,open_price,high_price,low_price,close_price,predicted_high_price,predicted_low_price,...,unnamed:_14,unnamed:_15,medium_term_crossover,unnamed:_17,long_term_crossover,unnamed:_19,triple_cross,unnamed:_21,unnamed:_22,rule_closeup
1,2016-03-02,-0.002044,-0.000104,-0.000391,1.19495,1.19495,1.19495,1.19495,1.191601,1.191601,...,-0.052571,-0.0104528,1.19497,1.19508,1.19468,1.19508,1.19319,1.19506,1.19482,False
2,2016-03-03,0.004044,0.001034,-0.001153,1.20965,1.20965,1.20965,1.20965,1.211909,1.206795,...,0.917568,0.406533,1.20097,1.19993,1.19878,1.19993,1.20132,1.19837,1.19631,True
3,2016-03-04,0.006921,0.001307,-0.001653,1.21555,1.21555,1.21555,1.21555,1.2145,1.212397,...,0.563195,0.32536,1.20514,1.20384,1.20218,1.20384,1.20773,1.20127,1.19783,True
4,2016-03-07,0.008592,0.003142,-0.000585,1.21855,1.21855,1.21855,1.21855,1.218084,1.216402,...,0.381044,0.244432,1.20992,1.20678,1.20619,1.20678,1.21271,1.20479,1.19976,True
5,2016-03-08,0.002059,0.002758,0.000106,1.2115,1.2115,1.2115,1.2115,1.209945,1.209945,...,-0.269272,0.270142,1.21032,1.20757,1.20767,1.20757,1.21158,1.20615,1.20099,False


In [76]:
config = {'sequence_length': 10,
          'tgt_key': 'rule_closeup',
          'forbidden_keys': ['date', 
                             'up']}

In [77]:
training_df = df.iloc[:int(n_columns*0.9)]
valid_df = df.iloc[int(n_columns*0.9):]

In [78]:
test_df = df.iloc[1:20]

In [79]:
print([config['tgt_key']] + config['forbidden_keys'])

['rule_closeup', 'date', 'up']


In [80]:
def df_to_src_tgt(config=None, df=None):
    src_keys = list(df.keys())
    tgt_keys = [config['tgt_key']]
    for key in tgt_keys + config['forbidden_keys']:
        if key in src_keys:
            src_keys.remove(key)
    src_data = df[src_keys].to_numpy()
    tgt_data = df[tgt_keys].to_numpy()
    return src_data, tgt_data, src_keys, tgt_keys

In [81]:
src_data, tgt_data, src_keys, tgt_keys = df_to_src_and_tgt(config=config, df=test_df)

In [82]:
src_data

array([[-0.002044081687927246, -0.00010442733764648438,
        -0.0003911256790161133, 1.194949984550476, 1.194949984550476,
        1.194949984550476, 1.194949984550476, 1.1916007995605469,
        1.1916007995605469, 0.0, nan, 1.1930309534072876,
        1.1950750350952148, -0.05257095769047737, -0.010452755726873875,
        1.1949706077575684, 1.1950750350952148, 1.1946839094161987,
        1.1950750350952148, 1.1931878328323364, 1.1950621604919434,
        1.1948195695877075],
       [0.0040444135665893555, 0.0010342597961425781,
        -0.001153111457824707, 1.2096500396728516, 1.2096500396728516,
        1.2096500396728516, 1.2096500396728516, 1.2119086980819702,
        1.206795334815979, 0.0, 'down', 1.2039778232574463,
        1.199933409690857, 0.9175679683685303, 0.40653300285339355,
        1.2009676694869995, 1.199933409690857, 1.1987802982330322,
        1.199933409690857, 1.2013195753097534, 1.198373556137085,
        1.1963144540786743],
       [0.006921052932739258,

In [73]:
def create_input(config=None, src_data=None, tgt_data=None):
    assert src_data.shape[1] == tgt_data.shape[1]
    n_datapoints = src_data.shape[1]
    sequence_length = config['sequence_length']
    for i in range(sequence_length, n_datapoints):
        print(src_data[i-sequence_length:i].shape)
        print(tgt_data[i])
        print(src_data[i+1])
        break
    

In [74]:
create_input(config=config, src_data=src_data, tgt_data=tgt_data)

(10, 22)
[True]
[0.006837129592895508 0.008920907974243164 0.012210488319396973
 1.2443000078201294 1.2443000078201294 1.2443000078201294
 1.2443000078201294 1.2436795234680176 1.2429492473602295 0.0 'up'
 1.240977168083191 1.2341400384902954 0.4312145411968231
 0.17127211391925812 1.235070824623108 1.2261499166488647
 1.2323105335235596 1.2201000452041626 1.2386353015899658
 1.2285948991775513 1.217734694480896]


In [57]:
test_df[src_keys].head()

Unnamed: 0,short_term_difference,medium_term_difference,long_term_difference,open_price,high_price,low_price,close_price,predicted_high_price,predicted_low_price,volume,...,unnamed:_13,unnamed:_14,unnamed:_15,medium_term_crossover,unnamed:_17,long_term_crossover,unnamed:_19,triple_cross,unnamed:_21,unnamed:_22
1,-0.002044,-0.000104,-0.000391,1.19495,1.19495,1.19495,1.19495,1.191601,1.191601,0.0,...,1.19508,-0.052571,-0.0104528,1.19497,1.19508,1.19468,1.19508,1.19319,1.19506,1.19482
2,0.004044,0.001034,-0.001153,1.20965,1.20965,1.20965,1.20965,1.211909,1.206795,0.0,...,1.19993,0.917568,0.406533,1.20097,1.19993,1.19878,1.19993,1.20132,1.19837,1.19631
3,0.006921,0.001307,-0.001653,1.21555,1.21555,1.21555,1.21555,1.2145,1.212397,0.0,...,1.20384,0.563195,0.32536,1.20514,1.20384,1.20218,1.20384,1.20773,1.20127,1.19783
4,0.008592,0.003142,-0.000585,1.21855,1.21855,1.21855,1.21855,1.218084,1.216402,0.0,...,1.20678,0.381044,0.244432,1.20992,1.20678,1.20619,1.20678,1.21271,1.20479,1.19976
5,0.002059,0.002758,0.000106,1.2115,1.2115,1.2115,1.2115,1.209945,1.209945,0.0,...,1.21004,-0.269272,0.270142,1.21032,1.20757,1.20767,1.20757,1.21158,1.20615,1.20099


array([[-0.002044081687927246, -0.00010442733764648438,
        -0.0003911256790161133, 1.194949984550476, 1.194949984550476,
        1.194949984550476, 1.194949984550476, 1.1916007995605469,
        1.1916007995605469, 0.0, nan, 1.1930309534072876,
        1.1950750350952148, -0.05257095769047737, -0.010452755726873875,
        1.1949706077575684, 1.1950750350952148, 1.1946839094161987,
        1.1950750350952148, 1.1931878328323364, 1.1950621604919434,
        1.1948195695877075],
       [0.0040444135665893555, 0.0010342597961425781,
        -0.001153111457824707, 1.2096500396728516, 1.2096500396728516,
        1.2096500396728516, 1.2096500396728516, 1.2119086980819702,
        1.206795334815979, 0.0, 'down', 1.2039778232574463,
        1.199933409690857, 0.9175679683685303, 0.40653300285339355,
        1.2009676694869995, 1.199933409690857, 1.1987802982330322,
        1.199933409690857, 1.2013195753097534, 1.198373556137085,
        1.1963144540786743],
       [0.006921052932739258,