In [1]:
import pandas as pd
import os 
import numpy as np

In [9]:
def column_name_converter(string):
    return string.lower().replace("\n", "_").replace(" ", "_")

def change_column_names(df):
    column_name0 = list(df.columns)
    column_name1 =[column_name_converter(column) for column in column_name0]
    column_name2 = list(df.iloc[0])
    assert len(column_name1) == len(column_name2)
    print(column_name1)
    print(column_name2)
    print(column_name0)
    prefix = ''
    for i, name in enumerate(column_name2):
        if isinstance(name, str):
            if prefix != column_name1[i] and 'unnamed:' not in column_name1[i]:
                prefix = column_name1[i]
            new_name = prefix + "_" + column_name_converter(name)
            column_name2[i] = new_name
        else:
            column_name2[i] = None

    column_name3 = []
    for i, name in enumerate(column_name2):
        if name is not None:
            column_name3.append(name)
        else:
            column_name3.append(column_name1[i])            
    
    df = df.rename(columns={column_name0[i]: column_name3[i] for i in range(len(column_name1))})
    df = df.drop(0)
    df = df.sort_values(by='date').reset_index()
    del df['index']
    return df

def handle_neural_index(df):
    key = 'neural_index'
    if key in list(df.keys()):
        for i in df.index:
            df.loc[i, key] = 0.0 if df.loc[i, key] == 'down' else 1.0
    return df

def rule_closeup(df):
    """
        returns a DataFrame column that is true (1) if the current dates 
        close_price is higher than the close_price of the previous date
    """
    assert 'close_price' in list(df.columns)
    assert 'date' in list(df.columns)
    
    closeup = [None]
    for i in range(1, len(df.index)):
        previous_close_price = df.iloc[i-1]['close_price']
        current_close_price = df.iloc[i]['close_price']        
        closeup.append(int(current_close_price > previous_close_price))
    assert len(closeup) == len(df.index)
    return closeup

def df_to_src_tgt(config=None, df=None):
    src_keys = list(df.keys())
    
    tgt_keys = [config['tgt_key']]
    for key in tgt_keys + config['forbidden_keys']:
        if key in src_keys:
            src_keys.remove(key)
    src_pos_dict = {key: i for i, key in enumerate(src_keys)}
    src_data = df[src_keys].to_numpy()
    tgt_data = df[tgt_keys].to_numpy()
    return src_data, tgt_data, src_keys, tgt_keys, src_pos_dict

def create_input(config=None, src_data=None, src_keys=None, tgt_data=None, src_pos_dict=None):    
    assert src_data.shape[0] == tgt_data.shape[0]
    
    n_datapoints = src_data.shape[0]
    sequence_length = config['sequence_length']
    data = []
    close_price_idx = src_pos_dict['close_price']
    for i in range(sequence_length, n_datapoints, sequence_length):
        src = src_data[i-sequence_length:i]
        tgt = tgt_data[i]
        assert bool(tgt[0]) == (src[-1,close_price_idx] < src_data[i,close_price_idx])
        data.append((src, tgt))
    return data
    

In [10]:

config = {'bs': 8,
          'n_layers': 6,
          'd_model': 10,
          'nhead':16,
          'sequence_length': 10,
          'tgt_key': 'rule_closeup',
          'forbidden_keys': ['date',
                             'up']}

['date', 'short_term_difference', 'medium_term_difference', 'long_term_difference', 'open_price', 'high_price', 'low_price', 'close_price', 'predicted_high_price', 'predicted_low_price', 'volume', 'neural_index', 'short_term_crossover', 'unnamed:_13', 'unnamed:_14', 'unnamed:_15', 'medium_term_crossover', 'unnamed:_17', 'long_term_crossover', 'unnamed:_19', 'triple_cross', 'unnamed:_21', 'unnamed:_22']
[NaT, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 'Short Trend PMA', 'Short Trend AMA', 'PMA ROC%', 'AMA ROC%', 'Medium PMA', 'Medium AMA', 'Long PMA', 'Long AMA', 'Short', 'Medium', 'Long']
['Date', 'Short\nTerm\nDifference', 'Medium\nTerm\nDifference', 'Long\nTerm\nDifference', 'Open\nPrice', 'High\nPrice', 'Low\nPrice', 'Close\nPrice', 'Predicted\nHigh\nPrice', 'Predicted\nLow\nPrice', 'Volume', 'Neural\nIndex', 'Short Term Crossover', 'Unnamed: 13', 'Unnamed: 14', 'Unnamed: 15', 'Medium Term Crossover', 'Unnamed: 17', 'Long Term Crossover', 'Unnamed: 19', 'Triple Cross', '

In [18]:
df = pd.read_excel('res/Euro.xls')
df = change_column_names(df)
df = handle_neural_index(df)
df = pd.concat([df, pd.DataFrame(data=rule_closeup(df), columns=['rule_closeup'])], axis=1)
n_columns = len(df.index)
training_df = df.iloc[:int(n_columns*0.9)]
valid_df = df.iloc[int(n_columns*0.9):]
src_data, tgt_data, src_keys, tgt_keys, src_pos_dict = df_to_src_tgt(config=config, df=training_df)
data = create_input(config=config, src_data=src_data, tgt_data=tgt_data, src_pos_dict=src_pos_dict)

In [27]:
df.head()

Unnamed: 0,date,short_term_difference,medium_term_difference,long_term_difference,open_price,high_price,low_price,close_price,predicted_high_price,predicted_low_price,...,short_term_crossover_short_trend_ama,short_term_crossover_pma_roc%,short_term_crossover_ama_roc%,medium_term_crossover_medium_pma,medium_term_crossover_medium_ama,long_term_crossover_long_pma,long_term_crossover_long_ama,triple_cross_short,triple_cross_medium,triple_cross_long
0,2016-03-01,-0.001541,-2.3e-05,-0.000301,1.1952,1.1952,1.1952,1.1952,1.192888,1.192888,...,1.1952,0.0,0.0,1.19518,1.1952,1.1949,1.1952,1.19366,1.19518,1.1949
1,2016-03-02,-0.002044,-0.000104,-0.000391,1.19495,1.19495,1.19495,1.19495,1.191601,1.191601,...,1.19508,-0.052571,-0.0104528,1.19497,1.19508,1.19468,1.19508,1.19319,1.19506,1.19482
2,2016-03-03,0.004044,0.001034,-0.001153,1.20965,1.20965,1.20965,1.20965,1.211909,1.206795,...,1.19993,0.917568,0.406533,1.20097,1.19993,1.19878,1.19993,1.20132,1.19837,1.19631
3,2016-03-04,0.006921,0.001307,-0.001653,1.21555,1.21555,1.21555,1.21555,1.2145,1.212397,...,1.20384,0.563195,0.32536,1.20514,1.20384,1.20218,1.20384,1.20773,1.20127,1.19783
4,2016-03-07,0.008592,0.003142,-0.000585,1.21855,1.21855,1.21855,1.21855,1.218084,1.216402,...,1.20678,0.381044,0.244432,1.20992,1.20678,1.20619,1.20678,1.21271,1.20479,1.19976


In [14]:
src_keys

['short_term_difference',
 'medium_term_difference',
 'long_term_difference',
 'open_price',
 'high_price',
 'low_price',
 'close_price',
 'predicted_high_price',
 'predicted_low_price',
 'volume',
 'neural_index',
 'short_term_crossover',
 'unnamed:_13',
 'unnamed:_14',
 'unnamed:_15',
 'medium_term_crossover',
 'unnamed:_17',
 'long_term_crossover',
 'unnamed:_19',
 'triple_cross',
 'unnamed:_21',
 'unnamed:_22']