In [1]:
import os
import pandas as pd
import numpy as np

In [8]:
def get_ask_name(extract_name):
    """Build the "Ask" file name that corresponds to a "Bid" file name.

    Parameters
    ----------
    extract_name : list of str
        A file name already split on "_". The element at index 2 is the
        token that gets replaced by "Ask".

    Returns
    -------
    str
        The tokens re-joined with "_", with index 2 set to "Ask".

    Raises
    ------
    IndexError
        If ``extract_name`` has fewer than three elements.
    """
    # Work on a copy so the caller's token list is left untouched.
    parts = list(extract_name)
    parts[2] = "Ask"
    return "_".join(parts)

def change_index_to_time(df):
    """Make the parsed 'Time (UTC)' column the index of ``df``, in place.

    The column is converted with ``pd.to_datetime`` and then moved out of the
    columns to become the index. ``df`` itself is modified and nothing is
    returned.
    """
    time_col = 'Time (UTC)'
    parsed = pd.to_datetime(df[time_col])
    df[time_col] = parsed
    df.set_index(time_col, drop=True, inplace=True)

def merge_bid_ask(bid, ask):
    """Join bid and ask frames on their timestamps and derive spread columns.

    Both inputs are re-indexed in place by 'Time (UTC)' (through
    ``change_index_to_time``) and inner-joined on that index. Bid columns
    keep their names; the overlapping ask columns arrive suffixed with
    '_spread' and are then overwritten with ``ask - bid``.

    Returns the merged frame.
    """
    for frame in (bid, ask):
        change_index_to_time(frame)

    merged = bid.merge(
        ask,
        how='inner',
        left_index=True,
        right_index=True,
        suffixes=('', '_spread'),
    )

    # Turn each suffixed ask column into the ask-minus-bid difference.
    for name in bid.columns:
        spread_name = name + "_spread"
        merged[spread_name] = merged[spread_name] - merged[name]

    return merged

def add_label(bid_ask, windows=(8, 24, 48)):
    """Add forward-looking high/low label columns to ``bid_ask`` in place.

    For every window size ``w`` two columns are added:

    * ``"H-<w // 2>hr"`` - maximum of "High" over the current row and the
      ``w - 1`` rows that follow it.
    * ``"L-<w // 2>hr"`` - minimum of "Low" over the same rows.

    Near the end of the frame, where fewer than ``w`` rows remain, the
    extreme is taken over the rows that do exist (``min_periods=0``).

    Parameters
    ----------
    bid_ask : pandas.DataFrame
        Must contain "High" and "Low" columns. Modified in place.
    windows : iterable of int, optional
        Window sizes in rows. The default (8, 24, 48) gives the 4h, 12h and
        24h labels; the hour count in the column name is ``w // 2``, i.e. the
        naming assumes two rows per hour.
    """
    # Rolling windows look backwards, so reverse the series, roll, and reverse
    # the result again to get a window that covers the following rows instead.
    high_reversed = bid_ask["High"][::-1]
    low_reversed = bid_ask["Low"][::-1]

    for window in windows:
        hours = str(window // 2)
        bid_ask["H-" + hours + "hr"] = high_reversed.rolling(window=window, min_periods=0).max()[::-1]
        bid_ask["L-" + hours + "hr"] = low_reversed.rolling(window=window, min_periods=0).min()[::-1]
                
def add_fft(bid_ask):
    """Add FFT low-pass reconstructions of the price columns, in place.

    For each of "High", "Low", "Open" and "Close" the column is transformed
    with ``np.fft.fft``, every coefficient except the first ``component`` and
    the last ``component`` ones is zeroed, and the inverse transform is stored
    in a new column named ``"<feature>_fft_<component>"`` for ``component`` in
    32, 128 and 512.

    When the column has no more than ``2 * component`` rows the slice being
    zeroed is empty, so the new column simply reproduces the input.

    Parameters
    ----------
    bid_ask : pandas.DataFrame
        Must contain the four price columns. Modified in place; nothing is
        returned.
    """
    features = ["High", "Low", "Open", "Close"]
    components = [32, 128, 512]

    for feature in features:
        # Keep the spectrum 1-D: np.fft.ifft transforms the last axis, so an
        # (n, 1) array would come back unchanged instead of being inverted.
        spectrum = np.fft.fft(np.asarray(bid_ask[feature]))
        for component in components:
            truncated = spectrum.copy()
            truncated[component:-component] = 0
            # Store only the real part so the column is float rather than
            # complex (assumes the price column itself is real-valued).
            bid_ask[feature + "_fft_" + str(component)] = np.real(np.fft.ifft(truncated))

def create_data(dir_name):
    """Merge every Bid/Ask CSV pair under ``dir_name`` into one feature file.

    ``dir_name`` is scanned for sub-directories (one per asset class). Inside
    each, every file whose third "_"-separated token is exactly "Bid" is
    paired with the file of the same name carrying "Ask" in that position.
    The pair is merged, FFT features and labels are added, and the result is
    written to "<first token>.csv" in the same sub-directory.

    WARNING: this is destructive - the Bid and Ask source files are deleted
    after the merged file has been written.

    Parameters
    ----------
    dir_name : str
        Root directory containing the asset-class sub-directories.

    Raises
    ------
    FileNotFoundError
        If a Bid file has no matching Ask file (raised by ``pd.read_csv``).
    """
    for asset_class in os.listdir(dir_name):
        class_dir = os.path.join(dir_name, asset_class)
        # Stray files sitting next to the asset-class folders are not data sets.
        if not os.path.isdir(class_dir):
            continue

        for asset in os.listdir(class_dir):
            extract_name = asset.split("_")
            # Names with fewer than three tokens (e.g. the "<asset>.csv" files
            # this function itself writes) are not Bid/Ask inputs; skipping
            # them lets the function be re-run on an already processed folder.
            if len(extract_name) < 3 or extract_name[2] != "Bid":
                continue

            bid_path = os.path.join(class_dir, asset)
            # Hand over a copy so the tokens used below cannot be altered.
            ask_path = os.path.join(class_dir, get_ask_name(list(extract_name)))

            bid = pd.read_csv(bid_path)
            ask = pd.read_csv(ask_path)
            bid_ask = merge_bid_ask(bid, ask)
            add_fft(bid_ask)
            add_label(bid_ask)
            bid_ask.to_csv(os.path.join(class_dir, extract_name[0] + ".csv"))

            # Remove the inputs only once the merged file is on disk.
            os.remove(bid_path)
            os.remove(ask_path)

In [9]:
# Process every asset-class folder under "Data". Note that create_data deletes
# the Bid/Ask source CSVs after writing each merged file.
dir_name = "Data"
create_data(dir_name)