In [45]:
import os
import pandas as pd
import numpy as np
from datetime import datetime, time, timedelta

# Tick data file, given as a path relative to the notebook's working directory.
path = 'data/stock_data.csv'

# Load the CSV into a DataFrame; later cells read its 'ALL' column as the price series.
tick_df = pd.read_csv(path)

In [83]:

def pct_change(old_value, new_value):
    """Return the relative change from ``old_value`` to ``new_value``.

    The result is a fraction, not a percentage: going from 100 to 110
    yields 0.1. It is negative when ``new_value`` is below ``old_value``.
    """
    return (new_value - old_value) / old_value

class Trend(object):
    """One logged trend: its direction, three price levels and their row positions.

    The constructor arguments are kept as attributes under the same names, and a
    display-ready summary is stored in ``data_dict`` (numeric fields rounded to
    6 decimal places, timestamp stored as given).
    """

    def __init__(self, direction, DC_start, DCC, OS_end, DC_start_index, DCC_index, OS_end_index, timestamp):
        self.direction = direction
        self.DC_start = DC_start
        self.DCC = DCC
        self.OS_end = OS_end
        self.DC_start_index = DC_start_index
        self.DCC_index = DCC_index
        self.OS_end_index = OS_end_index
        self.timestamp = timestamp

        # Build the summary in a fixed key order; only the fields between
        # 'Direction' and 'DCC Timestamp' are rounded.
        summary = {'Direction': direction}
        rounded_fields = (
            ('Start', DC_start),
            ('DCC', DCC),
            ('End', OS_end),
            ('Start Index', DC_start_index),
            ('DCC Index', DCC_index),
            ('End Index', OS_end_index),
        )
        for label, value in rounded_fields:
            summary[label] = round(value, 6)
        summary['DCC Timestamp'] = timestamp
        self.data_dict = summary

    def __str__(self):
        # The string form is simply the summary dictionary's own text.
        return str(self.data_dict)

In [95]:
def profile_data(df, theta):
    """Split the 'ALL' price series of ``df`` into alternating up/down trends.

    The prices are scanned once, in row order. While a trend is running, the
    most extreme price seen so far is tracked. As soon as the price moves away
    from that extreme by more than ``theta`` (relative change, as computed by
    ``pct_change``), the running trend is logged and a new trend in the
    opposite direction begins at the current row. The scan is seeded as an
    upturn starting at the first price.

    Args:
        df: DataFrame with a numeric 'ALL' column. ``df.index`` supplies the
            value stored as 'DCC Timestamp'.
        theta: reversal threshold as a fraction (0.01 means 1%).

    Returns:
        Tuple ``(trend_df, live_df)``:

        * ``trend_df`` has one row per logged trend with the columns
          'Direction', 'Start', 'DCC', 'End', 'Start Index', 'DCC Index',
          'End Index' and 'DCC Timestamp'. The trend still running when the
          data ends is not logged.
        * ``live_df`` has one row per input row with the columns 'Direction',
          'DCC' and 'Price': the direction in force, the price at which that
          direction was confirmed, and the row's own price.

        Both frames are empty (but keep their columns) when ``df`` has no rows.

    Raises:
        KeyError: if ``df`` has no 'ALL' column.
    """
    trend_columns = ['Direction', 'Start', 'DCC', 'End', 'Start Index', 'DCC Index', 'End Index', 'DCC Timestamp']
    live_columns = ['Direction', 'DCC', 'Price']

    prices = df['ALL']
    trends = []
    live_states = []

    # Nothing to scan: return empty frames with the usual columns instead of
    # failing on the first-row lookup below.
    if len(prices) == 0:
        return pd.DataFrame(trends, columns=trend_columns), pd.DataFrame(live_states, columns=live_columns)

    # direction: -1 is downturn, 1 is upturn
    # Positional access (.iloc) so the first row is found whatever labels
    # df.index carries, matching the positional df.index[...] lookups below.
    starting_price = prices.iloc[0]
    starting_timestamp = df.index[0]

    # trend_buffer layout (same order as the positional arguments of Trend):
    #   [0] direction, [1] DC_start, [2] DCC, [3] OS_end (running extreme),
    #   [4] DC_start_index, [5] DCC_index, [6] OS_end_index, [7] DCC timestamp
    trend_buffer = [1, starting_price, starting_price, starting_price, 0, 0, 0, starting_timestamp]

    # direction, recent DCC, current price
    price_buffer = [1, starting_price, starting_price]

    # iterate over midprices
    for index, midprice in enumerate(prices.values):

        # for upturn
        if trend_buffer[0] == 1:
            # threshold broken: price dropped more than theta below the running high
            if pct_change(trend_buffer[3], midprice) < -theta:
                # log old event
                trends.append(Trend(*trend_buffer))
                # setup new event: it starts at the old extreme and is confirmed here
                trend_buffer = [-1, trend_buffer[3], midprice, midprice, trend_buffer[6], index, index, df.index[index]]
                price_buffer = [-1, midprice, midprice]
            # new extreme
            elif midprice > trend_buffer[3]:
                trend_buffer[3], trend_buffer[6] = midprice, index

        # for downturn
        elif trend_buffer[0] == -1:
            # threshold broken: price rose more than theta above the running low
            if pct_change(trend_buffer[3], midprice) > theta:
                # log old event
                trends.append(Trend(*trend_buffer))
                # setup new event: it starts at the old extreme and is confirmed here
                trend_buffer = [1, trend_buffer[3], midprice, midprice, trend_buffer[6], index, index, df.index[index]]
                price_buffer = [1, midprice, midprice]
            # new extreme
            elif midprice < trend_buffer[3]:
                trend_buffer[3], trend_buffer[6] = midprice, index

        # Record the state for this row; copy so later updates do not alter it.
        price_buffer[2] = midprice
        live_states.append(price_buffer.copy())

    trend_df = pd.DataFrame([trend.data_dict for trend in trends], columns=trend_columns)
    live_df = pd.DataFrame(live_states, columns=live_columns)
    return trend_df, live_df

In [96]:
def generate_data(df, thresholds):
    """Run ``profile_data`` once per threshold and collect both of its outputs.

    Args:
        df: price DataFrame, passed unchanged to ``profile_data``.
        thresholds: iterable of theta values; each value is also used as the
            dictionary key for its results.

    Returns:
        Tuple ``(data_dict, trend_dict)``, both keyed by theta:
        ``data_dict[theta]`` is the per-row live-state frame and
        ``trend_dict[theta]`` is the trend summary frame, i.e. the second and
        first elements returned by ``profile_data`` respectively.
    """
    data_dict = {}
    trend_dict = {}

    for theta in thresholds:
        trend_df, live_df = profile_data(df, theta)
        # Keep the two outputs apart: storing trend_df under both dictionaries
        # would silently discard the live-state frame.
        data_dict[theta] = live_df
        trend_dict[theta] = trend_df

    return data_dict, trend_dict

In [97]:
# Reversal thresholds written in percent, then divided by 100 so they are the
# fractional theta values that profile_data compares against pct_change().
thresholds = (
        np.array([0.098, 0.22, 0.48, 0.72, 0.98, 1.22, 1.55, 1.70, 2, 2.55])
        / 100
    )

# Profile the tick data once per threshold; both results are dictionaries keyed by theta.
data_dict, trend_dict = generate_data(tick_df, thresholds)

In [104]:
# Preview the first 20 trends found with the smallest threshold (0.098%).
# The key is taken from `thresholds` itself so the lookup does not depend on a
# retyped float literal matching the computed value bit for bit.
trend_dict[thresholds[0]].head(20)



Unnamed: 0,Direction,Start,DCC,End,Start Index,DCC Index,End Index,DCC Timestamp
0,1,20.831642,20.831642,21.015869,0,0,2,0
1,-1,21.015869,20.986393,20.757956,2,3,4,3
2,1,20.757956,21.074818,21.074818,4,5,5,5
3,-1,21.074818,20.83901,20.433729,5,6,7,6
4,1,20.433729,20.559004,21.096926,7,9,10,9
5,-1,21.096926,20.934811,20.934811,10,12,12,12
6,1,20.934811,21.16324,21.16324,12,13,13,13
7,-1,21.16324,20.861124,20.861124,13,14,14,14
8,1,20.861124,21.236933,22.49699,14,15,22,15
9,-1,22.49699,22.135931,22.135931,22,23,23,23


In [105]:
from helper.dc import calculate_dc, merge_dc_events, DCEvent

# Cross-check: run the project's helper.dc implementation on the same 'ALL'
# prices with the same 0.098% threshold as the smallest theta used above.
upturn_dc, downturn_dc, p_ext = calculate_dc(tick_df['ALL'], 0.098 / 100)
# Wrap each raw entry in a DCEvent, passing "UR" / "DR" as the third argument
# for the upturn / downturn lists.
upturn_dc = [DCEvent(x[0], x[1], "UR") for x in upturn_dc]
downturn_dc = [DCEvent(x[0], x[1], "DR") for x in downturn_dc]
# NOTE(review): the first two fields are passed in swapped order here (x[1], x[0])
# compared with the two lists above; presumably p_ext entries are laid out
# differently by calculate_dc — confirm against helper.dc.
p_ext = [DCEvent(x[1], x[0], x[2]) for x in p_ext]
dc_data, p_ext_data = merge_dc_events(upturn_dc, downturn_dc, p_ext)


In [106]:
# Show the first 20 entries of the merged helper.dc result, for comparison with
# the trend table displayed earlier.
dc_data[:20]

Unnamed: 0,price,event
1,20.934811,UR
3,20.986393,DR
5,21.074818,UR
6,20.83901,DR
9,20.559004,UR
12,20.934811,DR
13,21.16324,UR
14,20.861124,DR
15,21.236933,UR
23,22.135931,DR
