In [229]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px

In [230]:
def get_order_book(file_dir):
    '''
    Load a tick-data text file and return its trade records ordered by time.

    Parameters
    ----------
    file_dir : str or file-like
        Location of the header-less, comma-separated tick data (anything
        ``pandas.read_csv`` accepts). Only the first eight columns are read.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``Type`` is ``"T"``, with columns ``Date1``, ``id``,
        ``Type``, ``Source``, ``Quantity`` and ``Price``, sorted by ``Date1``.
        ``Date1`` is still text, so the ordering is lexicographic. Rows that
        share a ``Date1`` value keep the order they had in the file.
    '''
    column_names = ["Date1", "Date2", "id", "Type", "Source", "Mix", "Value", "Quantity"]

    # Read every field as raw text first. Object dtype keeps the strings
    # unparsed and lets the 0 placeholder for empty cells sit next to them.
    ticks = pd.read_csv(
        file_dir,
        dtype=object,
        usecols=np.arange(len(column_names)),
        header=None,
        names=column_names,
    ).fillna(0)

    # Numeric columns; empty cells were already replaced by 0 above.
    ticks = ticks.astype({
        "id": np.int64,
        "Mix": np.float64,
        "Value": np.float64,
        "Quantity": np.int64,
    })

    is_trade = ticks["Type"] == "T"

    # Non-trade rows carry their price in "Value". Trade rows carry the price
    # in "Mix" and the traded size in "Value", so both are moved into place.
    # NOTE(review): "Quantity" is int64 while "Value" is float64; this presumes
    # trade sizes are whole numbers - confirm against the feed format.
    ticks["Price"] = ticks["Value"]
    ticks.loc[is_trade, "Quantity"] = ticks.loc[is_trade, "Value"]
    ticks.loc[is_trade, "Price"] = ticks.loc[is_trade, "Mix"]

    ticks = ticks.drop(columns=["Date2", "Mix", "Value"])

    trade_book = ticks.loc[is_trade]
    # Stable sort: ticks with the same timestamp stay in arrival order.
    return trade_book.sort_values("Date1", kind="mergesort")

In [265]:
# Load the strategy's PnL log with every column as text; empty cells become 0.
file = pd.read_csv("k1_pnl.csv", dtype="str").fillna(0)

# Convert the PnL column from text to float32 so it can be used in arithmetic.
file["Cumulative PnL"] = file["Cumulative PnL"].astype(np.float32)

file.head()

Unnamed: 0,Name,Time,Cumulative PnL
0,kf3,2022-Jan-03 14:30:03.241744,0.0
1,kf3,2022-Jan-03 14:31:03.367597,-311.723999
2,kf3,2022-Jan-03 14:32:03.596173,269.3638
3,kf3,2022-Jan-03 14:33:03.686106,-255.246216
4,kf3,2022-Jan-03 14:34:03.743565,981.072205


In [266]:
# Each "Time" value is split at its spaces: the first piece becomes the
# calendar day ("Date"), the second the time of day ("Clock").
date = file["Time"]
file["Date"] = date.map(lambda stamp: stamp.split(" ")[0])
file["Clock"] = date.map(lambda stamp: stamp.split(" ")[1])
file["Date"].head()

0    2022-Jan-03
1    2022-Jan-03
2    2022-Jan-03
3    2022-Jan-03
4    2022-Jan-03
Name: Date, dtype: object

In [267]:
date = file["Date"]
# Keep only the first occurrence of each day so the list stays in file order.
unique_date = date.drop_duplicates().tolist()

In [274]:
def _first_sunday(year, month):
    '''Return the day of the month of the first Sunday of ``month`` in ``year``.'''
    import calendar  # local import: the notebook's import cell does not load it

    # calendar.weekday() numbers Monday as 0 and Sunday as 6.
    return 1 + (6 - calendar.weekday(year, month, 1)) % 7


def saving_time(date):
    '''
    Tell whether ``date`` falls outside U.S. daylight-saving time.

    Despite the name, ``True`` means standard (winter) time. The plotting
    cell of this notebook then uses the 14:31-20:59 clock window instead of
    13:31-19:59.

    Parameters
    ----------
    date : str
        Calendar day written as ``"<year>-<Mon>-<day>"``, e.g. ``"2022-Jan-03"``,
        with the month as its three-letter English abbreviation.

    Returns
    -------
    bool
        ``True`` before the second Sunday of March and from the first Sunday
        of November onwards; ``False`` in between.

    Raises
    ------
    IndexError
        If ``date`` contains no ``"-"`` separator.
    ValueError
        If the year or day of a March or November date is not an integer.

    Notes
    -----
    Assumes the U.S. changeover rule in force since 2007 (second Sunday of
    March, first Sunday of November) - confirm before using on older data.
    '''
    fields = date.split("-")
    month = fields[1]

    # Months that lie entirely in standard time.
    if month in {"Jan", "Feb", "Dec"}:
        return True

    # Changeover months: compare the day with that year's switch-over Sunday.
    if month == "Mar":
        return int(fields[-1]) < _first_sunday(int(fields[0]), 3) + 7
    if month == "Nov":
        return int(fields[-1]) >= _first_sunday(int(fields[0]), 11)

    return False

In [288]:
# One box per day, built from that day's PnL samples between the session's
# opening and closing minute, plus a dotted line of the running day-over-day total.
fig = go.Figure()
fig.update_layout(
    title='Kalman Filter Strategy Studio Profit and Loss',
    xaxis_title='Date',
    yaxis_title='Dollar ($)',
)

counts = [0]      # running total after each plotted day, seeded with 0
count = 0
date = ["Begin"]  # x-axis labels that pair up with `counts`

for udate in unique_date:
    # saving_time() decides which pair of clock hours brackets the session.
    begin_hour, end_hour = ("14", "20") if saving_time(udate) else ("13", "19")

    date_data = file.loc[file["Date"] == udate]
    pnl = date_data["Cumulative PnL"]
    clock = list(date_data["Clock"])

    # Position of the first sample stamped <begin_hour>:31 ...
    begin_idx = next(
        (i for i, t in enumerate(clock)
         if t.split(":")[0] == begin_hour and t.split(":")[1] == "31"),
        -1,
    )
    # ... and of the last sample stamped <end_hour>:59.
    end_idx = next(
        (i for i in range(len(clock) - 1, -1, -1)
         if clock[i].split(":")[0] == end_hour and clock[i].split(":")[1] == "59"),
        -1,
    )
    if begin_idx < 0 or end_idx < 0:
        # The day lacks one of the two markers, so it is left off the chart.
        continue

    price_open = pnl.iloc[begin_idx]
    price_close = pnl.iloc[end_idx]
    color = "lightseagreen" if price_close > price_open else "indianred"

    count += price_close - price_open
    counts.append(count)
    date.append(udate)

    # NOTE(review): the slice stops one row before `end_idx`, so the closing
    # sample itself is not in the box; and `pnl.min()` is the minimum over the
    # whole day, not just the slice. Confirm both are intended.
    fig.add_trace(go.Box(
        y=pnl.iloc[begin_idx:end_idx] - pnl.min(),
        name=udate,
        marker_color=color,
    ))

fig.add_trace(go.Scatter(x=date, y=counts, line=dict(color='royalblue', width=4, dash='dot')))
fig.show()

In [236]:
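# Disabled experiment, kept for reference only (nothing in this cell runs).
# For every day whose close-minus-open PnL exceeds 1,000,000 in magnitude, it
# binary-searches for the first tick of hour 20 and of hour 21 and, when the
# cumulative PnL falls by 500,000 or more across that boundary, adds the size
# of the drop back to every later row of `file`; it then draws one box per day.
# fig = go.Figure()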
# for udate in unique_date:
#     date_data = file.loc[file["Date"] == udate]
#     pnl = date_data["Cumulative PnL"]
#     open_price = pnl.iloc[0]
#     closed_price = pnl.iloc[-1]

#     diff_price = closed_price - open_price
#     color = "indianred"
#     if closed_price > open_price:
#         color = "lightseagreen"
#     if np.abs(diff_price) > 1000000:
#         print("Date: ", udate)
#         date_clock = date_data["Clock"]
#         left = 0
#         right = len(date_clock) - 1
#         while (left < right):
#             med = (left + right) // 2
#             med_time = date_clock.iloc[med]
#             if (int(med_time.split(":")[0])) < 20:
#                 left = med + 1
#             elif (int(med_time.split(":")[0])) > 20:
#                 right = med - 1
#             else:
#                 prev_time = date_clock.iloc[med - 1]
#                 if (int(prev_time.split(":")[0])) != 20:
#                     price_20 = pnl.iloc[med]
#                     price_19 = pnl.iloc[med - 1]
#                     diff = np.float32(price_20) - np.float32(price_19)
#                     if diff > -500000:
#                         break
#                     print("Discovered a price drop of ", diff)
#                     bad_idx = pnl.index[med]
#                     file.loc[bad_idx:, "Cumulative PnL"] += np.abs(diff)
#                     date_data = file.loc[file["Date"] == udate]
#                     pnl = date_data["Cumulative PnL"]
#                     print("Modified price on date: ", udate)
#                     break
#                 else:
#                     right = med - 1
#         left = 0
#         right = len(date_clock) - 1
#         while (left <= right):
#             med = (left + right) // 2
#             med_time = date_clock.iloc[med]
#             if (int(med_time.split(":")[0])) < 21:
#                 left = med + 1
#             elif (int(med_time.split(":")[0])) > 21:
#                 right = med - 1
#             else:
#                 prev_time = date_clock.iloc[med - 1]
#                 if (int(prev_time.split(":")[0])) != 21:
#                     price_21 = pnl.iloc[med]
#                     price_20 = pnl.iloc[med - 1]
#                     diff = np.float32(price_21) - np.float32(price_20)
#                     if diff > -500000:
#                         break
#                     print("Discovered a price drop of ", diff)
#                     bad_idx = pnl.index[med]
#                     file.loc[bad_idx:, "Cumulative PnL"] += np.abs(diff)
#                     date_data = file.loc[file["Date"] == udate]
#                     pnl = date_data["Cumulative PnL"]
#                     print("Modified price on date: ", udate)
#                     break
#                 else:
#                     right = med - 1
#     open_price = pnl.iloc[0]
#     closed_price = pnl.iloc[-1]

#     diff_price = closed_price - open_price
#     color = "indianred"
#     if closed_price > open_price:
#         color = "lightseagreen"
#     fig.add_trace(go.Box(y=pnl,name=udate,marker_color=color))
# fig.show()