# Convert trade log 
1. into basic pnl design matrix for modeling
1. Design matrix is one record per row


## imports

In [1]:
import pandas as pd
import numpy as np  # for np.nan
import glob  # for text matching
import os  # for path

import tradehelper as th  # local class

pd.set_option("display.max_rows", 20)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 150)

## read in raw data

In [2]:
# activity file csv export from Interactive Brokers, max 30 cols
globbed_files = glob.glob("data/*U106*.csv")
col_names_temp = list(range(30))
df_raw = pd.DataFrame(columns=col_names_temp)

# initial date
init_date = "2015-06-30"

for csv in globbed_files:
    frame = pd.read_csv(csv, names=col_names_temp)
    frame["filename"] = os.path.basename(csv)
    df_raw = df_raw.append(frame)

  df_raw = df_raw.append(frame)
  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


  df_raw = df_raw.append(frame)


In [3]:
# Understand df_raw
df_raw.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,filename
0,Statement,Header,Field Name,Field Value,,,,,,,,,,,,,,,,,,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
1,Statement,Data,BrokerName,Interactive Brokers,,,,,,,,,,,,,,,,,,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
2,Statement,Data,BrokerAddress,,,,,,,,,,,,,,,,,,,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
3,Statement,Data,Title,Activity Statement,,,,,,,,,,,,,,,,,,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
4,Statement,Data,Period,"January 1, 2015 - December 31, 2015",,,,,,,,,,,,,,,,,,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...


In [4]:
# See df_raw available data
df_raw[0].value_counts()[:10]

Borrow Fee Details                                                                                          8871
Trades                                                                                                      5809
IBKR Managed Securities Lent Activity (Stock Yield Enhancement Program)                                     5389
IBKR Managed Securities Lent Activity (Stock Yield Enhancement Program) (Post February 28, 2017)            3604
IBKR Managed Securities Lent Interest Details (Stock Yield Enhancement Program) (Post February 28, 2017)    2467
IBKR Managed Securities Lent Fee Details (Stock Yield Enhancement Program)                                  2305
Change in Dividend Accruals                                                                                 1682
IBKR Managed Securities Lent Interest Details (Stock Yield Enhancement Program)                             1542
Dividends                                                                                       

## Create trading list, after first activity file

In [5]:
df_trades = df_raw[df_raw[0] == "Trades"]
df_trades.columns = df_trades.iloc[0, :]
df_trades.columns = [*df_trades.columns[:-1], "filename"]
cols = df_trades.columns[~df_trades.columns.isin([np.nan])]
df_trades = df_trades[cols]
df_trades = df_trades[df_trades["Header"] == "Data"]
df_trades = df_trades[df_trades["filename"] != os.path.basename(globbed_files[0])]
df_trades.head()

Unnamed: 0,Trades,Header,DataDiscriminator,Asset Category,Currency,Symbol,Date/Time,Quantity,T. Price,C. Price,Proceeds,Comm/Fee,Basis,Realized P/L,MTM P/L,Code,filename
388,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-11, 10:20:02",200,98.159,98.53,-19631.8,-1.0,19632.8,2566.569764,74.2,O,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
389,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-25, 09:44:44",150,100.4,99.44,-15060.0,-1.0,15061.0,0.0,-144.0,O;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
390,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-27, 12:22:10",-600,95.80105,93.42,57480.63,-3.457643592,-70599.8163,-13122.643944,1428.63,C;LI;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
391,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-29, 09:51:29",600,95.58,97.34,-57348.0,-3.0,57351.0,13122.643944,1056.0,O,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
392,Trades,Data,Order,Stocks,USD,AAPL,"2016-02-11, 13:48:52",-600,93.17055,93.7,55902.33,-3.818670794,-70473.643944,-14575.132614,-317.67,C;LI;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...


## create initial portfolio based on first activity file, add port to trades

In [6]:
df_port_init = df_raw[df_raw[0] == "Open Positions"]
df_port_init.columns = df_port_init.iloc[0, :]
df_port_init = df_port_init[df_port_init["Header"] == "Data"]
df_port_init.columns = [*df_port_init.columns[:-1], "filename"]
cols = df_port_init.columns[~df_port_init.columns.isin([np.nan])]
df_port_init = df_port_init[cols]

df_port_init = df_port_init[
    df_port_init["filename"] == os.path.basename(globbed_files[0])
]

df_port_init.head()

# add to trades
df_port_init["Date/Time"] = init_date
df_port_init["T. Price"] = df_port_init["Cost Price"]

df_trades = pd.concat([df_port_init, df_trades])

In [7]:
# update data types for trades & fill nas

df_trades["Date/Time"] = pd.to_datetime(df_trades["Date/Time"], errors="coerce")
numeric_cols = [
    "T. Price",
    "Comm/Fee",
    "Quantity",
]  # T. Price for opening trade includes comm
for col in numeric_cols:
    df_trades[col] = (
        df_trades[col]
        .astype(str)
        .str.strip()
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

df_trades["Comm/Fee"] = df_trades["Comm/Fee"].fillna(0)

# QA
df_trades.dtypes

Open Positions        object
Header                object
DataDiscriminator     object
Asset Category        object
Currency              object
                      ...   
Proceeds              object
Comm/Fee             float64
Basis                 object
Realized P/L          object
MTM P/L               object
Length: 24, dtype: object

In [8]:
# create trades action col and normalize quantity and add ratio for later
df_trades["Action"] = np.where(df_trades["Quantity"] > 0, "B", "S")
df_trades["Quantity"] = abs(df_trades["Quantity"])
df_trades["RatioNewOld"] = 1

## consider corporate actions

In [9]:
# pull corp actions
df_corpact = df_raw[df_raw[0] == "Corporate Actions"]
df_corpact.columns = df_corpact.iloc[0, :]  # col name is at top of block
df_corpact = df_corpact[df_corpact["Header"] == "Data"]
df_corpact.columns = [*df_corpact.columns[:-1], "filename"]
cols = df_corpact.columns[~df_corpact.columns.isin([np.nan])]
df_corpact = df_corpact[cols]

df_corpact = df_corpact[~df_corpact["Description"].isna()]  # remove na's

# add cols to match trades
df_corpact["Symbol"] = (
    df_corpact["Description"]
    .str.split("(", expand=True)[0]
    .str.split(".", expand=True)[0]
)
df_corpact["Action"] = "CA"
df_corpact["Date/Time"] = pd.to_datetime(df_corpact["Date/Time"], errors="coerce")
condlist = [df_corpact["Description"].str.contains("Split"), True]
choicelist = ["Split", ""]
df_corpact["ActionType"] = np.select(condlist, choicelist)

df_splits = df_corpact.loc[df_corpact["ActionType"] == "Split", :]

df_splits["RatioNewOld"] = 1
df_splits.loc[:, "RatioNewOld"] = (
    df_splits["Description"]
    .astype(str)
    .str.split(" for ", expand=True)[0]
    .str.split(" FOR ", expand=True)[0]
    .str.split(" ", expand=True)
    .iloc[:, -1]
    .str[0:2]
    .astype(float)
) / (
    df_splits["Description"]
    .astype(str)
    .str.split(" for ", expand=True)
    .iloc[:, -1]
    .str.split(" FOR ", expand=True)[0]
    .str.split(" ", expand=True)[0]
    .str[0:2]
    .astype(float)
)

# sort by time
df_trades = pd.concat([df_trades, df_splits]).sort_values("Date/Time", ascending=True)

# ratio
df_trades["RatioNewOld"] = df_trades["RatioNewOld"].fillna(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_splits["RatioNewOld"] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_splits.loc[:, "RatioNewOld"] = (


## create completed trade list

In [10]:
# QA
# df_trades = pd.read_csv('data-tests/tradelog2_corpact.csv')

tm = th.TradeManager(store_trades=True, print_trades=False)

tm.process_df(df_trades)

# list of trade objects
complete_trades = tm.get_copy_of_closed_trades()

# pushed to dataframe
df_complete_trades = pd.concat([x.to_df() for x in complete_trades]).reset_index(
    drop=True
)

tm.get_pnl()

475552.1800000005

In [11]:
# understand data types

df_complete_trades.dtypes

Open_Date      datetime64[ns]
Close_Date     datetime64[ns]
Symbol                 object
Quantity              float64
Pnl                   float64
OpenAct                object
CloseAct               object
Open_Price            float64
Close_Price           float64
Comm_Tot              float64
dtype: object

## save output

In [12]:
df_complete_trades.to_csv("output/a_completelog.csv")