  ## Convert the trade log into a basic PnL design matrix for modeling
  The design matrix holds one record per row.

In [1]:
# imports

import pandas as pd
import numpy as np # for np.nan
import glob # for text matching
import os # for path

import tradehelper as th # local class


In [2]:
# read in raw data
### INPUT ###

def load_activity_files(pattern, n_cols=30):
    """Load every activity-statement CSV matching ``pattern`` into one frame.

    Parameters
    ----------
    pattern : str
        Glob pattern of the CSV exports to read.
    n_cols : int, default 30
        Number of positional columns (named 0..n_cols-1) read from each file.

    Returns
    -------
    (list of str, pandas.DataFrame)
        The matched paths in sorted order, and the row-wise concatenation of
        all files with an extra ``filename`` column holding the basename of
        the file each row came from. Every file keeps its own 0-based row
        index. When nothing matches, the frame is empty with columns
        0..n_cols-1.
    """
    col_names = list(range(n_cols))
    # glob returns paths in arbitrary order; sort so that index 0 is
    # deterministic (later cells treat globbed_files[0] as the first file).
    paths = sorted(glob.glob(pattern))

    # Collect the frames and concatenate once: DataFrame.append was removed in
    # pandas 2.0 and growing a frame inside a loop is quadratic.
    frames = []
    for path in paths:
        frame = pd.read_csv(path, names=col_names)
        frame['filename'] = os.path.basename(path)
        frames.append(frame)

    if frames:
        combined = pd.concat(frames)
    else:
        combined = pd.DataFrame(columns=col_names)
    return paths, combined


# activity file csv export from Interactive Brokers, max 30 cols
col_names_temp = list(range(30))

# initial date
init_date = '2015-06-30'

globbed_files, df_raw = load_activity_files('data/*U106*.csv', n_cols=len(col_names_temp))




In [3]:
# Preview the first rows of df_raw (the stacked raw statement rows plus 'filename')
df_raw.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,filename
0,Statement,Header,Field Name,Field Value,,,,,,,...,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
1,Statement,Data,BrokerName,Interactive Brokers,,,,,,,...,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
2,Statement,Data,BrokerAddress,,,,,,,,...,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
3,Statement,Data,Title,Activity Statement,,,,,,,...,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...
4,Statement,Data,Period,"January 1, 2015 - December 31, 2015",,,,,,,...,,,,,,,,,,F1056881_U1060261_20150102_20151231_AS_Fv2_6ea...


In [4]:
# Ten most frequent labels in column 0, i.e. which statement sections dominate df_raw
section_counts = df_raw[0].value_counts()
section_counts.head(10)





Borrow Fee Details                                                                                          6089
IBKR Managed Securities Lent Activity (Stock Yield Enhancement Program)                                     5037
Trades                                                                                                      4239
IBKR Managed Securities Lent Activity (Stock Yield Enhancement Program) (Post February 28, 2017)            3604
IBKR Managed Securities Lent Interest Details (Stock Yield Enhancement Program) (Post February 28, 2017)    2467
IBKR Managed Securities Lent Fee Details (Stock Yield Enhancement Program)                                  2305
IBKR Managed Securities Lent Interest Details (Stock Yield Enhancement Program)                             1253
Change in Dividend Accruals                                                                                 1170
Dividends                                                                                       

In [5]:
# Create trading list, after first activity file
first_file_name = os.path.basename(globbed_files[0])

# keep only the rows of the 'Trades' section
df_trades = df_raw.loc[df_raw[0] == 'Trades']

# the first 'Trades' row supplies the column labels; the last column stays 'filename'
trade_header = list(df_trades.iloc[0, :-1])
df_trades.columns = trade_header + ['filename']

# drop the padding columns whose header cell was empty (NaN label)
named_cols = df_trades.columns[df_trades.columns.notna()]
df_trades = df_trades[named_cols]

# keep actual data rows, and skip the first file (it only seeds the opening portfolio)
is_data_row = df_trades['Header'] == 'Data'
after_first_file = df_trades['filename'] != first_file_name
df_trades = df_trades[is_data_row & after_first_file]
df_trades.head()



Unnamed: 0,Trades,Header,DataDiscriminator,Asset Category,Currency,Symbol,Date/Time,Quantity,T. Price,C. Price,Proceeds,Comm/Fee,Basis,Realized P/L,MTM P/L,Code,filename
388,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-11, 10:20:02",200,98.159,98.53,-19631.8,-1.0,19632.8,2566.569764,74.2,O,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
389,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-25, 09:44:44",150,100.4,99.44,-15060.0,-1.0,15061.0,0.0,-144.0,O;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
390,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-27, 12:22:10",-600,95.80105,93.42,57480.63,-3.457643592,-70599.8163,-13122.643944,1428.63,C;LI;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
391,Trades,Data,Order,Stocks,USD,AAPL,"2016-01-29, 09:51:29",600,95.58,97.34,-57348.0,-3.0,57351.0,13122.643944,1056.0,O,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...
392,Trades,Data,Order,Stocks,USD,AAPL,"2016-02-11, 13:48:52",-600,93.17055,93.7,55902.33,-3.818670794,-70473.643944,-14575.132614,-317.67,C;LI;P,F1056881_U1060261_20160104_20161230_AS_Fv2_6b2...


In [6]:
# create initial portfolio based on first activity file, add port to trades
# NOTE: this cell reassigns df_trades, so re-running it without first re-running
# the cell that builds df_trades prepends the opening positions a second time.
df_port_init = df_raw[df_raw[0]=='Open Positions']
# first 'Open Positions' row supplies the column labels
df_port_init.columns  = df_port_init.iloc[0,:]
df_port_init = df_port_init[df_port_init['Header'] == 'Data']
df_port_init.columns = [*df_port_init.columns[:-1], 'filename']
# drop the padding columns whose header cell was empty (NaN label)
cols = df_port_init.columns[~df_port_init.columns.isin([np.nan])]
df_port_init = df_port_init[cols]

# Only the first activity file seeds the portfolio. Take an explicit copy so the
# column assignments below write to an independent frame rather than to a
# filtered slice (avoids SettingWithCopyWarning).
df_port_init = df_port_init[df_port_init['filename'] == os.path.basename(globbed_files[0])].copy()

# add to trades: opening positions are booked as trades on the configured
# initial date, priced at their cost price
df_port_init['Date/Time'] = init_date
df_port_init['T. Price'] = df_port_init['Cost Price']

df_trades = pd.concat([df_port_init, df_trades])



In [7]:
# update data types for trades & fill nas

# 'Date/Time' holds date-only strings for the opening positions and
# 'YYYY-MM-DD, HH:MM:SS' strings for trades; errors='coerce' silently turns
# anything unparseable into NaT.
# NOTE(review): confirm no unexpected NaT appears, especially on newer pandas
# versions that may parse the whole column with a single inferred format.
df_trades['Date/Time'] = pd.to_datetime(df_trades['Date/Time'], errors='coerce')

numeric_cols = ['T. Price','Comm/Fee','Quantity']
for col in numeric_cols:
    # '$' is a regex metacharacter and the default of `regex` differs between
    # pandas versions, so request literal replacement explicitly.
    df_trades[col] = (
        df_trades[col].astype(str).str.strip()
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

# rows without a commission value (NaN after the float cast) count as zero cost
df_trades['Comm/Fee'] = df_trades['Comm/Fee'].fillna(0)

# QA
df_trades.dtypes


Open Positions               object
Header                       object
DataDiscriminator            object
Asset Category               object
Currency                     object
Symbol                       object
Quantity                    float64
Mult                         object
Cost Price                   object
Cost Basis                   object
Close Price                  object
Value                        object
Unrealized P/L               object
Code                         object
filename                     object
Date/Time            datetime64[ns]
T. Price                    float64
Trades                       object
C. Price                     object
Proceeds                     object
Comm/Fee                    float64
Basis                        object
Realized P/L                 object
MTM P/L                      object
dtype: object

In [8]:
# create trades action col and normalize quantity
# strictly positive quantity -> 'B'; everything else (negative, zero, NaN) -> 'S'
is_buy = df_trades['Quantity'].gt(0)
df_trades['Action'] = np.where(is_buy, 'B', 'S')
# direction now lives in 'Action', so keep only the magnitude
df_trades['Quantity'] = df_trades['Quantity'].abs()



In [9]:
# create completed trade list

# NOTE(review): TradeManager lives in the local tradehelper module; how it
# matches opens to closes is not visible from this notebook.
tm = th.TradeManager(print_trades=False, store_trades=True)
tm.process_df(df_trades)

# list of trade objects
complete_trades = tm.get_copy_of_closed_trades()

# pushed to dataframe: one frame per closed trade, stacked under a fresh 0..n-1 index
trade_frames = [trade.to_df() for trade in complete_trades]
df_complete_trades = pd.concat(trade_frames, ignore_index=True)

# total pnl as reported by the trade manager (displayed as the cell output)
tm.get_pnl()


106959.93999999999

In [10]:
# Inspect the column dtypes of the completed-trade frame before it is saved

df_complete_trades.dtypes


Open_Date      datetime64[ns]
Close_Date     datetime64[ns]
Symbol                 object
Quantity                int64
Pnl                   float64
OpenAct                object
CloseAct               object
Open_Price            float64
Close_Price           float64
Comm_Tot              float64
dtype: object

In [11]:
# save output

# to_csv does not create missing directories, so make sure the target exists
os.makedirs('output', exist_ok=True)
df_complete_trades.to_csv('output/a_completelog.csv')

