 # add attributes to trade log
 Design matrix is one record per row

## imports

In [1]:
import pandas as pd
import numpy as np # for np.nan
import os # for path

## INPUT ###

In [2]:
# read in raw data


# formatted tradelog
trades_filename = 'output/a_completelog.csv'
df_complete_trades = pd.read_csv(trades_filename)

# attributes 1 from own log
attr_filename = 'data/PCM-Tracking - LogHist.csv'
df_raw_attr = pd.read_csv(attr_filename)
df_raw_attr['filename'] = os.path.basename(attr_filename)
df_raw_attr = df_raw_attr.append(df_raw_attr)

# attributes 2 
# TODO 

  df_raw_attr = df_raw_attr.append(df_raw_attr)


## ensure date time for open for complete trades

In [3]:
df_complete_trades['Open_Date'] = pd.to_datetime(df_complete_trades['Open_Date'], errors='coerce')

In [4]:
# check complete trades

df_complete_trades.dtypes

Unnamed: 0              int64
Open_Date      datetime64[ns]
Close_Date             object
Symbol                 object
Quantity              float64
Pnl                   float64
OpenAct                object
CloseAct               object
Open_Price            float64
Close_Price           float64
Comm_Tot              float64
dtype: object

## clean attribute columns

In [5]:
col_dict_attr = {
    'DATE' : 'DATE',
    'CONTRACT' : 'CONTRACT',
    'TIME':'TIME',
    'ACTION':'ACTION',
    'PRICE':'PRICE',
    'QTYCHG':'QTYCHG',
    'COMMISSION':'COMMISSION',
    
    'PCTRETURN': 'PCTRETURN',
}

df_clean_attr = df_raw_attr.copy(deep=True)
df_clean_attr.columns = pd.Series(df_clean_attr.columns.astype(str).str.upper().str.strip())
df_clean_attr.columns = pd.Series(df_clean_attr.columns).map(col_dict_attr)    .fillna(pd.Series(df_clean_attr.columns))

df_clean_attr['ACTION'] = df_clean_attr['ACTION'].astype(str).str.strip()

# pull out macro / non trades
df_macro = df_clean_attr[
    ~ df_clean_attr['ACTION'].astype(str).str.contains('BOT') & 
    ~ df_clean_attr['ACTION'].astype(str).str.contains('SLD') &
    ~ df_clean_attr['ACTION'].astype(str).str.contains('END')
]
                    
df_clean_attr = df_clean_attr[ 
    df_clean_attr['ACTION'].astype(str).str.contains('BOT') | 
    df_clean_attr['ACTION'].astype(str).str.contains('SLD') |
    df_clean_attr['ACTION'].astype(str).str.contains('END')
]

df_clean_attr.head()

Unnamed: 0,DATE,ACTION,CONTRACT,QTYCHG,PRICE,TIME,UNNAMED: 6,COMMISSION,UNNAMED: 8,CASH CHG (PNL),...,DAYSTOFYEND,FYEPSNXT,GROWTH*0.5TO0.75,ROIC (BW ROA ROE),TGT FWD P/E,YEARS TO NORMALIZATION,LASTUPDATED,CATEGORY,COMMENTS.1,FILENAME
4,5/30/2012,BOT,AIG,500,$29.70,1:30:19 PM,,2.5,,"($14,852.50)",...,,,,,,,,,,PCM-Tracking - LogHist.csv
5,5/30/2012,BOT,AAPL,12,$569.32,1:36:31 PM,,1.0,,"($6,832.84)",...,,,,,,,,,,PCM-Tracking - LogHist.csv
6,5/30/2012,SLD,KGC,76,$7.87,1:56:01 PM,,1.0,,$597.12,...,,,,,,,,,,PCM-Tracking - LogHist.csv
7,5/30/2012,SLD,KGC,100,$7.87,1:56:01 PM,,0.0,,$787.00,...,,,,,,,,,,PCM-Tracking - LogHist.csv
8,5/30/2012,SLD,KGC,224,$7.87,1:56:01 PM,,1.0,,"$1,761.88",...,,,,,,,,,,PCM-Tracking - LogHist.csv


In [6]:
# update data types for attr

df_clean_attr['DATE'] = pd.to_datetime(df_clean_attr['DATE'],errors='coerce') 
numeric_cols = ['PRICE','COMMISSION','QTYCHG']
for col in numeric_cols:
    df_clean_attr[col] = (
        df_clean_attr[col].astype(str).str.strip()
        .str.replace('$','').str.replace(',','').astype(float)
    )

# QA
df_clean_attr.dtypes   

  df_clean_attr[col].astype(str).str.strip()


DATE                                    datetime64[ns]
ACTION                                          object
CONTRACT                                        object
QTYCHG                                         float64
PRICE                                          float64
TIME                                            object
UNNAMED: 6                                      object
COMMISSION                                     float64
UNNAMED: 8                                      object
CASH CHG (PNL)                                  object
COMMENTS                                        object
PCTRETURN                                       object
DETAILS                                        float64
STARTDATE                                       object
COMPANY NAME (IN ALPHABETICAL ORDER)            object
TICKER                                          object
STOP                                            object
% TO STOP                                       object
CURRENT PR

## Create More Features

In [9]:
df_clean_attr['DayOfWeek0Mon'] = df_clean_attr['DATE'].dt.dayofweek

## merge attr to completed trades

In [10]:


df_complete_trades = df_complete_trades.sort_values(['Open_Date']) 
df_clean_attr = df_clean_attr.sort_values(['DATE'])

df_clean_attr = df_clean_attr.rename(columns={'CONTRACT':'Symbol'}) 

# get closeset match
df_comptrade_wattr = pd.merge_asof(
    df_complete_trades, df_clean_attr, by = 'Symbol', left_on=['Open_Date'], 
    right_on=['DATE'], suffixes=('','_a') 
)

In [11]:
# save output

df_comptrade_wattr.to_csv('output/b_completewattr.csv')