# 1. SETTINGS

In [None]:
# libraries
import numpy as np
import pandas as pd
import datetime

In [None]:
# warnings: suppress every warning category for the rest of the session
import warnings
warnings.filterwarnings(action="ignore")

In [None]:
# pandas options: never truncate the column list when displaying a frame
pd.options.display.max_columns = None

# 2. IMPORT

In [None]:
# import datasets: load the raw trade records into a DataFrame
trade = pd.read_csv(filepath_or_buffer="../data/raw/Trade.csv")

In [None]:
# check trade: report (rows, columns) and preview the first five rows
print(f"Trade data: {trade.shape}")
trade.head(5)

# 3. PREPROCESSING

In [None]:
# create target variable: 1 for every trade row, 0 for rows whose
# TradeStatus is "Holding".
# Built in a single vectorized assignment instead of the chained
# `trade[col][mask] = 0` form, which assigns through an intermediate object,
# raises SettingWithCopyWarning and is a no-op under pandas copy-on-write.
trade["CustomerInterest"] = (trade["TradeStatus"] != "Holding").astype("int64")

In [None]:
# deleting holding cases: keep only rows whose status is not "Holding"
trade = trade.loc[trade["TradeStatus"].ne("Holding")]

In [None]:
# convert dates: parse the YYYYMMDD date key into proper datetimes
trade = trade.assign(
    TradeDateKey=pd.to_datetime(trade["TradeDateKey"], format="%Y%m%d")
)

In [None]:
# add week index: (calendar year - 2016) * 52 + ISO week number of the trade date.
# `Series.dt.week` was deprecated in pandas 1.1 and removed in pandas 2.0;
# `dt.isocalendar().week` returns the same ISO-8601 week number (as a nullable
# unsigned integer, so the resulting "Week" column has a nullable integer dtype).
# NOTE(review): the calendar year is combined with the ISO week, so dates close
# to a year boundary can land on an unexpected index (2016-01-01 is ISO week 53
# and maps to 53; 2017-01-01 is ISO week 52 and maps to 104). The formula is
# kept as is so the index values do not change — confirm before altering it.
trade["Week"] = (
    (trade["TradeDateKey"].dt.year - 2016) * 52
    + trade["TradeDateKey"].dt.isocalendar().week
)

In [None]:
# aggregate weekly data: one row per (customer, week, bond, side);
# target = 1 if there is at least one 1 during week, price and notional are averaged
trade = (
    trade
    .groupby(["CustomerIdx", "Week", "IsinIdx", "BuySell"], as_index=False)
    .agg({"CustomerInterest": "max", "Price": "mean", "NotionalEUR": "mean"})
)

In [None]:
# select features: keep the identifying keys plus the two averaged measures
trade = trade.loc[
    :, ["CustomerIdx", "IsinIdx", "Week", "BuySell", "Price", "NotionalEUR"]
]

In [None]:
# export CSV: write the prepared table gzip-compressed, without the row index
trade.to_csv(
    path_or_buf="../data/prepared/price_notional.csv",
    compression="gzip",
    index=False,
)