# 1. SETTINGS

In [None]:
# libraries
import numpy as np
import pandas as pd
from datetime import date

In [None]:
# warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# plots
# `plt` is the conventional alias for the pyplot interface; the bare `matplotlib`
# package does not provide plot()/figure()/show().
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# pandas options
# None removes the cap on the number of columns shown when a frame is displayed
pd.options.display.max_columns = None

# 2. CUSTOMER DATA

In [None]:
# import
# read the raw customer table
customer_csv = "../data/raw/Customer.csv"
cust = pd.read_csv(customer_csv)

In [None]:
# sector labels
# customers in these subsectors get their Sector overwritten with a dedicated label;
# all other rows keep whatever Sector they already have
subsector_to_sector = {
    "Bank": "Bank",
    "Broker Dealer": "Broker",
    "Hedge Fund": "Hedgefund",
    "Independent Asset Manager": "Independent",
}
for subsector, sector in subsector_to_sector.items():
    cust.loc[cust["Subsector"].eq(subsector), "Sector"] = sector

In [None]:
# create dummies for customers
# layout: CustomerIdx, then one indicator column per Sector value, then one per Region value
sector_onehot = pd.get_dummies(cust["Sector"])
region_onehot = pd.get_dummies(cust["Region"])
cust_dummies = pd.concat([cust["CustomerIdx"], sector_onehot, region_onehot], axis = 1)

In [None]:
# check
# print (rows, columns) of the prepared table, then preview its first rows
n_rows, n_cols = cust_dummies.shape
print((n_rows, n_cols))
cust_dummies.head()

In [None]:
# export
# written gzip-compressed and without the row index
cust_out = "../data/prepared/data_cust_v1.csv"
cust_dummies.to_csv(cust_out, compression = "gzip", index = False)

# 3. BOND DATA

In [None]:
# import
# read the raw bond (ISIN) table
isin_csv = "../data/raw/Isin.csv"
bond = pd.read_csv(isin_csv)

In [None]:
# convert dates
# both keys are parsed with the YYYYMMDD format
for date_col in ["ActualMaturityDateKey", "IssueDateKey"]:
    bond[date_col] = pd.to_datetime(bond[date_col], format = '%Y%m%d')

# convert to week
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0; the ISO week number
# is read from dt.isocalendar() instead. Casting it to the dtype of dt.year keeps the
# result a plain integer column when all dates are present and a float column with NaN
# when some are missing, as before.
# NOTE(review): the index combines the *calendar* year with the *ISO* week, so days at
# a year boundary that belong to the neighbouring ISO year (e.g. 2017-01-01 is ISO
# week 52) get an index far from their neighbours. Left unchanged so the values stay
# comparable with week indices computed elsewhere - confirm whether to correct it.
for week_col, date_col in [("MaturityWeek", "ActualMaturityDateKey"), ("IssueWeek", "IssueDateKey")]:
    bond_years = bond[date_col].dt.year
    bond_iso_weeks = bond[date_col].dt.isocalendar().week.astype(bond_years.dtype)
    bond[week_col] = (bond_years - 2016) * 52 + bond_iso_weeks

In [None]:
# simple rating
# collapse the composite rating into its letter family (A/B/C/D); every other value,
# including missing ones, keeps the default "NR"
bond["Rating"] = "NR"
for letter in "ABCD":
    # e.g. for "A": A-, A+, A, AA-, AA+, AA, AAA-, AAA+, AAA
    grades = [letter * repeat + sign for repeat in (1, 2, 3) for sign in ("-", "+", "")]
    # .loc writes into `bond` itself. The chained form bond["Rating"][mask] = ...
    # assigns through an intermediate Series, which pandas does not guarantee to
    # propagate back to the frame (and which is a no-op under copy-on-write).
    bond.loc[bond.CompositeRating.isin(grades), "Rating"] = letter

In [None]:
# create bond dummies
# layout: id and week columns first, then one block of indicator columns per
# categorical attribute, in the order listed below
categorical_cols = ["Seniority", "ActivityGroup", "Rating", "CouponType"]
onehot_blocks = [pd.get_dummies(bond[col]) for col in categorical_cols]
key_cols = [bond["IsinIdx"], bond["MaturityWeek"], bond["IssueWeek"]]
bond_dummies = pd.concat(key_cols + onehot_blocks, axis = 1)

In [None]:
# check
# print (rows, columns) of the prepared table, then preview its first rows
n_rows, n_cols = bond_dummies.shape
print((n_rows, n_cols))
bond_dummies.head()

In [None]:
# export
# written gzip-compressed and without the row index
bond_out = "../data/prepared/data_bond_v1.csv"
bond_dummies.to_csv(bond_out, compression = "gzip", index = False)

# 4. MACRO DATA

In [None]:
# import data
# read the raw macro market table
macro_csv = "../data/raw/MarketData_Macro.csv"
macro = pd.read_csv(macro_csv)

In [None]:
# fill missings
# each gap is filled from the first available neighbouring row, tried in this order:
# one row back, two rows back, one row ahead, two rows ahead. Every shift is taken
# from the table as it was before any filling.
macro_unfilled = macro
for offset in (1, 2, -1, -2):
    macro = macro.fillna(macro_unfilled.shift(offset))

In [None]:
# convert dates
macro["DateKey"] = pd.to_datetime(macro["DateKey"], format = '%Y%m%d')

# compute week and month
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0; the ISO week number
# is read from dt.isocalendar() instead. Casting it to the dtype of dt.year keeps Week
# an integer column when all dates are present (float with NaN otherwise), as before.
# NOTE(review): calendar year + ISO week means days at a year boundary that belong to
# the neighbouring ISO year (e.g. 2016-01-01 is ISO week 53) are grouped with a
# distant week. Left unchanged so indices stay comparable with week indices computed
# elsewhere - confirm whether to correct it.
macro_years = macro.DateKey.dt.year
macro_iso_weeks = macro.DateKey.dt.isocalendar().week.astype(macro_years.dtype)
macro["Week"]  = (macro_years - 2016) * 52 + macro_iso_weeks
macro["Month"] = (macro_years - 2016) * 12 + macro.DateKey.dt.month
del macro["DateKey"]

In [None]:
# weekly and monthly aggregation
# average every remaining column within each week / month index
macro1 = macro.groupby("Week").mean()
macro2 = macro.groupby("Month").mean()

In [None]:
# percentage differences
# change versus the previous period, divided by the *current* period's value;
# missing results (such as the first period) are set to 0
weekly_change  = macro1.sub(macro1.shift(1))
monthly_change = macro2.sub(macro2.shift(1))
macro_diff1 = weekly_change.div(macro1).fillna(0)
macro_diff2 = monthly_change.div(macro2).fillna(0)

In [None]:
# market indices
# NOTE(review): "DOWJONAES_INDU" looks misspelled. DataFrame.filter silently skips
# labels that are not present, so if the column is spelled differently in the data it
# is dropped here without any error - confirm against the header of
# MarketData_Macro.csv before changing the label.
index_cols = ["SSE","DAX","EUROSTOXX","VSTOXX","FTSE100","HSI","NIKKEI","DOWJONAES_INDU","SP500","VIX"]

# variable names
# The "week_" / "month_" prefixes are derived from whichever columns survive the
# filter, so a name can never be attached to the wrong series and the number of names
# always matches the number of columns. The index (Week / Month) is restored as a
# regular, unprefixed column afterwards.
ind_trend1 = macro_diff1.filter(index_cols).add_prefix("week_").reset_index()
ind_trend2 = macro_diff2.filter(index_cols).add_prefix("month_").reset_index()

In [None]:
### exchange rates (week)
# macro_diff1 holds relative changes, so the USD entry must be 0 (no change); the
# previous constant of 1 encoded a +100% move in every period.
# NOTE(review): presumably the FX columns are quoted against USD and the USD row is
# needed for USD-denominated instruments - confirm with the consumer of this table.
# assign() builds a new frame instead of writing into the result of filter().
fx_diff1 = macro_diff1.filter(like = "FX", axis = 1).assign(USD = 0.0)
fx_diff1 = fx_diff1.reset_index().melt(id_vars = "Week", var_name = "Currency", value_name = "cur_trend_week")
# keep only the last three characters of the former column name as the currency code
fx_diff1["Currency"] = fx_diff1.Currency.str[-3:]


### exchange rates (month)
# same construction on the monthly relative changes
fx_diff2 = macro_diff2.filter(like = "FX", axis = 1).assign(USD = 0.0)
fx_diff2 = fx_diff2.reset_index().melt(id_vars = "Month", var_name = "Currency", value_name = "cur_trend_month")
fx_diff2["Currency"] = fx_diff2.Currency.str[-3:]

In [None]:
# remove irrelevant weeks
# keep rows whose Week lies strictly between 0 and 122, or whose Month lies strictly
# between 0 and 29
ind_trend1 = ind_trend1.loc[ind_trend1["Week"].gt(0)  & ind_trend1["Week"].lt(122)]
ind_trend2 = ind_trend2.loc[ind_trend2["Month"].gt(0) & ind_trend2["Month"].lt(29)]
fx_diff1   = fx_diff1.loc[fx_diff1["Week"].gt(0)      & fx_diff1["Week"].lt(122)]
fx_diff2   = fx_diff2.loc[fx_diff2["Month"].gt(0)     & fx_diff2["Month"].lt(29)]

In [None]:
# check
# print (rows, columns) for each of the four prepared macro tables
for prepared in (ind_trend1, ind_trend2, fx_diff1, fx_diff2):
    print(prepared.shape)

In [None]:
# export
# each table is written gzip-compressed and without the row index
macro_outputs = {
    "../data/prepared/data_ind1_v1.csv": ind_trend1,
    "../data/prepared/data_ind2_v1.csv": ind_trend2,
    "../data/prepared/data_fx1_v1.csv":  fx_diff1,
    "../data/prepared/data_fx2_v1.csv":  fx_diff2,
}
for out_path, prepared in macro_outputs.items():
    prepared.to_csv(out_path, compression = "gzip", index = False)

# 5. MARKET DATA

In [None]:
# load dataset
market  = pd.read_csv("../data/raw/Market.csv")

# convert dates
market["DateKey"] = pd.to_datetime(market["DateKey"], format = '%Y%m%d')

# add week and month index
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0; the ISO week number
# is read from dt.isocalendar() instead. Casting it to the dtype of dt.year keeps Week
# an integer column when all dates are present (float with NaN otherwise), as before.
# NOTE(review): calendar year + ISO week means days at a year boundary that belong to
# the neighbouring ISO year (e.g. 2017-01-01 is ISO week 52) get an index far from
# their neighbours. Left unchanged so indices stay comparable with week indices
# computed elsewhere - confirm whether to correct it.
market_years = market.DateKey.dt.year
market_iso_weeks = market.DateKey.dt.isocalendar().week.astype(market_years.dtype)
market["Week"]  = (market_years - 2016) * 52 + market_iso_weeks
market["Month"] = (market_years - 2016) * 12 + market.DateKey.dt.month
del market["DateKey"]

In [None]:
# compute weekly difference
# per bond and week: mean of every column, then the relative change versus that bond's
# previous available week
# NOTE(review): `market` still carries the Month column at this point, so its weekly
# mean is aggregated too and ends up as "Month_diff_week_mean" (likewise
# "Week_diff_month_mean" in the monthly table) - confirm these columns are wanted.
tmp1 = market.groupby(["IsinIdx", "Week"]).agg(["mean"])
# flatten the (column, "mean") pairs into names such as "<column>_diff_week_mean"
tmp1.columns = ["_diff_week_".join(col).strip() for col in tmp1.columns.values]
tmp1 = tmp1.groupby("IsinIdx").pct_change()
tmp1 = tmp1.reset_index()
# shift the key forward so that week w is matched with the change measured in week w-1
# (this line previously read `tmp.Week`, a name that is never defined -> NameError on a
# fresh kernel)
tmp1["Week"] = tmp1.Week + 1

# compute monthly difference
# NOTE(review): unlike the weekly table, the Month key is not shifted by one period -
# confirm whether the monthly change is meant to be unlagged.
tmp2 = market.groupby(["IsinIdx", "Month"]).agg(["mean"])
tmp2.columns = ["_diff_month_".join(col).strip() for col in tmp2.columns.values]
tmp2 = tmp2.groupby("IsinIdx").pct_change()
tmp2 = tmp2.reset_index()

In [None]:
# merge data
# start from the key columns only (rows are not de-duplicated), attach the weekly
# changes by (IsinIdx, Week) and the monthly changes by (IsinIdx, Month), then drop
# the Month key
market = (
    market[["IsinIdx", "Week", "Month"]]
    .merge(tmp1, on = ["IsinIdx", "Week"], how = "left")
    .merge(tmp2, on = ["IsinIdx", "Month"], how = "left")
    .drop(columns = "Month")
)

In [None]:
# check
# print (rows, columns) of the prepared table, then preview its first rows
n_rows, n_cols = market.shape
print((n_rows, n_cols))
market.head()

In [None]:
# save data
# written gzip-compressed and without the row index
market_out = "../data/prepared/data_market_v1.csv"
market.to_csv(market_out, compression = "gzip", index = False)