# 1. SETTINGS

In [2]:
# libraries
import numpy as np
import pandas as pd
import seaborn as sns
from datetime import date
import matplotlib.pyplot as plt
from IPython.display import display
import scipy.stats

In [3]:
# pandas options
# Remove the column limit so wide frames (e.g. the 112-column macro table) are shown in full.
pd.set_option("display.max_columns", None)

In [4]:
# ignore warnings
# NOTE(review): this silences *all* warnings for the rest of the notebook,
# including pandas' SettingWithCopyWarning and library deprecation warnings.
import warnings
warnings.filterwarnings("ignore")

In [5]:
# garbage collection
# NOTE(review): CPython's automatic collector is enabled by default, so this call is
# most likely a no-op — confirm whether an explicit gc.collect() was intended.
import gc
gc.enable()

# 2. FUNCTIONS

In [6]:
### FUNCTION FOR COUNTING MISSINGS
def count_missings(data):
    """Summarise the missing values of a DataFrame, column by column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        Indexed by column name, with "Total" (number of missing cells) and
        "Percent" (share of rows that are missing, in %). Only columns with
        at least one missing value are kept, sorted by descending count.
    """
    # Build the null mask once and derive both statistics from it.
    null_mask = data.isnull()
    n_missing = null_mask.sum()
    pct_missing = n_missing / null_mask.count() * 100

    summary = pd.concat(
        [
            n_missing.sort_values(ascending = False),
            pct_missing.sort_values(ascending = False),
        ],
        axis = 1,
        keys = ["Total", "Percent"],
    )
    return summary[summary["Total"] > 0]

# 3. DATA IMPORT

In [7]:
# import datasets: read every raw CSV into its own frame
# (the targets on the left line up one-to-one with the paths below)
test, cust, bond, markt, macro, trade = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/raw/Challenge_20180423.csv",  # test
        "../data/raw/Customer.csv",            # cust
        "../data/raw/Isin.csv",                # bond
        "../data/raw/Market.csv",              # markt
        "../data/raw/MarketData_Macro.csv",    # macro
        "../data/raw/Trade.csv",               # trade
    )
)

In [43]:
# check all datasets: preview the first rows and report the shape of each frame
separator = "------------------------------"
datasets = [
    ("Test data:", test),
    ("Customer data:", cust),
    ("Bonds data:", bond),
    ("Market data:", markt),
    ("Macroeconomic data:", macro),
    ("Trade data:", trade),
]
for position, (label, frame) in enumerate(datasets):
    # the separator goes between datasets only, never after the last one
    if position > 0:
        print(separator)
    display(frame.head(3))
    print(label, frame.shape)

Unnamed: 0,PredictionIdx,DateKey,CustomerIdx,IsinIdx,BuySell,CustomerInterest
0,a1e0d80784,20180423,1856,13323,Buy,
1,c2cc6cc2a8,20180423,1856,9230,Buy,
2,a8e94f6344,20180423,1780,9157,Buy,


Test data: (484758, 6)
------------------------------


Unnamed: 0,CustomerIdx,Sector,Subsector,Region,Country
0,2975,Banks and Intermediaries,Bank,Americas,BARBADOS
1,1594,Asset Managers & Hedge Funds,,Americas,BERMUDA
2,399,Corporation,Corp - Comm. & Prof. Services,Americas,BERMUDA


Customer data: (3471, 5)
------------------------------


Unnamed: 0,IsinIdx,TickerIdx,ActualMaturityDateKey,IssueDateKey,Seniority,Currency,ActivityGroup,Region,Activity,RiskCaptain,Owner,CompositeRating,IndustrySector,IndustrySubgroup,MarketIssue,IssuedAmount,CouponType
0,0,238,20381231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,1246002000.0,STEP CPN
1,1,238,20331231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,4901086000.0,FIXED
2,2,238,20331231,20051129,GOV,ARS,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,15012450000.0,FIXED


Bonds data: (27411, 17)
------------------------------


Unnamed: 0,IsinIdx,DateKey,Price,Yield,ZSpread
0,1,20160101,104.25,7.835,5.505
1,7,20160101,107.5,7.52,5.541
2,102,20160101,100.746,4.048,2.085


Market data: (9867747, 5)
------------------------------


Unnamed: 0,DateKey,SSE,DAX,EUROSTOXX,VSTOXX,FTSE100,HSI,NIKKEI,DOWJONES_INDU,SP500,VIX,FX_USD.ARS,FX_USD.AUD,FX_USD.BRL,FX_USD.CAD,FX_USD.CHF,FX_USD.CNO,FX_USD.CNY,FX_USD.EUR,FX_USD.GBP,FX_USD.HKD,FX_USD.IDR,FX_USD.JPY,FX_USD.NOK,FX_USD.SGD,FX_USD.TRY,FX_USD.ZAR,MoneyMarket_ARS3M,MoneyMarket_AUD3M,MoneyMarket_CAD3M,MoneyMarket_CHF3M,MoneyMarket_CNO3M,MoneyMarket_CNY3M,MoneyMarket_EUR3M,MoneyMarket_GBP3M,MoneyMarket_HKD3M,MoneyMarket_IDR3M,MoneyMarket_JPY3M,MoneyMarket_NOK3M,MoneyMarket_SGD3M,MoneyMarket_TRY3M,MoneyMarket_USD3M,MoneyMarket_ZAR3M,Swap_ARS10Y,Swap_ARS2Y,Swap_ARS5Y,Swap_AUD10Y,Swap_AUD2Y,Swap_AUD30Y,Swap_AUD5Y,Swap_BRL10Y,Swap_BRL2Y,Swap_BRL5Y,Swap_CAD10Y,Swap_CAD2Y,Swap_CAD30Y,Swap_CAD5Y,Swap_CHF10Y,Swap_CHF2Y,Swap_CHF30Y,Swap_CHF5Y,Swap_CNH10Y,Swap_CNH2Y,Swap_CNH30Y,Swap_CNH5Y,Swap_CNO10Y,Swap_CNO2Y,Swap_CNO30Y,Swap_CNO5Y,Swap_CNY10Y,Swap_CNY2Y,Swap_CNY30Y,Swap_CNY5Y,Swap_EUR10Y,Swap_EUR2Y,Swap_EUR30Y,Swap_EUR5Y,Swap_GBP10Y,Swap_GBP2Y,Swap_GBP30Y,Swap_GBP5Y,Swap_HKD10Y,Swap_HKD2Y,Swap_HKD30Y,Swap_HKD5Y,Swap_IDR10Y,Swap_IDR2Y,Swap_IDR30Y,Swap_IDR5Y,Swap_JPY10Y,Swap_JPY2Y,Swap_JPY30Y,Swap_JPY5Y,Swap_NOK10Y,Swap_NOK2Y,Swap_NOK30Y,Swap_NOK5Y,Swap_SGD10Y,Swap_SGD2Y,Swap_SGD30Y,Swap_SGD5Y,Swap_TRY10Y,Swap_TRY2Y,Swap_TRY5Y,Swap_USD10Y,Swap_USD2Y,Swap_USD30Y,Swap_USD5Y,Swap_ZAR10Y,Swap_ZAR2Y,Swap_ZAR30Y,Swap_ZAR5Y
0,20150101,,,,,,,,,,19.2,0.116959,0.8162,0.377658,0.86103,1.006264,0.161512,0.163399,1.2101,1.55775,0.128941,8.1e-05,0.008341,0.133451,0.756287,0.428964,0.086457,35.230452,2.72454,1.303309,-0.063143,3.701588,3.617925,0.077487,0.561227,0.381829,7.17183,0.112078,1.459648,0.709986,10.001685,0.25657,6.130798,19.989643,29.980614,21.985733,3.04115,2.39895,3.625076,2.558248,,,,2.277279,,2.760043,1.775859,0.409878,,1.082563,-0.046046,3.749868,3.418686,,3.549519,3.750081,3.419989,,3.550031,3.439794,3.218679,,3.239508,0.697389,,1.408397,0.233572,,,,,2.252274,0.95467,,1.814259,8.955845,8.368177,9.552123,8.708471,0.444559,,1.233032,0.180984,1.806932,,2.179804,1.295793,,,,,9.885583,10.069276,9.91512,2.295037,,2.712112,1.774452,7.963276,6.856526,8.331874,7.403867
1,20150102,,9764.73,3139.44,26.2531,6547.8,23857.82,,17832.99,2058.2,17.79,0.116891,0.8114,0.371292,0.851136,0.998851,0.161186,0.163292,1.2008,1.5334,0.128934,8e-05,0.008308,0.131841,0.752757,0.427241,0.085609,33.0,2.736281,1.30285,-0.063,3.701588,3.617925,0.077179,0.56338,0.385,7.1,0.112078,1.49,0.79,10.02,0.255218,6.125,20.0,30.0,22.0,3.108394,2.452905,3.692179,2.625847,11.76786,12.807338,12.202677,2.231223,1.445105,2.716551,1.748069,0.367391,,1.040147,-0.058452,3.750081,3.419989,,3.550031,3.750081,3.419989,,3.550031,3.439794,3.218679,,3.239508,0.640191,0.06212,1.347191,0.211268,1.67489,0.810358,2.109754,1.305976,2.29,0.975,2.75196,1.84,8.95,8.35,9.55,8.7,0.444559,,1.233032,0.180984,1.794443,1.10022,2.168744,1.280511,2.447394,1.132505,2.9519,1.948474,9.94626,10.151881,9.976101,2.23691,0.893242,2.647499,1.746344,7.94,6.84,8.31,7.39
2,20150105,3350.519,9473.16,3023.14,29.6236,6417.16,23721.32,17408.71,17501.65,2020.58,19.92,0.116782,0.80905,0.366757,0.850268,0.993764,0.160822,0.163265,1.1939,1.52555,0.128926,7.9e-05,0.008366,0.131277,0.74926,0.429304,0.085426,32.0,2.742523,1.303171,-0.063,3.749732,3.691439,0.075252,0.56338,0.38357,7.1,0.11214,1.45,0.875,9.9934,0.254194,6.125,20.0,30.0,22.0,3.007227,2.423082,3.591836,2.566016,11.809585,12.736379,12.154331,2.180266,1.420899,2.668708,1.713829,0.350011,-0.263049,0.981356,-0.069792,3.70001,3.40507,,3.500016,3.70001,3.40507,,3.500016,3.45001,3.23007,,3.250016,0.658914,0.063256,1.351414,0.220293,1.613752,0.799833,2.043961,1.251222,2.19,0.94,2.6528,1.75,8.95,8.35,9.55,8.7,0.436972,0.112315,1.225011,0.178599,1.747772,1.063624,2.122179,1.238903,2.417213,1.152474,2.92079,1.923255,9.564968,9.720274,9.575104,2.148063,0.891052,2.549802,1.695687,7.84,6.78,8.21,7.3


Macroeconomic data: (877, 112)
------------------------------


Unnamed: 0,TradeDateKey,CustomerIdx,IsinIdx,BuySell,NotionalEUR,Price,TradeStatus,CustomerInterest
0,20161207,2789,8478,Sell,653168.0,0.0,Unknown,1.0
1,20170329,2574,14562,Buy,1656487.0,0.0,Unknown,1.0
2,20170418,2574,4747,Buy,939673.0,0.0,Unknown,1.0


Trade data: (6762021, 8)


# 4. PREPROCESSING

## 4.1. TRADE DATA

In [44]:
# check missings
# In the recorded output Price is the only column with missing values (about 68% of rows).
count_missings(trade)

Unnamed: 0,Total,Percent
Price,4617933,68.292201


In [45]:
# summary statistics of the numeric trade columns
# NOTE(review): in the recorded output Price ranges from -999999 to 9999999 and
# NotionalEUR has negative values; these look like placeholder/sentinel values —
# confirm before using either column as a feature.
trade.describe()

Unnamed: 0,TradeDateKey,CustomerIdx,IsinIdx,NotionalEUR,Price,CustomerInterest
count,6762021.0,6762021.0,6762021.0,6762021.0,2144088.0,6762021.0
mean,20167500.0,1922.354,14603.69,6313228.0,177900.8,0.3271106
std,6768.082,857.9293,7963.809,271240800.0,1318503.0,0.4691581
min,20160100.0,0.0,0.0,-148554700.0,-999999.0,0.0
25%,20160630.0,1288.0,7392.0,341098.0,93.1378,0.0
50%,20170230.0,2090.0,15229.0,881212.0,101.0,0.0
75%,20170930.0,2574.0,22119.0,2136842.0,107.5,1.0
max,20180420.0,3470.0,27394.0,200000000000.0,9999999.0,1.0


In [10]:
# Rebuild the target: every trade row counts as interest (1) except rows whose
# TradeStatus is "Holding", which get 0. Rows with a missing TradeStatus stay 1.
# NOTE(review): this overwrites the CustomerInterest column read from the CSV.
#
# A single vectorised assignment replaces the chained indexing
# `trade["CustomerInterest"][mask] = 0`, which writes through an intermediate
# Series and is not guaranteed to update `trade` (SettingWithCopyWarning; it has
# no effect at all under pandas copy-on-write).
trade["CustomerInterest"] = (trade["TradeStatus"] != "Holding").astype("int64")

In [11]:
# convert dates
# TradeDateKey holds YYYYMMDD keys (e.g. 20161207); parse them into datetimes.
trade["TradeDateKey"] = pd.to_datetime(trade["TradeDateKey"], format = '%Y%m%d')

In [12]:
### add week index

# function
def week_lag_idx(date, end_date):
    """Return how many weeks each date lies before ``end_date``.

    Parameters
    ----------
    date : pandas.Series of datetime64
        Dates to index. (The parameter name shadows ``datetime.date`` inside
        this function only.)
    end_date : datetime.date, datetime.datetime, pandas.Timestamp or str
        Reference date; a date equal to it gets index 0.

    Returns
    -------
    pandas.Series of float
        0 for ``end_date`` itself, 1 for the 1-7 days before it, 2 for the
        8-14 days before it, and so on.
    """
    # Normalise to a Timestamp first: subtracting a datetime64 Series from a plain
    # datetime.date is not supported by every pandas version.
    reference = pd.Timestamp(end_date)
    days_before = (reference - date).dt.days
    # Adding 0.4 before rounding makes whole-day differences round up to the next
    # week (1/7 + 0.4 > 0.5), so day 0 -> 0 and days 1..7 -> 1.
    return (days_before / 7 + 0.4).round()

# week index
# Lags are counted back from 2018-04-23, the DateKey shown for the test rows,
# so the test set itself is assigned lag 0.
trade["WeekLag"] = week_lag_idx(trade["TradeDateKey"], date(2018, 4, 23))
test["WeekLag"] = 0

In [39]:
# aggregate weekly data
# One row per (week, customer, bond, side); the target is the weekly maximum, i.e.
# 1 as soon as any trade row in the group has CustomerInterest == 1.
# The column lists get their own names instead of `vars`, which shadowed the
# builtin vars(); the selected columns are derived from the group keys so the two
# lists cannot drift apart.
group_keys = ["WeekLag", "CustomerIdx", "IsinIdx", "BuySell"]
weekly_cols = group_keys + ["CustomerInterest"]
train = trade[weekly_cols].groupby(group_keys, as_index = False).max()

The number of observations and bonds per customer differs between the trade and test data. Trade data contains only bonds that a given customer has actually traded. In the test data, the set of bonds for each customer is only a subset of the bonds that customer traded in the past, which leads to a smaller number of observations per customer in the test data. Also, the test set is biased towards very active traders.

In [50]:
# check number of cases per customer: distribution of the number of distinct
# bonds per customer, first in the trade history, then in the test set
bonds_per_customer_trade = trade.groupby("CustomerIdx")["IsinIdx"].nunique()
bonds_per_customer_test = test.groupby("CustomerIdx")["IsinIdx"].nunique()

display(bonds_per_customer_trade.describe())
print("------------------------------")
display(bonds_per_customer_test.describe())

count     3439.000000
mean       405.832800
std       1145.875089
min          1.000000
25%          5.000000
50%         33.000000
75%        233.500000
max      14132.000000
Name: IsinIdx, dtype: float64

------------------------------


count    2495.000000
mean       97.145892
std       301.375388
min         1.000000
25%         3.000000
50%        12.000000
75%        58.000000
max      5044.000000
Name: IsinIdx, dtype: float64

In [None]:
### ADDING MISSING COMBINATIONS
# NOTE(review): this whole cell is disabled. It would build the full cartesian
# product of trade dates x customers x bonds x Buy/Sell and right-join it onto
# `trade`; with thousands of customers and tens of thousands of bonds that
# product is presumably too large to materialise — confirm, or remove the cell.
# It also relies on pd.core.reshape.util.cartesian_product, a private pandas helper.

# extract all unique values
#l1 = list(trade.TradeDateKey.unique())
#l2 = list(trade.CustomerIdx.unique())
#l3 = list(trade.IsinIdx.unique())
#l4 = list(trade.BuySell.unique())

# create lists with combinations
#lp1, lp2, lp3, lp4 = pd.core.reshape.util.cartesian_product([l1, l2, l3, l4])

# convert to dataframe
#combs = pd.DataFrame(dict(TradeDateKey = lp1, CustomerIdx = lp2, IsinIdx = lp3, BuySell = lp4))

# merge missing combinations
#print(trade.shape)
#trade = trade.merge(combs, how = "right")
#print(trade.shape)

## 4.2. CUSTOMER DATA

TODO: Solve the issue of biased sampling due to the mandatory reporting before trusting the statistics!

There are 3471 unique customers in the customer data (3439 of them appear in the trade data), of which 2495 show up in the test set. There are no unknown customers in the test set.

In [14]:
# number of distinct customers in the customer table
cust.CustomerIdx.nunique()

3471

In [15]:
# Share of distinct test-set customers that also appear in the customer table
# (1.0 means every test customer is known).
# np.isin replaces np.in1d, which NumPy has deprecated; the mean of the boolean
# mask equals the original "matches / number of unique test customers".
np.isin(test.CustomerIdx.unique(), cust.CustomerIdx.unique()).mean()

1.0

In [13]:
# 0th-90th percentiles (in steps of 10) of the number of trade rows per customer
# NOTE(review): range(0, 100, 10) stops at 90, so the maximum (100th percentile)
# is not reported — use range(0, 101, 10) if it is wanted.
np.percentile(trade.groupby("CustomerIdx").size(), range(0,100,10))

array([1.0000e+00, 2.0000e+00, 5.0000e+00, 1.1000e+01, 2.5000e+01,
       5.5000e+01, 1.3600e+02, 3.6060e+02, 9.6940e+02, 3.9168e+03])

Customer information:
- 5 different sectors
- 41 subsectors
- 3 regions
- 99 countries

We can include sectors and regions directly, but may want to reduce the dimension of subsectors and countries

In [26]:
# columns available in the customer table
cust.columns

Index(['CustomerIdx', 'Sector', 'Subsector', 'Region', 'Country'], dtype='object')

In [28]:
# One-hot encode the two low-cardinality customer attributes and keep the join key.
# The dummies are cast to uint8 so they are 0/1 integers on every pandas version
# (newer releases return booleans, which a CSV export would write as True/False).
# NOTE(review): the dummy columns are named after the raw category values (no prefix).
sector_dummies = pd.get_dummies(cust.Sector).astype("uint8")
region_dummies = pd.get_dummies(cust.Region).astype("uint8")
cust_dummies = pd.concat([cust.CustomerIdx, sector_dummies, region_dummies], axis=1)

In [30]:
# the column means give the share of customers in each sector / region
cust_dummies.describe()

Unnamed: 0,CustomerIdx,Asset Managers & Hedge Funds,Asset Owners,Banks and Intermediaries,Corporation,Official Institution - OI,Americas,Asia Pacific,"Europe, Middle East and Africa"
count,3471.0,3471.0,3471.0,3471.0,3471.0,3471.0,3471.0,3471.0,3471.0
mean,1735.0,0.431288,0.096226,0.414002,0.018438,0.040046,0.252089,0.222126,0.525785
std,1002.135719,0.495327,0.294943,0.49262,0.13455,0.196096,0.434274,0.415735,0.499407
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,867.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1735.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,2602.5,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0
max,3470.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


Most customers are asset managers or banks/intermediaries (possibly high-volume groups?). The distribution over Americas / Europe, Middle East and Africa / Asia Pacific is roughly 25% / 53% / 22%.

In [40]:
# Attach the customer dummies to both sets. The left joins keep every train/test row.
# validate = "many_to_one" makes pandas raise if CustomerIdx is ever duplicated in
# cust_dummies, which would otherwise silently multiply rows.
# NOTE: re-running this cell merges the same columns again (pandas then adds
# _x/_y suffixes), so rebuild train/test before executing it a second time.
train = train.merge(cust_dummies, on = "CustomerIdx", how = "left", validate = "many_to_one")
test = test.merge(cust_dummies, on = "CustomerIdx", how = "left", validate = "many_to_one")

In [41]:
# confirm that the customer dummy columns were added to the training set
train.columns

Index(['WeekLag', 'CustomerIdx', 'IsinIdx', 'BuySell', 'CustomerInterest',
       'Asset Managers & Hedge Funds', 'Asset Owners',
       'Banks and Intermediaries', 'Corporation', 'Official Institution - OI',
       'Americas', 'Asia Pacific', 'Europe, Middle East and Africa'],
      dtype='object')

In [42]:
# share of weekly training rows that belong to each customer sector
sector_cols = [
    'Asset Managers & Hedge Funds',
    'Asset Owners',
    'Banks and Intermediaries',
    'Corporation',
    'Official Institution - OI',
]
train[sector_cols].mean()

Asset Managers & Hedge Funds    0.735066
Asset Owners                    0.056058
Banks and Intermediaries        0.199650
Corporation                     0.001259
Official Institution - OI       0.007966
dtype: float64

## 4.3. MACROECONOMIC DATA

In [46]:
# summary statistics of the macroeconomic series (one column per index / FX rate / rate curve point)
macro.describe()

Unnamed: 0,DateKey,SSE,DAX,EUROSTOXX,VSTOXX,FTSE100,HSI,NIKKEI,DOWJONES_INDU,SP500,VIX,FX_USD.ARS,FX_USD.AUD,FX_USD.BRL,FX_USD.CAD,FX_USD.CHF,FX_USD.CNO,FX_USD.CNY,FX_USD.EUR,FX_USD.GBP,FX_USD.HKD,FX_USD.IDR,FX_USD.JPY,FX_USD.NOK,FX_USD.SGD,FX_USD.TRY,FX_USD.ZAR,MoneyMarket_ARS3M,MoneyMarket_AUD3M,MoneyMarket_CAD3M,MoneyMarket_CHF3M,MoneyMarket_CNO3M,MoneyMarket_CNY3M,MoneyMarket_EUR3M,MoneyMarket_GBP3M,MoneyMarket_HKD3M,MoneyMarket_IDR3M,MoneyMarket_JPY3M,MoneyMarket_NOK3M,MoneyMarket_SGD3M,MoneyMarket_TRY3M,MoneyMarket_USD3M,MoneyMarket_ZAR3M,Swap_ARS10Y,Swap_ARS2Y,Swap_ARS5Y,Swap_AUD10Y,Swap_AUD2Y,Swap_AUD30Y,Swap_AUD5Y,Swap_BRL10Y,Swap_BRL2Y,Swap_BRL5Y,Swap_CAD10Y,Swap_CAD2Y,Swap_CAD30Y,Swap_CAD5Y,Swap_CHF10Y,Swap_CHF2Y,Swap_CHF30Y,Swap_CHF5Y,Swap_CNH10Y,Swap_CNH2Y,Swap_CNH30Y,Swap_CNH5Y,Swap_CNO10Y,Swap_CNO2Y,Swap_CNO30Y,Swap_CNO5Y,Swap_CNY10Y,Swap_CNY2Y,Swap_CNY30Y,Swap_CNY5Y,Swap_EUR10Y,Swap_EUR2Y,Swap_EUR30Y,Swap_EUR5Y,Swap_GBP10Y,Swap_GBP2Y,Swap_GBP30Y,Swap_GBP5Y,Swap_HKD10Y,Swap_HKD2Y,Swap_HKD30Y,Swap_HKD5Y,Swap_IDR10Y,Swap_IDR2Y,Swap_IDR30Y,Swap_IDR5Y,Swap_JPY10Y,Swap_JPY2Y,Swap_JPY30Y,Swap_JPY5Y,Swap_NOK10Y,Swap_NOK2Y,Swap_NOK30Y,Swap_NOK5Y,Swap_SGD10Y,Swap_SGD2Y,Swap_SGD30Y,Swap_SGD5Y,Swap_TRY10Y,Swap_TRY2Y,Swap_TRY5Y,Swap_USD10Y,Swap_USD2Y,Swap_USD30Y,Swap_USD5Y,Swap_ZAR10Y,Swap_ZAR2Y,Swap_ZAR30Y,Swap_ZAR5Y
count,877.0,875.0,876.0,876.0,875.0,876.0,876.0,875.0,876.0,876.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,877.0,876.0,876.0,876.0,877.0,876.0,877.0,877.0,877.0,875.0,877.0,877.0,877.0,877.0,848.0,877.0,877.0,877.0,848.0,877.0,877.0,877.0,848.0,877.0,877.0,876.0,877.0,877.0,876.0,876.0,876.0,876.0,877.0,877.0,876.0,877.0,877.0,877.0,877.0,877.0,877.0,875.0,877.0,877.0,877.0,876.0,877.0,877.0,876.0,876.0,876.0,876.0,877.0,877.0,877.0,877.0,876.0,877.0,877.0,877.0,877.0,877.0,877.0
mean,20162780.0,3310.48748,11355.682289,3331.244218,20.305856,6873.943179,24726.4875,19148.118977,19709.190297,2256.015862,14.840485,0.075868,0.757052,0.302329,0.7719,1.025955,0.153128,0.153606,1.127517,1.391046,0.128601,7.5e-05,0.008848,0.122075,0.729053,0.31765,0.075063,33.804797,1.982505,1.06166,-0.743487,2.952843,2.93402,-0.21763,0.492492,0.696107,6.820818,0.01337,1.074551,1.204376,10.906146,0.91557,6.911882,14.731049,23.861161,17.2271,2.692911,1.987294,3.110268,2.284876,11.953918,11.489892,11.785195,1.930044,1.198501,2.353916,1.467808,-0.025181,-0.758534,0.567229,-0.496103,3.549205,3.032505,3.841399,3.284564,3.557818,3.037124,3.847369,3.291735,3.431292,2.973169,3.739949,3.199941,0.667481,-0.186979,1.280212,0.093212,1.375099,0.690448,1.641403,1.015341,2.031283,1.22418,2.353585,1.688991,8.927836,8.344221,9.525786,8.683634,0.249206,0.00216,0.860108,0.060174,1.766093,1.003113,2.034099,1.311117,2.465752,1.449929,2.830213,1.98497,11.074696,11.873001,11.49666,2.137045,1.320768,2.46845,1.755107,8.180788,7.216463,8.185374,7.683478
std,9840.161,413.973635,1136.826936,255.117381,5.979965,515.046544,3406.147361,2008.34828,2709.444089,245.959213,4.483849,0.022469,0.026942,0.027013,0.02812,0.028106,0.005431,0.006053,0.05002,0.109302,0.00049,2e-06,0.000494,0.004979,0.01809,0.046718,0.006563,43.144767,0.255217,0.287314,0.080378,0.520162,0.53943,0.137594,0.120332,0.301233,0.922109,0.056469,0.190191,0.258097,1.426325,0.561695,0.474417,3.790999,8.042445,3.858594,0.297298,0.161837,0.313235,0.225607,1.947546,2.670257,2.09401,0.379176,0.474463,0.321232,0.477383,0.246417,0.129639,0.258789,0.210866,0.524074,0.547862,0.585479,0.53127,0.540401,0.553749,0.605678,0.541006,0.576992,0.594971,0.632832,0.59001,0.241029,0.125608,0.276064,0.19479,0.349254,0.22237,0.354662,0.336306,0.344827,0.397272,0.353851,0.379872,0.743028,0.432856,0.742885,0.605465,0.174278,0.091894,0.34516,0.117368,0.275849,0.175876,0.270041,0.259457,0.283805,0.253222,0.285553,0.272496,0.924916,1.715101,1.314005,0.389535,0.54397,0.318286,0.45561,0.438166,0.426848,0.396605,0.46926
min,20150100.0,2655.661,8752.87,2680.35,10.6783,5536.97,18319.58,14952.02,15660.18,1829.08,9.14,0.043478,0.6858,0.241255,0.68646,0.970968,0.14374,0.143657,1.0388,1.2044,0.127389,6.8e-05,0.00796,0.111581,0.689489,0.232794,0.059245,14.85,1.688565,0.714657,-0.964,2.125232,2.125416,-0.331896,0.2765,0.370238,5.140509,-0.076,0.75,0.575,7.335673,0.250925,6.1,9.5,15.0,11.5,1.881958,1.592485,2.220374,1.650611,9.474949,7.184092,8.889641,1.189374,0.583393,1.535012,0.759421,-0.68476,-1.180953,-0.105589,-1.002629,2.735,2.21,2.935,2.455,2.735,2.21,2.935,2.455,2.495,2.06,2.710542,2.27,0.102019,-0.390065,0.645678,-0.335051,0.551349,0.239086,0.787508,0.305858,1.31,0.71,1.641529,1.03,-13.048889,-4.424857,-12.448889,-9.220862,-0.127489,-0.226529,0.053681,-0.241656,1.067163,0.643951,1.327615,0.71309,1.885492,0.831519,2.166265,1.418448,8.735739,8.499006,8.594088,1.252937,0.669068,1.665022,0.933159,6.93,6.06,7.29,6.37
25%,20151100.0,3085.148,10377.2675,3069.8825,15.28635,6401.5375,22137.215,17348.85,17731.5675,2076.5475,11.625,0.058997,0.7393,0.287051,0.752672,1.006441,0.149432,0.14941,1.0907,1.29675,0.128162,7.4e-05,0.008394,0.118018,0.714082,0.275756,0.070482,20.8,1.754142,0.889615,-0.764,2.42,2.399989,-0.328916,0.37594,0.395148,6.65712,-0.03057,0.94,1.01,10.171043,0.344131,6.317,12.0,17.65,14.2,2.582161,1.8865,3.014314,2.202451,10.21696,8.949007,9.921952,1.705337,0.874734,2.205445,1.110166,-0.129575,-0.83285,0.437192,-0.67795,3.05,2.5,3.325528,2.775,3.040225,2.495,3.310137,2.77,2.935,2.395,3.162121,2.640659,0.519186,-0.268855,1.08753,-0.038827,1.163678,0.49115,1.43687,0.767719,1.8,0.91,2.126322,1.44,8.95,8.35,9.55,8.7,0.145152,-0.009253,0.660009,0.0288,1.608502,0.891767,1.92551,1.124531,2.233207,1.263777,2.61142,1.765711,10.432889,10.737509,10.556425,1.921299,0.893981,2.329321,1.383992,7.86,6.89,7.865,7.32
50%,20160910.0,3222.167,11448.48,3407.065,19.918,6936.395,24113.73,19344.15,18330.89,2162.43,13.77,0.066094,0.75985,0.307342,0.77009,1.025247,0.152718,0.152917,1.1196,1.39185,0.128887,7.5e-05,0.00887,0.121448,0.732681,0.329707,0.074733,23.0,1.952123,0.93276,-0.7358,3.07297,3.05,-0.300924,0.56,0.594295,6.89692,-0.01093,1.07,1.167792,11.34,0.843249,7.1,12.9,19.65,15.0,2.756156,2.004276,3.160294,2.342816,11.738689,12.130004,11.69585,1.907906,1.010741,2.442966,1.327658,0.023635,-0.769582,0.627546,-0.474264,3.410307,3.165,3.57,3.239983,3.410302,3.169797,3.560133,3.239509,3.23,3.03,3.512576,3.03,0.698583,-0.232796,1.389596,0.11622,1.330328,0.732546,1.581378,0.999849,2.025,1.1175,2.325214,1.62,8.95,8.35,9.55,8.7,0.196813,0.016129,0.737895,0.069581,1.794198,1.008213,2.064948,1.337324,2.436889,1.402201,2.813255,1.991618,11.131677,11.820483,11.555321,2.196584,1.047324,2.530799,1.731862,8.135,7.15,8.085,7.67
75%,20170710.0,3381.2465,12299.285,3553.5125,23.93445,7327.5225,27525.5425,20261.175,21487.745,2438.4675,16.64,0.104482,0.77555,0.317531,0.792801,1.040799,0.157803,0.157803,1.1606,1.4961,0.128966,7.6e-05,0.009154,0.126018,0.741977,0.346512,0.080488,26.0,2.170082,1.282437,-0.7292,3.44,3.44,-0.068986,0.58313,0.925515,7.16673,0.080932,1.17,1.353324,11.911,1.302139,7.342,20.0,35.0,22.0,2.856709,2.100036,3.280426,2.433674,12.680171,13.322674,12.654833,2.20479,1.498809,2.569204,1.828203,0.128387,-0.658001,0.736323,-0.353748,4.1,3.58,4.412087,3.785,4.1,3.58,4.425,3.835718,4.0,3.57,4.360206,3.775,0.848908,-0.124955,1.48925,0.226731,1.627334,0.879813,1.931239,1.309584,2.235,1.48,2.546965,1.905,8.95,8.35,9.55,8.7,0.411556,0.068158,1.243102,0.132445,1.912485,1.099766,2.172147,1.438079,2.679414,1.595179,3.061667,2.177027,11.485299,12.38486,11.927217,2.352726,1.597077,2.658366,1.997856,8.43,7.54,8.4625,7.9325
max,20180510.0,5166.35,13559.6,3828.78,40.803,7778.64,33154.12,24124.15,26616.71,2872.87,40.74,0.116959,0.8237,0.389226,0.86103,1.169967,0.161512,0.16372,1.2509,1.58805,0.129034,8.1e-05,0.010008,0.13712,0.766019,0.438597,0.088519,259.0,2.751131,1.761234,-0.063,4.149995,4.150653,0.077487,0.79022,1.76985,8.85667,0.11214,1.52,1.88,15.154165,2.374899,7.375,20.07113,38.0,23.082059,3.395894,2.478127,3.877377,2.771396,16.698401,16.730151,16.659714,2.752419,2.328255,2.884327,2.610601,0.435067,-0.263049,1.082563,-0.046046,4.56,3.915,4.96,4.17,4.61829,3.915,5.016856,4.17,4.46,3.895,4.87,4.14,1.231249,0.096493,1.777802,0.507139,2.157094,1.118024,2.41691,1.692267,2.9475,2.435,3.319784,2.795,9.025143,8.562539,9.568177,8.799996,0.677186,0.131985,1.521979,0.295395,2.348926,1.424674,2.621116,1.897248,3.131805,2.14744,3.426263,2.671293,14.415906,17.894594,16.105111,3.055699,2.784215,3.088962,2.958393,9.74,8.315,9.765,9.24


## 4.4. BONDS DATA

TODO: Check that bonds are not tradeable after maturity. If this is correct, the easiest solution would be to manually set the model predictions for matured bonds to 0.

In [52]:
# summary of all bond attributes (numeric and categorical)
# In the recorded output ActualMaturityDateKey starts at 20150430, i.e. some bonds
# matured before the 2018-04-23 prediction date.
bond.describe(include="all")

Unnamed: 0,IsinIdx,TickerIdx,ActualMaturityDateKey,IssueDateKey,Seniority,Currency,ActivityGroup,Region,Activity,RiskCaptain,Owner,CompositeRating,IndustrySector,IndustrySubgroup,MarketIssue,IssuedAmount,CouponType
count,27411.0,27411.0,27411.0,27411.0,27411,27411,27411,27411,27411,27411,27411,27411,27406,27406,27394,27411.0,27411
unique,,,,,9,23,3,8,16,38,105,29,13,338,14,,6
top,,,,,SEN,USD,FLOW G10,AMERICAS,HG CASH,HG CASH NONFIN,US NONFIN 5Y AND IN CASH,NR,Financial,Commer Banks Non-US,Euro mtn,,FIXED
freq,,,,,21233,16358,16552,10732,7928,5608,1714,4874,10046,3715,7374,,24105
mean,13705.0,1748.038598,20239120.0,20131630.0,,,,,,,,,,,,2044049000.0,
std,7913.018451,1028.719783,85155.1,41651.86,,,,,,,,,,,,52740430000.0,
min,0.0,0.0,20150430.0,19550100.0,,,,,,,,,,,,0.0,
25%,6852.5,855.0,20190420.0,20120130.0,,,,,,,,,,,,500000000.0,
50%,13705.0,1753.0,20220130.0,20140620.0,,,,,,,,,,,,750000000.0,
75%,20557.5,2647.0,20250910.0,20160710.0,,,,,,,,,,,,1200000000.0,


In [54]:
# One-hot encode the bond's activity group and composite rating and keep the join key.
# The dummies are cast to uint8 so they are 0/1 integers on every pandas version
# (newer releases return booleans, which a CSV export would write as True/False).
# NOTE(review): the dummy columns are named after the raw category values (no prefix).
activity_group_dummies = pd.get_dummies(bond.ActivityGroup).astype("uint8")
rating_dummies = pd.get_dummies(bond.CompositeRating).astype("uint8")
bond_dummies = pd.concat([bond.IsinIdx,
                          activity_group_dummies,
                          rating_dummies], axis=1)

In [55]:
# Attach the bond dummies to both sets. The left joins keep every train/test row.
# validate = "many_to_one" makes pandas raise if IsinIdx is ever duplicated in
# bond_dummies, which would otherwise silently multiply rows.
# NOTE: re-running this cell merges the same columns again (pandas then adds
# _x/_y suffixes), so rebuild train/test before executing it a second time.
train = train.merge(bond_dummies, on = "IsinIdx", how = "left", validate = "many_to_one")
test = test.merge(bond_dummies, on = "IsinIdx", how = "left", validate = "many_to_one")

## 4.5. MARKET DATA

# 5. DATA EXPORT

In [17]:
# export CSV: write both prepared sets without the index, floats with 8 decimals
for frame, csv_path in (
    (train, "../data/prepared/train.csv"),
    (test, "../data/prepared/test.csv"),
):
    frame.to_csv(csv_path, index = False, float_format = "%.8f")