# 1. SETTINGS

In [1]:
# libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display
import scipy.stats

In [2]:
# pandas options: never truncate the column list when a frame is displayed
pd.options.display.max_columns = None

In [3]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
# make sure automatic garbage collection is switched on
import gc
if not gc.isenabled():
    gc.enable()

# 2. FUNCTIONS

In [5]:
### FUNCTION FOR COUNTING MISSINGS
def count_missings(data):
    """Summarise missing values per column of a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``data`` that contains at least one null value,
        ordered by descending count, with two columns: "Total" (number of
        nulls) and "Percent" (nulls as a percentage of the number of rows).
        Empty when no column has missing values.
    """
    # build the null mask a single time; it is as large as the input frame
    missing_counts = data.isnull().sum().sort_values(ascending = False)
    # keep only the columns that actually have gaps
    missing_counts = missing_counts[missing_counts > 0]
    table = pd.DataFrame({
        "Total": missing_counts,
        # every column has len(data) rows, so one denominator serves all of them
        "Percent": missing_counts / len(data) * 100,
    })
    return table

# 3. DATA IMPORT

In [6]:
# import datasets: read the six raw CSV files, one frame per file, in this order
test, cust, bond, markt, macro, trade = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/raw/Challenge_20180423.csv",
        "../data/raw/Customer.csv",
        "../data/raw/Isin.csv",
        "../data/raw/Market.csv",
        "../data/raw/MarketData_Macro.csv",
        "../data/raw/Trade.csv",
    )
)

In [11]:
# check all datasets: first three rows and shape of each frame,
# with a dashed separator between consecutive frames
overview = [
    ("Test data:", test),
    ("Customer data:", cust),
    ("Bonds data:", bond),
    ("Market data:", markt),
    ("Macroeconomic data:", macro),
    ("Trade data:", trade),
]
for position, (label, frame) in enumerate(overview):
    if position > 0:
        print("------------------------------")
    display(frame.head(3))
    print(label, frame.shape)

Unnamed: 0,PredictionIdx,DateKey,CustomerIdx,IsinIdx,BuySell,CustomerInterest
0,a1e0d80784,20180423,1856,13323,Buy,
1,c2cc6cc2a8,20180423,1856,9230,Buy,
2,a8e94f6344,20180423,1780,9157,Buy,


Test data: (484758, 6)
------------------------------


Unnamed: 0,CustomerIdx,Sector,Subsector,Region,Country
0,2975,Banks and Intermediaries,Bank,Americas,BARBADOS
1,1594,Asset Managers & Hedge Funds,,Americas,BERMUDA
2,399,Corporation,Corp - Comm. & Prof. Services,Americas,BERMUDA


Customer data: (3471, 5)
------------------------------


Unnamed: 0,IsinIdx,TickerIdx,ActualMaturityDateKey,IssueDateKey,Seniority,Currency,ActivityGroup,Region,Activity,RiskCaptain,Owner,CompositeRating,IndustrySector,IndustrySubgroup,MarketIssue,IssuedAmount,CouponType
0,0,238,20381231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,1246002000.0,STEP CPN
1,1,238,20331231,20051129,GOV,USD,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,4901086000.0,FIXED
2,2,238,20331231,20051129,GOV,ARS,FLOW LOCAL MARKET,AMERICAS,ARGENTINA,ARGENTINA,EMK ARGENTINA,NR,Government,Sovereign,Domestic,15012450000.0,FIXED


Bonds data: (27411, 17)
------------------------------


Unnamed: 0,IsinIdx,DateKey,Price,Yield,ZSpread
0,1,20160101,104.25,7.835,5.505
1,7,20160101,107.5,7.52,5.541
2,102,20160101,100.746,4.048,2.085


Market data: (9867747, 5)
------------------------------


Unnamed: 0,DateKey,SSE,DAX,EUROSTOXX,VSTOXX,FTSE100,HSI,NIKKEI,DOWJONES_INDU,SP500,VIX,FX_USD.ARS,FX_USD.AUD,FX_USD.BRL,FX_USD.CAD,FX_USD.CHF,FX_USD.CNO,FX_USD.CNY,FX_USD.EUR,FX_USD.GBP,FX_USD.HKD,FX_USD.IDR,FX_USD.JPY,FX_USD.NOK,FX_USD.SGD,FX_USD.TRY,FX_USD.ZAR,MoneyMarket_ARS3M,MoneyMarket_AUD3M,MoneyMarket_CAD3M,MoneyMarket_CHF3M,MoneyMarket_CNO3M,MoneyMarket_CNY3M,MoneyMarket_EUR3M,MoneyMarket_GBP3M,MoneyMarket_HKD3M,MoneyMarket_IDR3M,MoneyMarket_JPY3M,MoneyMarket_NOK3M,MoneyMarket_SGD3M,MoneyMarket_TRY3M,MoneyMarket_USD3M,MoneyMarket_ZAR3M,Swap_ARS10Y,Swap_ARS2Y,Swap_ARS5Y,Swap_AUD10Y,Swap_AUD2Y,Swap_AUD30Y,Swap_AUD5Y,Swap_BRL10Y,Swap_BRL2Y,Swap_BRL5Y,Swap_CAD10Y,Swap_CAD2Y,Swap_CAD30Y,Swap_CAD5Y,Swap_CHF10Y,Swap_CHF2Y,Swap_CHF30Y,Swap_CHF5Y,Swap_CNH10Y,Swap_CNH2Y,Swap_CNH30Y,Swap_CNH5Y,Swap_CNO10Y,Swap_CNO2Y,Swap_CNO30Y,Swap_CNO5Y,Swap_CNY10Y,Swap_CNY2Y,Swap_CNY30Y,Swap_CNY5Y,Swap_EUR10Y,Swap_EUR2Y,Swap_EUR30Y,Swap_EUR5Y,Swap_GBP10Y,Swap_GBP2Y,Swap_GBP30Y,Swap_GBP5Y,Swap_HKD10Y,Swap_HKD2Y,Swap_HKD30Y,Swap_HKD5Y,Swap_IDR10Y,Swap_IDR2Y,Swap_IDR30Y,Swap_IDR5Y,Swap_JPY10Y,Swap_JPY2Y,Swap_JPY30Y,Swap_JPY5Y,Swap_NOK10Y,Swap_NOK2Y,Swap_NOK30Y,Swap_NOK5Y,Swap_SGD10Y,Swap_SGD2Y,Swap_SGD30Y,Swap_SGD5Y,Swap_TRY10Y,Swap_TRY2Y,Swap_TRY5Y,Swap_USD10Y,Swap_USD2Y,Swap_USD30Y,Swap_USD5Y,Swap_ZAR10Y,Swap_ZAR2Y,Swap_ZAR30Y,Swap_ZAR5Y
0,20150101,,,,,,,,,,19.2,0.116959,0.8162,0.377658,0.86103,1.006264,0.161512,0.163399,1.2101,1.55775,0.128941,8.1e-05,0.008341,0.133451,0.756287,0.428964,0.086457,35.230452,2.72454,1.303309,-0.063143,3.701588,3.617925,0.077487,0.561227,0.381829,7.17183,0.112078,1.459648,0.709986,10.001685,0.25657,6.130798,19.989643,29.980614,21.985733,3.04115,2.39895,3.625076,2.558248,,,,2.277279,,2.760043,1.775859,0.409878,,1.082563,-0.046046,3.749868,3.418686,,3.549519,3.750081,3.419989,,3.550031,3.439794,3.218679,,3.239508,0.697389,,1.408397,0.233572,,,,,2.252274,0.95467,,1.814259,8.955845,8.368177,9.552123,8.708471,0.444559,,1.233032,0.180984,1.806932,,2.179804,1.295793,,,,,9.885583,10.069276,9.91512,2.295037,,2.712112,1.774452,7.963276,6.856526,8.331874,7.403867
1,20150102,,9764.73,3139.44,26.2531,6547.8,23857.82,,17832.99,2058.2,17.79,0.116891,0.8114,0.371292,0.851136,0.998851,0.161186,0.163292,1.2008,1.5334,0.128934,8e-05,0.008308,0.131841,0.752757,0.427241,0.085609,33.0,2.736281,1.30285,-0.063,3.701588,3.617925,0.077179,0.56338,0.385,7.1,0.112078,1.49,0.79,10.02,0.255218,6.125,20.0,30.0,22.0,3.108394,2.452905,3.692179,2.625847,11.76786,12.807338,12.202677,2.231223,1.445105,2.716551,1.748069,0.367391,,1.040147,-0.058452,3.750081,3.419989,,3.550031,3.750081,3.419989,,3.550031,3.439794,3.218679,,3.239508,0.640191,0.06212,1.347191,0.211268,1.67489,0.810358,2.109754,1.305976,2.29,0.975,2.75196,1.84,8.95,8.35,9.55,8.7,0.444559,,1.233032,0.180984,1.794443,1.10022,2.168744,1.280511,2.447394,1.132505,2.9519,1.948474,9.94626,10.151881,9.976101,2.23691,0.893242,2.647499,1.746344,7.94,6.84,8.31,7.39
2,20150105,3350.519,9473.16,3023.14,29.6236,6417.16,23721.32,17408.71,17501.65,2020.58,19.92,0.116782,0.80905,0.366757,0.850268,0.993764,0.160822,0.163265,1.1939,1.52555,0.128926,7.9e-05,0.008366,0.131277,0.74926,0.429304,0.085426,32.0,2.742523,1.303171,-0.063,3.749732,3.691439,0.075252,0.56338,0.38357,7.1,0.11214,1.45,0.875,9.9934,0.254194,6.125,20.0,30.0,22.0,3.007227,2.423082,3.591836,2.566016,11.809585,12.736379,12.154331,2.180266,1.420899,2.668708,1.713829,0.350011,-0.263049,0.981356,-0.069792,3.70001,3.40507,,3.500016,3.70001,3.40507,,3.500016,3.45001,3.23007,,3.250016,0.658914,0.063256,1.351414,0.220293,1.613752,0.799833,2.043961,1.251222,2.19,0.94,2.6528,1.75,8.95,8.35,9.55,8.7,0.436972,0.112315,1.225011,0.178599,1.747772,1.063624,2.122179,1.238903,2.417213,1.152474,2.92079,1.923255,9.564968,9.720274,9.575104,2.148063,0.891052,2.549802,1.695687,7.84,6.78,8.21,7.3


Macroeconomic data: (877, 112)
------------------------------


Unnamed: 0,TradeDateKey,CustomerIdx,IsinIdx,BuySell,NotionalEUR,Price,TradeStatus,CustomerInterest
0,2016-12-07,2789,8478,Sell,653168.0,0.0,Unknown,1
1,2017-03-29,2574,14562,Buy,1656487.0,0.0,Unknown,1
2,2017-04-18,2574,4747,Buy,939673.0,0.0,Unknown,1


Trade data: (6762021, 8)


# 4. PREPROCESSING

## 4.1. TRADE DATA

In [8]:
# check missings: list every column of the trade data that has at least one
# null value, with the absolute count and the percentage of rows affected
count_missings(trade)

Unnamed: 0,Total,Percent
Price,4617933,68.292201


In [9]:
# create target variable: 1 for every trade row, 0 where TradeStatus is "Holding"
trade["CustomerInterest"] = 1
# one .loc assignment instead of chained indexing (trade[col][mask] = 0): the chained
# form writes through an intermediate Series, is not guaranteed to reach `trade`,
# and does nothing when pandas Copy-on-Write is active
trade.loc[trade["TradeStatus"] == "Holding", "CustomerInterest"] = 0

In [10]:
# convert dates: parse the YYYYMMDD trade date keys into datetime values
trade["TradeDateKey"] = trade["TradeDateKey"].pipe(pd.to_datetime, format = '%Y%m%d')

In [None]:
### ADDING MISSING COMBINATIONS
# NOTE(review): this cell is entirely commented out and never executed (In [None]).
# The disabled code would build the full cartesian product of every unique
# date x customer x bond x buy/sell value found in `trade` and right-merge it back,
# so that combinations without a trade appear as rows with missing values.
# Presumably disabled because of the size of that product — TODO confirm, then either
# restore a bounded version or delete the cell.
# `pd.core.reshape.util.cartesian_product` lives under `pandas.core`, which pandas
# treats as private API — verify it exists in the installed version before re-enabling.

# extract all unique values
#l1 = list(trade.TradeDateKey.unique())
#l2 = list(trade.CustomerIdx.unique())
#l3 = list(trade.IsinIdx.unique())
#l4 = list(trade.BuySell.unique())

# create lists with combinations
#lp1, lp2, lp3, lp4 = pd.core.reshape.util.cartesian_product([l1, l2, l3, l4])

# convert to dataframe
#combs = pd.DataFrame(dict(TradeDateKey = lp1, CustomerIdx = lp2, IsinIdx = lp3, BuySell = lp4))

# merge missing combinations
#print(trade.shape)
#trade = trade.merge(combs, how = "right")
#print(trade.shape)

## 4.2. CUSTOMER DATA

## 4.3. MACROECONOMIC DATA

## 4.4. BONDS DATA

## 4.5. MARKET DATA

# 5. DATA EXPORT

In [52]:
# export CSV: write the prepared frames without the index, floats fixed to 8 decimals
# NOTE(review): `train` is not assigned in any cell visible in this notebook — the
# loaded frames are test, cust, bond, markt, macro and trade. Unless it is defined in
# a cell not shown here, the next line raises NameError on Restart & Run All.
# Confirm which frame is meant to become `train` and define it before this cell.
train.to_csv("../data/prepared/train.csv", index = False, float_format = "%.8f")
test.to_csv("../data/prepared/test.csv",   index = False, float_format = "%.8f")