## Module Installation & Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')
!pip install catboost -q

## Load Module and data

In [2]:
import gc
import time
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import imblearn.over_sampling as ovs

from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import StratifiedKFold
from sklearn.decomposition import PCA, TruncatedSVD, LatentDirichletAllocation
from sklearn.preprocessing import StandardScaler


from sklearn.ensemble import *
from sklearn.cluster import KMeans
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier

pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings('ignore')

path = "/content/drive/MyDrive/banking-transaction-categoriser/"
seed = 21 
le = LabelEncoder()

In [3]:
Train = pd.read_csv(path+'Train.csv')
Test = pd.read_csv(path+'Test.csv')
Sub = pd.read_csv(path+'SampleSubmission.csv')

display(Train.head(5), Test.head(5), Sub.head(5))
Train.shape, Test.shape, Sub.shape

Unnamed: 0,DATE,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category
0,2014-02-28,NEFT/FDRL401249529/INDIAFORENSIC,84903292,,0.0,15000000.0,15000000.0,Money-Transfer
1,2014-02-28,NEFT/FDRL401249532/INDIAFORENSIC,84903292,,0.0,15000000.0,60000000.0,Money-Transfer
2,2014-02-28,NEFT/FDRL401249534/INDIAFORENSIC,84903292,,0.0,15000000.0,90000000.0,Money-Transfer
3,2014-02-28,NEFT/FDRL401249590/INDIAFORENSIC,84903292,,0.0,10000000.0,100000000.0,Money-Transfer
4,2014-02-28,NEFT/FDRL401249531/INDIAFORENSIC,84903292,,0.0,15000000.0,45000000.0,Money-Transfer


Unnamed: 0,DATE,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,ID
0,2014-02-28,NEFT/FDRL401249530/INDIAFORENSIC,84903292,,0.0,15000000.0,30000000.0,2014-02-28 00:00:00X3000000000
1,2014-02-28,NEFT/FDRL401249533/INDIAFORENSIC,84903292,,0.0,15000000.0,75000000.0,2014-02-28 00:00:00X7500000000
2,2014-03-08,RTGSCHARGESANDSTAX/RAT,84903292,,28.63,0.0,45000028.63,2014-03-08 00:00:00X4500002863
3,2014-03-08,RTGS/YESBH16068986176/INDFOR,84903292,,15000000.0,0.0,60000028.63,2014-03-08 00:00:00X6000002863
4,2014-03-08,RTGSCHARGESANDSTAX/RAT,84903292,,28.63,0.0,60000057.26,2014-03-08 00:00:00X6000005726


Unnamed: 0,ID,Bank Charges,Bill-Payments,Cash-Pickup,Cell Phone and Airtime,Cheque-Payment,Deposit,Donations,General Purchases,Insurance,Interest,Internet and IT Services,Loan Repayment,Merchant-Payment,Money-Transfer,Professional services,Reversal,Salary and wages,Savings and Investments,Shopping,"Transport, Travel, and Logistics"
0,2014-02-28 00:00:00X3000000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2014-02-28 00:00:00X7500000000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2014-03-08 00:00:00X4500002863,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2014-03-08 00:00:00X6000002863,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2014-03-08 00:00:00X6000005726,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


((57974, 8), (22625, 8), (22625, 21))

## EDA & Feature Engineering

In [4]:
# null values
print('Missing values for Train:', Train.isna().sum())
print('')
print('Missing values for Test:', Test.isna().sum())

Missing values for Train: DATE                       0
TRANSACTION DETAILS     1651
Account_NO                 0
CHQ.NO.                57586
WITHDRAWAL AMT             0
DEPOSIT AMT                0
BALANCE AMT                0
Category                1771
dtype: int64

Missing values for Test: DATE                       0
TRANSACTION DETAILS        0
Account_NO                 0
CHQ.NO.                22504
WITHDRAWAL AMT             0
DEPOSIT AMT                0
BALANCE AMT                0
ID                         0
dtype: int64


In [5]:
# missing values

Train['CHQ.NO.'].fillna(-1, inplace=True)
Test['CHQ.NO.'].fillna(-1, inplace=True)
Train['TRANSACTION DETAILS'].fillna("", inplace=True)
Train = Train[Train.Category.notnull()].reset_index(drop=True)

Train.shape, Test.shape

((56203, 8), (22625, 8))

In [6]:
ID = Test.ID
all_data = pd.concat([
        Train.assign(train=1),
        Test.drop(columns='ID').assign(train=0)
])

display(all_data.head(2), all_data.tail(2), all_data.shape)

Unnamed: 0,DATE,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train
0,2014-02-28,NEFT/FDRL401249529/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,15000000.0,Money-Transfer,1
1,2014-02-28,NEFT/FDRL401249532/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,60000000.0,Money-Transfer,1


Unnamed: 0,DATE,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train
22623,2021-04-16,INDIAFORENSICRUPSETT050319,4969373,-1.0,6822.69,0.0,24431291.37,,0
22624,2021-04-16,INDIAFORENSICRUPSETT050319,4969373,-1.0,0.0,84.38,64122.17,,0


(78828, 9)

In [7]:
## date features

all_data['DATE'] = pd.to_datetime(all_data.DATE)

all_data['month'] = all_data.DATE.dt.month
all_data['day'] = all_data.DATE.dt.day
all_data['year'] = all_data.DATE.dt.year
all_data['dayofweek'] = all_data.DATE.dt.dayofweek
all_data['quarter'] = all_data.DATE.dt.quarter
all_data['weekofyear'] = all_data.DATE.dt.weekofyear

all_data.drop(columns='DATE', inplace=True)

all_data.head()

Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear
0,NEFT/FDRL401249529/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,15000000.0,Money-Transfer,1,2,28,2014,4,1,9
1,NEFT/FDRL401249532/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,60000000.0,Money-Transfer,1,2,28,2014,4,1,9
2,NEFT/FDRL401249534/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,90000000.0,Money-Transfer,1,2,28,2014,4,1,9
3,NEFT/FDRL401249590/INDIAFORENSIC,84903292,-1.0,0.0,10000000.0,100000000.0,Money-Transfer,1,2,28,2014,4,1,9
4,NEFT/FDRL401249531/INDIAFORENSIC,84903292,-1.0,0.0,15000000.0,45000000.0,Money-Transfer,1,2,28,2014,4,1,9


In [8]:
# type of the transaction
all_data['is_deposit'] = (all_data['DEPOSIT AMT']>0).astype(int)
all_data['is_withdraw'] = (all_data['DEPOSIT AMT']>0).astype(int)
all_data['deposit_balance'] = all_data['DEPOSIT AMT'] / all_data['BALANCE AMT']
all_data['withdraw_balance'] = all_data['WITHDRAWAL AMT'] / all_data['BALANCE AMT']
all_data['transaction_amount'] = all_data['WITHDRAWAL AMT'] + all_data['BALANCE AMT']
all_data['balance_before_transaction'] = all_data['BALANCE AMT'] + all_data['WITHDRAWAL AMT'] - all_data['DEPOSIT AMT']

all_data.tail(2)

Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear,is_deposit,is_withdraw,deposit_balance,withdraw_balance,transaction_amount,balance_before_transaction
22623,INDIAFORENSICRUPSETT050319,4969373,-1.0,6822.69,0.0,24431291.37,,0,4,16,2021,4,2,15,0,0,0.0,0.000279,24438114.06,24438114.06
22624,INDIAFORENSICRUPSETT050319,4969373,-1.0,0.0,84.38,64122.17,,0,4,16,2021,4,2,15,1,1,0.001316,0.0,64122.17,64037.79


In [9]:
# aggregated features form Account number

all_data['gb_feature_account_blance'] = all_data.groupby(['Account_NO'])['BALANCE AMT'].transform('mean')
all_data['gb_feature_account_balnce_diff'] = all_data['BALANCE AMT'] - all_data['gb_feature_account_blance']

all_data['gb_feature_account_WITHDRAWAL'] = all_data.groupby(['Account_NO'])['WITHDRAWAL AMT'].transform('mean')
all_data['gb_feature_account_WITHDRAWAL_diff'] = all_data['WITHDRAWAL AMT'] - all_data['gb_feature_account_WITHDRAWAL']

all_data['gb_feature_account_DEPOSIT'] = all_data.groupby(['Account_NO'])['DEPOSIT AMT'].transform('mean')
all_data['gb_feature_account_DEPOSI_diff'] = all_data['DEPOSIT AMT'] - all_data['gb_feature_account_DEPOSIT']


In [10]:
all_data.Account_NO.value_counts()

20179201    32626
56986993    14643
84903292    12196
80157410     8399
4969373      5374
28039243     3324
10908140      774
68304903      767
83234289      701
22937010       24
Name: Account_NO, dtype: int64

In [11]:
# since this is a categorical let's encode it

all_data['Account_NO'] = le.fit_transform(all_data.Account_NO)

In [12]:
all_data['TRANSACTION DETAILS'].value_counts()[:20]

INTERNALFUNDTRANSFERIN          3262
CASHDEP/GURGAON/                1837
TRFFROMINDIAFORENSICSERVICES    1746
                                1651
CASHDEP/KAROLBAGH/              1649
CASHDEP/RAJOURI/                1538
CASHDEP/NOIDA/TP                1454
INDIAFORENSICAEPSNPCIWDLSET      974
SWEEPTRFTO:40900036427           885
CASHDEP/NEW-DELHI/               723
CASHDEP/FARIDABAD/TP             702
INDIAFORENSICAEPSNPCIDEPSET      638
CASHDEP/SILVASSA/                625
TRFTOINDIAFORENSICSERVICESIN     598
CASHDEP/VIKASMARG/TP             593
CASHDEP/BHI-RIICO/               545
CASHDEP/PANIPAT/0146             534
CASHDEP/GURGAON/LCT              516
CASHDEP/PITAMPURA/TP             488
CASHDEP/KALYAN-W/                441
Name: TRANSACTION DETAILS, dtype: int64

In [13]:
all_data['TRANSACTION DETAILS WORD'] = all_data['TRANSACTION DETAILS'].str\
    .replace('/', " ").replace("-", " ")
all_data.sample(5)

Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear,is_deposit,is_withdraw,deposit_balance,withdraw_balance,transaction_amount,balance_before_transaction,gb_feature_account_blance,gb_feature_account_balnce_diff,gb_feature_account_WITHDRAWAL,gb_feature_account_WITHDRAWAL_diff,gb_feature_account_DEPOSIT,gb_feature_account_DEPOSI_diff,TRANSACTION DETAILS WORD
17029,CR800312836531/RMCPL17352/0,9,-1.0,10000.0,0.0,6038861000.0,Money-Transfer,1,1,9,2016,5,1,1,0,0,0.0,2e-06,6038871000.0,6038871000.0,7242483000.0,-1203622000.0,736223.5,-726223.5,1328952.0,-1328952.0,CR800312836531 RMCPL17352 0
51477,NEFT/N205180592468135/PAY,7,-1.0,0.0,122755.6,42960230000.0,Money-Transfer,1,1,11,2020,5,1,2,1,1,3e-06,0.0,42960230000.0,42960100000.0,21968840000.0,20991380000.0,4052414.0,-4052414.0,4904431.0,-4781675.0,NEFT N205180592468135 PAY
40989,NEFT/N310170404566972/PAY,2,-1.0,0.0,6568326.76,49549120000.0,Money-Transfer,1,8,2,2017,2,3,31,1,1,0.000133,0.0,49549120000.0,49542550000.0,24753260000.0,24795860000.0,781090.8,-781090.8,1754307.0,4814020.0,NEFT N310170404566972 PAY
9893,SWEEPTRFTO:40900036427,5,-1.0,6154867.0,0.0,74507490000.0,,0,5,22,2016,6,2,20,0,0,0.0,8.3e-05,74513650000.0,74513650000.0,47098210000.0,27409280000.0,6558444.0,-403576.9,3560254.0,-3560254.0,SWEEPTRFTO:40900036427
21023,CASHDEP/NOIDA/TP0,2,-1.0,0.0,1193000.0,16704600000.0,Deposit,1,3,23,2016,2,1,12,1,1,7.1e-05,0.0,16704600000.0,16703410000.0,24753260000.0,-8048656000.0,781090.8,-781090.8,1754307.0,-561307.0,CASHDEP NOIDA TP0


### Transcation Details

In [14]:
wb_vector = TfidfVectorizer(analyzer='char_wb', ngram_range=(1,9))
X_vect = wb_vector.fit_transform(all_data['TRANSACTION DETAILS'])

X_vect.shape

(78828, 1254447)

In [15]:
# character features truncated to 5

n_fts = 5
trunc = TruncatedSVD(n_fts, random_state=21, n_iter=10)

trunc_fts = trunc.fit_transform(X_vect)
for i in range(n_fts):
    all_data[f"transction_detail_truncft_{i+1}"] = trunc_fts[:, i]

all_data.sample()

Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear,is_deposit,is_withdraw,deposit_balance,withdraw_balance,transaction_amount,balance_before_transaction,gb_feature_account_blance,gb_feature_account_balnce_diff,gb_feature_account_WITHDRAWAL,gb_feature_account_WITHDRAWAL_diff,gb_feature_account_DEPOSIT,gb_feature_account_DEPOSI_diff,TRANSACTION DETAILS WORD,transction_detail_truncft_1,transction_detail_truncft_2,transction_detail_truncft_3,transction_detail_truncft_4,transction_detail_truncft_5
25473,CR819017920509/RMCPL11996/0,9,-1.0,1000.0,0.0,8900746000.0,Money-Transfer,1,7,11,2016,0,3,28,0,0,0.0,1.123501e-07,8900747000.0,8900747000.0,7242483000.0,1658263000.0,736223.46453,-735223.46453,1328952.0,-1328952.0,CR819017920509 RMCPL11996 0,0.018291,-0.010258,0.012195,-0.003107,-0.005184


In [16]:
del trunc, trunc_fts
gc.collect()

44

In [17]:
# KMeans clustering of the chracter frequencies
classes = 12
km = KMeans(
        n_clusters=classes, init='random',
        n_init=4, max_iter=1000,
        tol=1e-04, random_state=0
    )
km.fit(X_vect)
ktrans = km.predict(X_vect)
all_data['transction_detail_kmeans'] = ktrans

In [18]:
del km, ktrans, X_vect
gc.collect()

22

In [19]:
# Transaction detail word 

for i in range(11):
    all_data['TRANSACTION DETAILS WORD'] = all_data['TRANSACTION DETAILS WORD'].str.replace(str(i), "")
wb_vector = TfidfVectorizer(analyzer='word',ngram_range=(1, 3))
X_vect = wb_vector.fit_transform(all_data['TRANSACTION DETAILS WORD'])

X_vect.shape

(78828, 3936)

In [20]:
# word level truncation of the frequencies
n_fts = 4
trunc = TruncatedSVD(n_fts)

trunc_fts = trunc.fit_transform(X_vect)
for i in range(n_fts):
    all_data[f"transction_detail_word_truncft_{i+1}"] = trunc_fts[:, i]

all_data.sample()

Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear,is_deposit,is_withdraw,deposit_balance,withdraw_balance,transaction_amount,balance_before_transaction,gb_feature_account_blance,gb_feature_account_balnce_diff,gb_feature_account_WITHDRAWAL,gb_feature_account_WITHDRAWAL_diff,gb_feature_account_DEPOSIT,gb_feature_account_DEPOSI_diff,TRANSACTION DETAILS WORD,transction_detail_truncft_1,transction_detail_truncft_2,transction_detail_truncft_3,transction_detail_truncft_4,transction_detail_truncft_5,transction_detail_kmeans,transction_detail_word_truncft_1,transction_detail_word_truncft_2,transction_detail_word_truncft_3,transction_detail_word_truncft_4
30642,INDFORINCOMEINDOREMI07091,1,-1.0,3240.0,0.0,93379336.0,Salary and wages,1,11,3,2016,3,4,44,0,0,0.0,3.5e-05,93382576.0,93382576.0,70781120.0,22598220.0,185617.090439,-182377.090439,200.904393,-200.904393,INDFORINCOMEINDOREMI,0.063314,0.001841,0.052027,-0.00067,-0.008068,2,1.294101e-09,9.136695e-09,1.591929e-08,7.625917e-08


In [21]:
del trunc, trunc_fts, X_vect

gc.collect()

44

#### Domain knowledge clustering

In [22]:
domain_categories = {}
all_data["Category"].unique()

array(['Money-Transfer', 'Salary and wages', 'Bank Charges',
       'Cash-Pickup', 'Cheque-Payment', 'Deposit', 'Merchant-Payment',
       'Reversal', 'Savings and Investments', 'Internet and IT Services',
       'Bill-Payments', 'Shopping', 'Interest',
       'Transport, Travel, and Logistics', 'Insurance',
       'Cell Phone and Airtime', 'Entertainment', 'Professional services',
       'General Purchases', 'Donations', 'Loan Repayment', nan],
      dtype=object)

In [23]:
Bank_Charges = all_data[all_data["TRANSACTION DETAILS"].str.contains("fee|tax|charg", case=False)]
domain_categories["Bank_Charges"] = Bank_Charges
print(Bank_Charges.shape)
Bank_Charges.Category.value_counts()

(1668, 37)


Bank Charges     1122
Reversal            3
Bill-Payments       1
Name: Category, dtype: int64

In [24]:
Salary_and_wages = all_data[all_data["TRANSACTION DETAILS"].str.contains("income", case=False)]
domain_categories["Salary_and_wages"] = Salary_and_wages
print(Salary_and_wages.shape)
Salary_and_wages.Category.value_counts()

(1155, 37)


Salary and wages    808
Name: Category, dtype: int64

In [25]:
Cash_Pickup = all_data[all_data["TRANSACTION DETAILS"].str.contains("cashpic|dsb|beat", case=False)]
domain_categories["Cash_Pickup"] = Cash_Pickup
print(Cash_Pickup.shape)
Cash_Pickup.Category.value_counts()

(1754, 37)


Cash-Pickup     1064
Bank Charges      19
Reversal           2
Name: Category, dtype: int64

In [26]:
Cheque_Payment = all_data[all_data["TRANSACTION DETAILS"].str.contains("chqdep", case=False)]
domain_categories["Cheque_Payment"] = Cheque_Payment
print(Cheque_Payment.shape)
Cheque_Payment.Category.value_counts()

(2565, 37)


Cheque-Payment      1427
Merchant-Payment     335
Money-Transfer        36
Name: Category, dtype: int64

In [27]:
Bill_Payments = all_data[all_data["TRANSACTION DETAILS"].str.contains("bbps|billpay", case=False)]
domain_categories["Bill_Payments"] = Bill_Payments
print(Bill_Payments.shape)
print(Bill_Payments["TRANSACTION DETAILS"].unique()[:20])
Bill_Payments.Category.value_counts()

(783, 37)
['BBPSSETTLEMENTDT30/08/' 'BBPSSETTLEMENTDT31/08/'
 'BBPSSETTLEMENTDT01/09/' 'BBPSSETTLEMENTDTD17.10'
 'BBPSSETTLEMENTDTD18.10' 'BBPSSETTLEMENTDTD20.10'
 'BBPSSETTLEMENTDTD21.10' 'BBPSSETTLEMENTFORDT03' 'BBPSSETTLEMENTFORDT04'
 'BBPSSETTLEMENTFORDT05' 'BBPSSETTLEMENTFORDTD0' 'BBPSSETTLEMENTFORDT17'
 'BBPSSETTLEMENTFORDTD1' 'BBPSSETTLEMENTFORDT24' 'BBPSSETTLEMENTFORDT21'
 'BBPSSETTLEMENTFORDT22' 'BBPSSETTLEMENTDTD30DEC' 'BBPSSETTLEMENTDTD17JAN'
 'BBPSSETTLEMENTDTD20JAN' 'BBPSSETTLEMENTDTD21JAN']


Bill-Payments     482
Money-Transfer     96
Name: Category, dtype: int64

In [28]:
Deposit = all_data[(all_data["TRANSACTION DETAILS"].str.contains("cashdep|ica17", case=False))]
print(Deposit.shape[0])
print(Deposit["TRANSACTION DETAILS"].unique()[:20])
Deposit.Category.value_counts()

17157
['CASHDEP/NOIDA/TP' 'CASHDEP/NALASOPARA/900200' 'CASHDEP/RAJOURI/'
 'CASHDEP/BHI-RIICO/' 'CASHDEP/VAPI/' 'CASHDEP/NEW-DELHI/'
 'CASHDEP/KOVVADA/ROCODE-9' 'CASHDEP/DHARWAD/' 'CASHDEP/HAUSKHAS/CASHDE'
 'CASHDEP/BORIVALI/CASHREC' 'CASHDEP/HAUSKHAS/ALOK' 'CASHDEP/THANE-W/'
 'CASHDEP/BOISAR/' 'CASHDEP/NOIDA/TP/ONETIME' 'CASHDEP/VILE-PARLE/'
 'CASHDEP/HAUSKHAS/TP' 'CASHDEP/CAPITOLPT/@CP' 'CASHDEP/THANEWEST/'
 'CASHDEP/NOIDA/TP/RECHARGE' 'CASHDEP/DAMAN/']


Deposit             12431
Bank Charges            3
Merchant-Payment        1
Name: Category, dtype: int64

In [29]:
Internet_and_IT_Services = all_data[all_data["TRANSACTION DETAILS"].str.contains("internet", case=False)]
domain_categories["Internet_and_IT_Services"] = Internet_and_IT_Services
print(Internet_and_IT_Services.shape[0])
print(Internet_and_IT_Services["TRANSACTION DETAILS"].unique())
Internet_and_IT_Services.Category.value_counts()

23
['TIMESINTERNETLIMITED' 'DEOBAZAARINTERNETCOMMER']


Internet and IT Services    22
Name: Category, dtype: int64

In [30]:
Shopping = all_data[all_data["TRANSACTION DETAILS"].str.contains("shopping|onlinepay|purchasing|paymentsfor", case=False)]
domain_categories["Shopping"] = Shopping
print(Shopping.shape[0])
print(Shopping["TRANSACTION DETAILS"].unique())
Shopping.Category.value_counts()

131
['NAAPTOLONLINESHOPPINGP' 'INFINYPOOLONLINEPAYMENT'
 'JEWELCLICKONLINEPAYMENT' 'STAMPPAPERPURCHASING' 'PAYMENTSFOR:9090000405'
 'PAYMENTSFOR:9090000452' 'PAYMENTSFOR:9090000464'
 'PAYMENTSFOR:9090000480' 'PAYMENTSFOR:9090000395'
 'PAYMENTSFOR:9090000421' 'PAYMENTSFOR:9090000433'
 'PAYMENTSFOR:9090000437' 'PAYMENTSFOR:9090000461'
 'PAYMENTSFOR:9090000495' 'PAYMENTSFOR:9090000492'
 'PAYMENTSFOR:9090000506' 'PAYMENTSFOR:9090000577'
 'PAYMENTSFOR:9090000415' 'PAYMENTSFOR:9090000425'
 'PAYMENTSFOR:9090000427' 'PAYMENTSFOR:9090000438'
 'PAYMENTSFOR:9090000542' 'STAMPPAPERPURCHASINGCH'
 'PAYMENTSFOR:9090000505']


Shopping    119
Name: Category, dtype: int64

In [31]:
Interest = all_data[all_data["TRANSACTION DETAILS"].str.contains("int.coll", case=False)]

domain_categories["Interest"] = Interest
print(Interest.shape[0])
print(Interest["TRANSACTION DETAILS"].unique())
Interest.Category.value_counts()

108
['1196428:INT.COLL:01-11-20' '1196428:INT.COLL:01-01-20'
 '1196428:INT.COLL:01-03-20' '1196428:INT.COLL:01-04-20'
 '1196428:INT.COLL:01-05-20' '1196428:INT.COLL:01-07-20'
 '1196428:INT.COLL:01-08-20' '1196428:INT.COLL:01-09-20'
 '409000362497:INT.COLL:01-' '1196711:INT.COLL:01-06-20'
 '1196711:INT.COLL:01-07-20' '1196711:INT.COLL:01-08-20'
 '1196711:INT.COLL:01-09-20' '1196711:INT.COLL:01-11-20'
 '1196711:INT.COLL:01-01-20' '1196711:INT.COLL:01-02-20'
 '1196711:INT.COLL:01-03-20' '1196711:INT.COLL:01-05-20'
 '1196428:INT.COLL:01-10-20' '1196428:INT.COLL:01-12-20'
 '1196428:INT.COLL:01-02-20' '1196428:INT.COLL:01-06-20'
 '1196711:INT.COLL:01-10-20' '1196711:INT.COLL:01-12-20'
 '1196711:INT.COLL:01-04-20' '409000405747:INT.COLL:01-']


Interest    70
Name: Category, dtype: int64

In [32]:
Insurance = all_data[all_data["TRANSACTION DETAILS"].str.contains("insura", case=False)]

domain_categories["Insurance"] = Insurance
print(Insurance.shape[0])
print(Insurance["TRANSACTION DETAILS"].unique())
Insurance.Category.value_counts()

21
['NATIONALINSURANCECOLTD' 'BIRLASUNLIFEINSURANCE' 'NATIONALINSURANCECO'
 'TATAAIGGENERALINSURANC' 'MAXBUPAHEALTHINSURANCE' 'MAXBUPAHELATHINSURACE'
 'NATIONALINSURANCECOMPAN']


Insurance    20
Name: Category, dtype: int64

In [33]:
Cell_Phone_and_Airtime = all_data[all_data["TRANSACTION DETAILS"].str.contains("airtel|vodafone", case=False)]

domain_categories["Cell_Phone_and_Airtime"] = Cell_Phone_and_Airtime
print(Cell_Phone_and_Airtime.shape[0])
print(Cell_Phone_and_Airtime["TRANSACTION DETAILS"].unique())
Cell_Phone_and_Airtime.Category.value_counts()

135
['AIRTELRELATIONSHIP11614' 'AIRTELRELATIONSHIP10951'
 'VODAFONEMOBILESERVICES' 'AIRTELRELATIONSHIP12539'
 'AIRTELRELATIONSHIPNO12' 'AIRTELRELATIONSHIONO12'
 'AIRTELRELATIONSHIONO11' 'VODAFONENO1247161300' 'AIRTELRELATIONSHIONO10'
 'BHARTIAIRTELLLOCALBNKG' '.BHARTIAIRTELLLOCALBNK' 'VODAFONENO.1247161500'
 'VODAFONENO1247161100' 'VODAFONENO1207151000' 'BHARTIAIRTELLTD1095173'
 'BHARTIAIRTELLTD1161438' 'BHARTIAIRTELLTD1152033'
 'BHARTIAIRTELLTD1155147' 'BHARTIAIRTELLTD1155141'
 'BHARTIAIRTELLTD7006279' 'BHARTIAIRTELLTD' 'BHARTIAIRTEL'
 'BHARTIAIRTEL12539330' 'VODAFONE1207191000' 'VODAFONE1247161500'
 'AIRTELRELATIONSHIPNO11' 'AIRTEL' 'AIRTELRELATIONSHIP11520'
 'AIRTELRELATIONSHIP11551' 'BHARTIAIRTELLLDLNCOLL' 'VODAFONE'
 'VODAFONE9899294256' 'VODAFONE9873002188' 'NEFT/000022288318/AIRTEL'
 'AIRTELRELATIONSHIP' 'AIRTELRELANO7025359034' 'VODAFONE1247'
 'VODAFONE1247161300' 'BHARTIAIRTELLLOCBNKBILC']


Cell Phone and Airtime    103
Name: Category, dtype: int64

In [34]:
Loan_Repayment = all_data[all_data["TRANSACTION DETAILS"].str.contains("loan", case=False)]

domain_categories["Loan_Repayment"] = Loan_Repayment
print(Loan_Repayment.shape[0])
print(Loan_Repayment["TRANSACTION DETAILS"].unique())
Loan_Repayment.Category.value_counts()

112
['LOANRECOVERYFOR90900003' 'LOANRECOVERYFOR90900004'
 'LOANRECOVERYFOR90900005']


Loan Repayment    69
Name: Category, dtype: int64

In [35]:
Reversal = all_data[(all_data["TRANSACTION DETAILS"].str.contains("rev|inward|chqreturn", case=False))]

domain_categories["Reversal"] = Reversal
print(Reversal.shape[0])
print(Reversal["TRANSACTION DETAILS"].unique()[:10])

Reversal.Category.value_counts()

134
['REVRSLMUTILTDINDIAFORENSICMET0' 'REVAEPSINDIAFORENSICINCOME24'
 'REVAEPSINDIAFORENSICSTAX241' 'REVIMPSO/W1209151C'
 'REVAEPSBANKINCOMESHAR' 'REVINDFORAEPSNPCIDEPSET'
 'REVINDFORAEPSNPCIWDLSET' 'REVINDIAFORENSICAEPSNPCIWDL'
 'REVINDIAFORENSICAEPSNPCIDEP' 'REVIMPSOW300120164C']


Reversal                   66
Merchant-Payment           13
Salary and wages            4
Money-Transfer              4
Savings and Investments     3
Name: Category, dtype: int64

In [36]:
Savings_and_Investments = all_data[(all_data["TRANSACTION DETAILS"].str.contains("booking", case=False))]

domain_categories["Savings_and_Investments"] = Savings_and_Investments
print(Savings_and_Investments.shape[0])
print(Savings_and_Investments["TRANSACTION DETAILS"].unique()[:10])

Savings_and_Investments.Category.value_counts()

18
['FDBOOKING' 'FUNDREVTOFDBOOKING36' 'FDBOOKINGBGMARGINMONE'
 'FDBOOKINGFORBGISSUANC' 'FDBOOKING709004423111' 'FDBOOKING709005421635'
 'FDBOOKING709005482544' 'FDBOOKING365DAYS' 'FUNDREVTOFDBOOKING365'
 'FDBOOKING709004439297']


Savings and Investments    10
Name: Category, dtype: int64

In [37]:
# create a categorical columns for each clusters in the above

for cat in domain_categories:
    tr_dt = domain_categories[cat]['TRANSACTION DETAILS']
    unq = tr_dt.unique()
    domain_categories[cat] = (len(unq), unq)

    all_data[f"dm-{cat}"] = all_data['TRANSACTION DETAILS'].apply(
        lambda x:x in unq
    ).astype(int)

all_data.head()


Unnamed: 0,TRANSACTION DETAILS,Account_NO,CHQ.NO.,WITHDRAWAL AMT,DEPOSIT AMT,BALANCE AMT,Category,train,month,day,year,dayofweek,quarter,weekofyear,is_deposit,is_withdraw,deposit_balance,withdraw_balance,transaction_amount,balance_before_transaction,gb_feature_account_blance,gb_feature_account_balnce_diff,gb_feature_account_WITHDRAWAL,gb_feature_account_WITHDRAWAL_diff,gb_feature_account_DEPOSIT,gb_feature_account_DEPOSI_diff,TRANSACTION DETAILS WORD,transction_detail_truncft_1,transction_detail_truncft_2,transction_detail_truncft_3,transction_detail_truncft_4,transction_detail_truncft_5,transction_detail_kmeans,transction_detail_word_truncft_1,transction_detail_word_truncft_2,transction_detail_word_truncft_3,transction_detail_word_truncft_4,dm-Bank_Charges,dm-Salary_and_wages,dm-Cash_Pickup,dm-Cheque_Payment,dm-Bill_Payments,dm-Internet_and_IT_Services,dm-Shopping,dm-Interest,dm-Insurance,dm-Cell_Phone_and_Airtime,dm-Loan_Repayment,dm-Reversal,dm-Savings_and_Investments
0,NEFT/FDRL401249529/INDIAFORENSIC,9,-1.0,0.0,15000000.0,15000000.0,Money-Transfer,1,2,28,2014,4,1,9,1,1,1.0,0.0,15000000.0,0.0,7242483000.0,-7227483000.0,736223.46453,-736223.46453,1328952.0,13671050.0,NEFT FDRL INDIAFORENSIC,0.103372,-0.014003,0.156299,0.001463,-0.0066,5,-2.2e-05,-0.000111,-0.000413,-0.001895,0,0,0,0,0,0,0,0,0,0,0,0,0
1,NEFT/FDRL401249532/INDIAFORENSIC,9,-1.0,0.0,15000000.0,60000000.0,Money-Transfer,1,2,28,2014,4,1,9,1,1,0.25,0.0,60000000.0,45000000.0,7242483000.0,-7182483000.0,736223.46453,-736223.46453,1328952.0,13671050.0,NEFT FDRL INDIAFORENSIC,0.104187,-0.014118,0.157527,0.001474,-0.006657,5,-2.2e-05,-0.000111,-0.000413,-0.001895,0,0,0,0,0,0,0,0,0,0,0,0,0
2,NEFT/FDRL401249534/INDIAFORENSIC,9,-1.0,0.0,15000000.0,90000000.0,Money-Transfer,1,2,28,2014,4,1,9,1,1,0.166667,0.0,90000000.0,75000000.0,7242483000.0,-7152483000.0,736223.46453,-736223.46453,1328952.0,13671050.0,NEFT FDRL INDIAFORENSIC,0.104641,-0.0142,0.158043,0.001457,-0.00665,5,-2.2e-05,-0.000111,-0.000413,-0.001895,0,0,0,0,0,0,0,0,0,0,0,0,0
3,NEFT/FDRL401249590/INDIAFORENSIC,9,-1.0,0.0,10000000.0,100000000.0,Money-Transfer,1,2,28,2014,4,1,9,1,1,0.1,0.0,100000000.0,90000000.0,7242483000.0,-7142483000.0,736223.46453,-736223.46453,1328952.0,8671048.0,NEFT FDRL INDIAFORENSIC,0.103491,-0.014026,0.156545,0.001461,-0.006652,5,-2.2e-05,-0.000111,-0.000413,-0.001895,0,0,0,0,0,0,0,0,0,0,0,0,0
4,NEFT/FDRL401249531/INDIAFORENSIC,9,-1.0,0.0,15000000.0,45000000.0,Money-Transfer,1,2,28,2014,4,1,9,1,1,0.333333,0.0,45000000.0,30000000.0,7242483000.0,-7197483000.0,736223.46453,-736223.46453,1328952.0,13671050.0,NEFT FDRL INDIAFORENSIC,0.104393,-0.014194,0.157801,0.001449,-0.006711,5,-2.2e-05,-0.000111,-0.000413,-0.001895,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
# create a single column describing all of the classes

st_dm_cat = [j for i, j in sorted(domain_categories.values())]

def get_bins(x):
    for i, j in enumerate(st_dm_cat):
        if x in j:
            return i
    
    return len(st_dm_cat)

all_data['TRANSACTION_DETAILS_bins'] = all_data['TRANSACTION DETAILS'].apply(
        get_bins
    )

In [40]:
# aggregated features from transaction detail bins

transaction_detail_groups = all_data.groupby('TRANSACTION DETAILS').agg(

             amount_transaction_detail_min = ("transaction_amount", "min"),
             amount_transaction_detail_max = ("transaction_amount", "max"),  
             amount_transaction_detail_mean = ("transaction_amount", "mean"),
    
             balance_transaction_detail_min = ("BALANCE AMT", "min"),
             balance_transaction_detail_max = ("BALANCE AMT", "max"),     
             balance_transaction_detail_mean = ("BALANCE AMT", "mean"),
    
             ).reset_index()

all_data = pd.merge(all_data, transaction_detail_groups, on=['TRANSACTION DETAILS'], how='left')

null = all_data.isna().sum()
null[null > 0]

Category    22625
dtype: int64

### Feature combinations

In [41]:
fts_for_com = [
    ("Account_NO", "TRANSACTION_DETAILS_bins"),
    ("TRANSACTION_DETAILS_bins", "is_deposit"),
    ("TRANSACTION_DETAILS_bins", "is_withdraw"),
    ("month", "weekofyear"),
]

for ft1, ft2 in fts_for_com:
    all_data[f"{ft1}_{ft2}"] = all_data[ft1].astype(str) + all_data[ft2].astype(str)

### Preprocessing

In [42]:
lcols = all_data.select_dtypes(['object']).columns.drop("Category").to_list()

lcols

['TRANSACTION DETAILS',
 'TRANSACTION DETAILS WORD',
 'Account_NO_TRANSACTION_DETAILS_bins',
 'TRANSACTION_DETAILS_bins_is_deposit',
 'TRANSACTION_DETAILS_bins_is_withdraw',
 'month_weekofyear']

In [43]:
# lable encoding for categorical columns

for col in lcols:
    all_data[col] = le.fit_transform(all_data[col])

In [44]:
train_df = all_data[all_data.train == 1].drop(columns='train')
test_df = all_data[all_data.train == 0].drop(columns=['train', 'Category'])

test_df.shape, train_df.shape

((22625, 59), (56203, 60))

In [45]:
# encode the target variable

train_df['Category'] = le.fit_transform(train_df['Category'])
cat_map = dict(zip(range(len(le.classes_)) , le.classes_))

cat_map

{0: 'Bank Charges',
 1: 'Bill-Payments',
 2: 'Cash-Pickup',
 3: 'Cell Phone and Airtime',
 4: 'Cheque-Payment',
 5: 'Deposit',
 6: 'Donations',
 7: 'Entertainment',
 8: 'General Purchases',
 9: 'Insurance',
 10: 'Interest',
 11: 'Internet and IT Services',
 12: 'Loan Repayment',
 13: 'Merchant-Payment',
 14: 'Money-Transfer',
 15: 'Professional services',
 16: 'Reversal',
 17: 'Salary and wages',
 18: 'Savings and Investments',
 19: 'Shopping',
 20: 'Transport, Travel, and Logistics'}

## Modeling and Prediction

In [46]:
main_cols = train_df.columns.difference([
    'Category', 
    # 'TRANSACTION DETAILS', 'TRANSACTION DETAILS WORD'
])

X = train_df[main_cols] 
y = train_df['Category']

test = test_df[main_cols]

X.shape, test.shape

((56203, 59), (22625, 59))

In [None]:
print(y.value_counts().to_dict())

In [47]:
# first undersampling for high number of calss categories then oversampling for lower class categories

from imblearn import under_sampling as uns 


stra1 = {14: 4000, 5: 3000, 13: 1200, 4: 1000, 0: 900,
        2: 850, 17: 700, 1: 400, 19: 119, 3: 103,
        10: 70, 12: 69, 16: 66, 11: 22, 9: 20, 15: 15,
        18: 10, 8: 4, 20: 2, 6: 2, 7: 1}

stra2 = {14: 4000, 5: 3000, 13: 1200, 4: 1000, 0: 900,
        2: 850, 17: 700, 1: 400, 19: 130, 3: 110,
        10: 100, 12: 80, 16: 80, 11: 50, 9: 45, 15: 40,
        18: 30, 8: 20, 20: 15, 6: 15, 7: 10}
        
undersample = uns.RandomUnderSampler(sampling_strategy=stra1, random_state=0)
X, y = undersample.fit_resample(X, y)

oversample = ovs.RandomOverSampler(sampling_strategy=stra2, random_state=0)
X, y = oversample.fit_resample(X, y)

X.shape

(12775, 59)

In [48]:

# class for training more than one models and averging the results
class My_model:
    def __init__(self, models):
        self.models = models

    def fit_eval_pred(self, X, y, eval_set, val, test, verbose=True):
        results_eval = []
        results_test = []
        for i, model in enumerate(self.models):
            if verbose:
                print(f"[Training]............................... Model_{i+1}")
            st = time.time()
            try:
                model.fit(X, y, eval_set=eval_set, verbose=False)
            except Exception as e:
                model.fit(X, y)
            if verbose:
                print(f"[Prediction]............................. Model_{i+1}")
            p = model.predict_proba(val)
            results_eval.append(p)
            results_test.append(model.predict_proba(test))
            if verbose:
                print(f"Log loss = {log_loss(eval_set[0][1], p)}", end=" "*6)
                print(f"Time {time.time() - st :.2f}sec")

        results_eval = np.mean(results_eval, axis=0)
        results_test = np.mean(results_test, axis=0)

        return results_eval, results_test


In [49]:
from sklearn.ensemble import *
from sklearn.multiclass import OneVsRestClassifier
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier



# custom function gives models 
def get_models(seed=21):

    xgb_params = {"eta":0.1, 'n_estimators': 200, 'random_state':seed, 'objective':'mlogloss',
            "eval_metric":'mlogloss', "subsample" : 1.0, "colsample_bytree" : .8}
    cat_params = {"iterations":10000, "verbose":100, "random_state":seed,
            'use_best_model':True, 'early_stopping_rounds':100, 
            'task_type':'GPU',
            "learning_rate":0.1,
            }

    _models = [
            CatBoostClassifier(**cat_params),
            XGBClassifier(**xgb_params),
    ]
    return _models


In [50]:
from sklearn.model_selection import StratifiedKFold

results = []
ns = 5
tot = []


skf = StratifiedKFold(n_splits=ns, random_state=seed, shuffle=True)
for fold, (train_index, test_index) in enumerate(skf.split(X, y)):
    print(100*"-")
    print(f"Fold-{fold+1}")
    X_train, X_test = X.loc[train_index], X.loc[test_index]
    y_train, y_test = y.loc[train_index], y.loc[test_index]

    main_model = My_model(get_models())
    pred_train, pred_test = main_model.fit_eval_pred(X_train, y_train, [(X_test, y_test)], X_test, test)
    try:
      loss = log_loss(y_test, pred_train)
      print(f"log loss = {loss}")
      tot += [loss]
    except Exception as e:
      print(e)
    results.append(pred_test)

    print(100*"-")
    print("\n\n")

print(f"Average log loss = {sum(tot) / len(tot)}")

----------------------------------------------------------------------------------------------------
Fold-1
[Training]............................... Model_1
[Prediction]............................. Model_1
Log loss = 0.009907836518789999      Time 14.20sec
[Training]............................... Model_2
[Prediction]............................. Model_2
Log loss = 0.01149885854077107      Time 42.02sec
log loss = 0.010016444609397046
----------------------------------------------------------------------------------------------------



----------------------------------------------------------------------------------------------------
Fold-2
[Training]............................... Model_1
[Prediction]............................. Model_1
Log loss = 0.004991644707235832      Time 14.65sec
[Training]............................... Model_2
[Prediction]............................. Model_2
Log loss = 0.004344028123423075      Time 41.31sec
log loss = 0.004445154549695816
-------------

In [51]:
# 5 fold prediction
preds = np.mean(results, axis=0)
preds.shape

(22625, 21)

In [53]:
# model function based on voting classifier         
def model(seed=21, weights=None):

    xgb_params = {"eta":0.1, 'n_estimators': 200, 'random_state':seed, 'objective':'mlogloss',
            "eval_metric":'mlogloss', "subsample" : 1.0, "colsample_bytree" : .8}
    cat_params = {"iterations":2000, "verbose":0, "random_state":seed,
                  'task_type':'GPU',
        #     "learning_rate":0.09,
            }
    lgb_params = {'learning_rate':0.01, 'n_estimators':650, 'random_state':seed,
            'verbose':-1, 'subsample':.8, 'colsample_bytree':0.6}

    models = [
            # ('lgb', LGBMClassifier(**lgb_params)),
            ('xgb', XGBClassifier(**xgb_params)),
            ('cat', CatBoostClassifier(**cat_params)),
    ]
    return VotingClassifier(estimators=models, voting='soft', flatten_transform=True, verbose=1, weights=weights)

In [54]:
results2 = []
for s in [0, 21, 251, 871, 1992]: # run over different seeds and average the results
    print("\n\n"+'-'*60+f"\nseed = {s}")
    clf = model(seed=s)
    clf.fit(X, y)
    print(log_loss(y, clf.predict_proba(X)))

    results2.append(clf.predict_proba(test))



------------------------------------------------------------
seed = 0
[Voting] ...................... (1 of 2) Processing xgb, total=  43.9s
[Voting] ...................... (2 of 2) Processing cat, total=  23.4s
0.0007058225997372162


------------------------------------------------------------
seed = 21
[Voting] ...................... (1 of 2) Processing xgb, total=  44.9s
[Voting] ...................... (2 of 2) Processing cat, total=  23.9s
0.0007112171813358412


------------------------------------------------------------
seed = 251
[Voting] ...................... (1 of 2) Processing xgb, total=  43.4s
[Voting] ...................... (2 of 2) Processing cat, total=  22.9s
0.0007075067784211742


------------------------------------------------------------
seed = 871
[Voting] ...................... (1 of 2) Processing xgb, total=  43.3s
[Voting] ...................... (2 of 2) Processing cat, total=  23.2s
0.0007093292284072461


-------------------------------------------------

In [55]:
# full data prediction with different seeds
full_pred = np.mean(results2, axis=0)
full_pred.shape

(22625, 21)

### Ensembling and make submission

In [56]:
# take the average of splitted prediction and full data prediction
avg_pred = preds*.5 + full_pred*.5
avg_pred.shape

(22625, 21)

In [57]:
sub = pd.DataFrame(np.round(avg_pred, 4), columns=cat_map.values(), index=ID)
sub.drop(columns="Entertainment", inplace=True) # since it is not included in sample submission
sub.to_csv("submission1.csv")
sub.head()

Unnamed: 0_level_0,Bank Charges,Bill-Payments,Cash-Pickup,Cell Phone and Airtime,Cheque-Payment,Deposit,Donations,General Purchases,Insurance,Interest,Internet and IT Services,Loan Repayment,Merchant-Payment,Money-Transfer,Professional services,Reversal,Salary and wages,Savings and Investments,Shopping,"Transport, Travel, and Logistics"
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2014-02-28 00:00:00X3000000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.9998,0.0,0.0,0.0,0.0,0.0,0.0
2014-02-28 00:00:00X7500000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0001,0.9998,0.0,0.0,0.0,0.0,0.0,0.0
2014-03-08 00:00:00X4500002863,0.9992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,0.0,0.0
2014-03-08 00:00:00X6000002863,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9999,0.0,0.0,0.0,0.0,0.0,0.0
2014-03-08 00:00:00X6000005726,0.9992,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0005,0.0,0.0,0.0,0.0,0.0,0.0
