In [1]:

"""
This is extremin's script:
    https://www.kaggle.com/extremin/xgb1113
Based on Vladimir Iglovikov' method: 
    https://www.kaggle.com/iglovikov/allstate-claims-severity/xgb-1114/discussion
"""
import pandas as pd
import numpy as np
import xgboost as xgb

from sklearn.metrics import mean_absolute_error

train = pd.read_csv('../../data/train.csv')
test = pd.read_csv('../../data/test.csv')
test['loss'] = np.nan
joined = pd.concat([train, test])
def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    con =2
    x =preds-labels
    grad =con*x / (np.abs(x)+con)
    hess =con**2 / (np.abs(x)+con)**2
    return grad, hess 


def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    return 'mae', mean_absolute_error(np.exp(preds), np.exp(labels))

if __name__ == '__main__':
    for column in list(train.select_dtypes(include=['object']).columns):
        if train[column].nunique() != test[column].nunique():
            set_train = set(train[column].unique())
            set_test = set(test[column].unique())
            remove_train = set_train - set_test
            remove_test = set_test - set_train

            remove = remove_train.union(remove_test)
            def filter_cat(x):
                if x in remove:
                    return np.nan
                return x

            joined[column] = joined[column].apply(lambda x: filter_cat(x), 1)
            
        joined[column] = pd.factorize(joined[column].values, sort=True)[0]

    train = joined[joined['loss'].notnull()]
    test = joined[joined['loss'].isnull()]

    shift = 200
    y = np.log(train['loss'] + shift)
    ids = test['id']
    X = train.drop(['loss', 'id'], 1)
    X_test = test.drop(['loss', 'id'], 1)

    from sklearn.model_selection import train_test_split
    x_train, x_valid, y_train, y_valid =train_test_split(X, y, test_size=0.1, random_state=2016)
    RANDOM_STATE = 2016
    params = {
        'min_child_weight': 1,
        'eta': 0.01,
        'colsample_bytree': 0.5,
        'max_depth': 12,
        'subsample': 0.8,
        'alpha': 1,
        'gamma': 1,
        'silent': 1,
        #'verbose_eval': True,
        'seed': RANDOM_STATE,
        'nthread':4,
        'verbose_eval':100
    }

    xgtrain = xgb.DMatrix(X, label=y)
    xgtest = xgb.DMatrix(X_test)
    watchlist = [ (xgtrain,'train')]
    model = xgb.train(params, xgtrain,int(2012/0.9),watchlist,obj=logregobj,feval=evalerror)

    prediction = np.exp(model.predict(xgtest)) - shift

    submission = pd.DataFrame()
    submission['loss'] = prediction
    submission['id'] = ids
    submission.to_csv('sub_fair_obj.csv', index=False)

[0]	train-rmse:7.00036	train-mae:3235.03
[1]	train-rmse:6.69186	train-mae:3234.19
[2]	train-rmse:6.40793	train-mae:3233.15
[3]	train-rmse:6.14628	train-mae:3231.89
[4]	train-rmse:5.90231	train-mae:3230.36
[5]	train-rmse:5.67567	train-mae:3228.56
[6]	train-rmse:5.46445	train-mae:3226.46
[7]	train-rmse:5.26691	train-mae:3224.04
[8]	train-rmse:5.08187	train-mae:3221.28
[9]	train-rmse:4.90839	train-mae:3218.17
[10]	train-rmse:4.74514	train-mae:3214.69
[11]	train-rmse:4.59147	train-mae:3210.83
[12]	train-rmse:4.44638	train-mae:3206.58
[13]	train-rmse:4.30942	train-mae:3201.96
[14]	train-rmse:4.17972	train-mae:3196.96
[15]	train-rmse:4.05702	train-mae:3191.57
[16]	train-rmse:3.94038	train-mae:3185.75
[17]	train-rmse:3.82957	train-mae:3179.53
[18]	train-rmse:3.7241	train-mae:3172.91
[19]	train-rmse:3.62367	train-mae:3165.9
[20]	train-rmse:3.52793	train-mae:3158.53
[21]	train-rmse:3.4364	train-mae:3150.71
[22]	train-rmse:3.34903	train-mae:3142.53
[23]	train-rmse:3.2655	train-mae:3133.98
[24]	t

[193]	train-rmse:0.56612	train-mae:1372
[194]	train-rmse:0.564292	train-mae:1368.07
[195]	train-rmse:0.562522	train-mae:1364.18
[196]	train-rmse:0.560787	train-mae:1360.4
[197]	train-rmse:0.559066	train-mae:1356.63
[198]	train-rmse:0.55737	train-mae:1352.88
[199]	train-rmse:0.5557	train-mae:1349.23
[200]	train-rmse:0.554076	train-mae:1345.63
[201]	train-rmse:0.552473	train-mae:1342.13
[202]	train-rmse:0.550894	train-mae:1338.68
[203]	train-rmse:0.549382	train-mae:1335.31
[204]	train-rmse:0.547864	train-mae:1331.92
[205]	train-rmse:0.546382	train-mae:1328.56
[206]	train-rmse:0.544914	train-mae:1325.31
[207]	train-rmse:0.543472	train-mae:1322.05
[208]	train-rmse:0.542066	train-mae:1318.87
[209]	train-rmse:0.540664	train-mae:1315.72
[210]	train-rmse:0.539298	train-mae:1312.6
[211]	train-rmse:0.53796	train-mae:1309.53
[212]	train-rmse:0.536647	train-mae:1306.51
[213]	train-rmse:0.535334	train-mae:1303.5
[214]	train-rmse:0.534057	train-mae:1300.54
[215]	train-rmse:0.532807	train-mae:1297.64

[381]	train-rmse:0.462454	train-mae:1111.95
[382]	train-rmse:0.462363	train-mae:1111.65
[383]	train-rmse:0.46227	train-mae:1111.32
[384]	train-rmse:0.462158	train-mae:1110.96
[385]	train-rmse:0.462058	train-mae:1110.65
[386]	train-rmse:0.461963	train-mae:1110.33
[387]	train-rmse:0.461881	train-mae:1110.05
[388]	train-rmse:0.461783	train-mae:1109.74
[389]	train-rmse:0.461696	train-mae:1109.43
[390]	train-rmse:0.461604	train-mae:1109.13
[391]	train-rmse:0.461526	train-mae:1108.85
[392]	train-rmse:0.461435	train-mae:1108.56
[393]	train-rmse:0.46133	train-mae:1108.22
[394]	train-rmse:0.461235	train-mae:1107.92
[395]	train-rmse:0.461099	train-mae:1107.52
[396]	train-rmse:0.460992	train-mae:1107.19
[397]	train-rmse:0.460904	train-mae:1106.89
[398]	train-rmse:0.460832	train-mae:1106.63
[399]	train-rmse:0.460738	train-mae:1106.32
[400]	train-rmse:0.460665	train-mae:1106.06
[401]	train-rmse:0.46056	train-mae:1105.72
[402]	train-rmse:0.460472	train-mae:1105.42
[403]	train-rmse:0.460383	train-mae

[569]	train-rmse:0.451399	train-mae:1075.48
[570]	train-rmse:0.451376	train-mae:1075.4
[571]	train-rmse:0.451347	train-mae:1075.3
[572]	train-rmse:0.451287	train-mae:1075.12
[573]	train-rmse:0.451254	train-mae:1075.04
[574]	train-rmse:0.451194	train-mae:1074.86
[575]	train-rmse:0.45116	train-mae:1074.75
[576]	train-rmse:0.451136	train-mae:1074.66
[577]	train-rmse:0.451098	train-mae:1074.52
[578]	train-rmse:0.451072	train-mae:1074.44
[579]	train-rmse:0.45105	train-mae:1074.38
[580]	train-rmse:0.45103	train-mae:1074.31
[581]	train-rmse:0.451005	train-mae:1074.21
[582]	train-rmse:0.450972	train-mae:1074.1
[583]	train-rmse:0.450953	train-mae:1074.02
[584]	train-rmse:0.450916	train-mae:1073.89
[585]	train-rmse:0.450892	train-mae:1073.81
[586]	train-rmse:0.450847	train-mae:1073.68
[587]	train-rmse:0.450807	train-mae:1073.57
[588]	train-rmse:0.450766	train-mae:1073.45
[589]	train-rmse:0.450741	train-mae:1073.37
[590]	train-rmse:0.450717	train-mae:1073.29
[591]	train-rmse:0.450678	train-mae:10

[757]	train-rmse:0.445951	train-mae:1058.64
[758]	train-rmse:0.445925	train-mae:1058.57
[759]	train-rmse:0.445894	train-mae:1058.47
[760]	train-rmse:0.445864	train-mae:1058.38
[761]	train-rmse:0.445845	train-mae:1058.33
[762]	train-rmse:0.445836	train-mae:1058.3
[763]	train-rmse:0.445803	train-mae:1058.2
[764]	train-rmse:0.445785	train-mae:1058.14
[765]	train-rmse:0.445773	train-mae:1058.11
[766]	train-rmse:0.445745	train-mae:1058.03
[767]	train-rmse:0.445709	train-mae:1057.93
[768]	train-rmse:0.445691	train-mae:1057.87
[769]	train-rmse:0.445667	train-mae:1057.8
[770]	train-rmse:0.445646	train-mae:1057.73
[771]	train-rmse:0.445631	train-mae:1057.69
[772]	train-rmse:0.445619	train-mae:1057.64
[773]	train-rmse:0.445608	train-mae:1057.61
[774]	train-rmse:0.445577	train-mae:1057.52
[775]	train-rmse:0.445566	train-mae:1057.48
[776]	train-rmse:0.445545	train-mae:1057.42
[777]	train-rmse:0.445522	train-mae:1057.33
[778]	train-rmse:0.44549	train-mae:1057.22
[779]	train-rmse:0.44546	train-mae:1

[945]	train-rmse:0.441817	train-mae:1046.56
[946]	train-rmse:0.441787	train-mae:1046.46
[947]	train-rmse:0.441742	train-mae:1046.33
[948]	train-rmse:0.441716	train-mae:1046.26
[949]	train-rmse:0.441695	train-mae:1046.2
[950]	train-rmse:0.441669	train-mae:1046.11
[951]	train-rmse:0.441664	train-mae:1046.09
[952]	train-rmse:0.44162	train-mae:1045.99
[953]	train-rmse:0.441606	train-mae:1045.95
[954]	train-rmse:0.441585	train-mae:1045.89
[955]	train-rmse:0.441576	train-mae:1045.86
[956]	train-rmse:0.441569	train-mae:1045.84
[957]	train-rmse:0.441551	train-mae:1045.79
[958]	train-rmse:0.441534	train-mae:1045.74
[959]	train-rmse:0.441522	train-mae:1045.7
[960]	train-rmse:0.441504	train-mae:1045.65
[961]	train-rmse:0.441495	train-mae:1045.63
[962]	train-rmse:0.441486	train-mae:1045.61
[963]	train-rmse:0.44146	train-mae:1045.51
[964]	train-rmse:0.441433	train-mae:1045.42
[965]	train-rmse:0.441405	train-mae:1045.34
[966]	train-rmse:0.441397	train-mae:1045.32
[967]	train-rmse:0.44138	train-mae:1

[1130]	train-rmse:0.438625	train-mae:1037.35
[1131]	train-rmse:0.438603	train-mae:1037.29
[1132]	train-rmse:0.438597	train-mae:1037.28
[1133]	train-rmse:0.438579	train-mae:1037.22
[1134]	train-rmse:0.438565	train-mae:1037.18
[1135]	train-rmse:0.438539	train-mae:1037.1
[1136]	train-rmse:0.438524	train-mae:1037.06
[1137]	train-rmse:0.438494	train-mae:1036.96
[1138]	train-rmse:0.438474	train-mae:1036.9
[1139]	train-rmse:0.438445	train-mae:1036.82
[1140]	train-rmse:0.438439	train-mae:1036.8
[1141]	train-rmse:0.438423	train-mae:1036.76
[1142]	train-rmse:0.438398	train-mae:1036.68
[1143]	train-rmse:0.438388	train-mae:1036.65
[1144]	train-rmse:0.438373	train-mae:1036.61
[1145]	train-rmse:0.43837	train-mae:1036.6
[1146]	train-rmse:0.438355	train-mae:1036.56
[1147]	train-rmse:0.43834	train-mae:1036.52
[1148]	train-rmse:0.438316	train-mae:1036.45
[1149]	train-rmse:0.438298	train-mae:1036.4
[1150]	train-rmse:0.438286	train-mae:1036.37
[1151]	train-rmse:0.438265	train-mae:1036.32
[1152]	train-rmse

[1314]	train-rmse:0.435986	train-mae:1030
[1315]	train-rmse:0.435971	train-mae:1029.95
[1316]	train-rmse:0.435963	train-mae:1029.93
[1317]	train-rmse:0.435941	train-mae:1029.87
[1318]	train-rmse:0.435936	train-mae:1029.85
[1319]	train-rmse:0.435925	train-mae:1029.83
[1320]	train-rmse:0.435918	train-mae:1029.81
[1321]	train-rmse:0.435909	train-mae:1029.79
[1322]	train-rmse:0.435895	train-mae:1029.75
[1323]	train-rmse:0.435883	train-mae:1029.72
[1324]	train-rmse:0.43586	train-mae:1029.66
[1325]	train-rmse:0.435847	train-mae:1029.62
[1326]	train-rmse:0.435809	train-mae:1029.53
[1327]	train-rmse:0.4358	train-mae:1029.5
[1328]	train-rmse:0.435793	train-mae:1029.48
[1329]	train-rmse:0.435785	train-mae:1029.45
[1330]	train-rmse:0.435772	train-mae:1029.42
[1331]	train-rmse:0.435765	train-mae:1029.39
[1332]	train-rmse:0.435755	train-mae:1029.36
[1333]	train-rmse:0.435742	train-mae:1029.32
[1334]	train-rmse:0.435735	train-mae:1029.3
[1335]	train-rmse:0.435717	train-mae:1029.25
[1336]	train-rmse:

[1497]	train-rmse:0.433679	train-mae:1023.67
[1498]	train-rmse:0.433669	train-mae:1023.64
[1499]	train-rmse:0.433661	train-mae:1023.62
[1500]	train-rmse:0.433656	train-mae:1023.61
[1501]	train-rmse:0.433647	train-mae:1023.59
[1502]	train-rmse:0.433634	train-mae:1023.55
[1503]	train-rmse:0.433627	train-mae:1023.54
[1504]	train-rmse:0.433622	train-mae:1023.53
[1505]	train-rmse:0.433613	train-mae:1023.51
[1506]	train-rmse:0.433606	train-mae:1023.49
[1507]	train-rmse:0.4336	train-mae:1023.48
[1508]	train-rmse:0.433589	train-mae:1023.45
[1509]	train-rmse:0.433585	train-mae:1023.44
[1510]	train-rmse:0.433576	train-mae:1023.41
[1511]	train-rmse:0.433563	train-mae:1023.37
[1512]	train-rmse:0.433554	train-mae:1023.35
[1513]	train-rmse:0.433544	train-mae:1023.32
[1514]	train-rmse:0.433537	train-mae:1023.3
[1515]	train-rmse:0.433527	train-mae:1023.28
[1516]	train-rmse:0.433516	train-mae:1023.25
[1517]	train-rmse:0.433492	train-mae:1023.18
[1518]	train-rmse:0.433481	train-mae:1023.15
[1519]	train-

[1680]	train-rmse:0.43171	train-mae:1018.19
[1681]	train-rmse:0.431703	train-mae:1018.16
[1682]	train-rmse:0.431687	train-mae:1018.12
[1683]	train-rmse:0.431673	train-mae:1018.09
[1684]	train-rmse:0.431667	train-mae:1018.07
[1685]	train-rmse:0.431657	train-mae:1018.05
[1686]	train-rmse:0.431657	train-mae:1018.04
[1687]	train-rmse:0.431651	train-mae:1018.03
[1688]	train-rmse:0.43163	train-mae:1017.97
[1689]	train-rmse:0.431623	train-mae:1017.95
[1690]	train-rmse:0.431613	train-mae:1017.92
[1691]	train-rmse:0.431605	train-mae:1017.9
[1692]	train-rmse:0.431596	train-mae:1017.87
[1693]	train-rmse:0.43159	train-mae:1017.85
[1694]	train-rmse:0.431584	train-mae:1017.83
[1695]	train-rmse:0.431581	train-mae:1017.82
[1696]	train-rmse:0.431558	train-mae:1017.77
[1697]	train-rmse:0.431554	train-mae:1017.76
[1698]	train-rmse:0.431542	train-mae:1017.73
[1699]	train-rmse:0.431539	train-mae:1017.72
[1700]	train-rmse:0.431532	train-mae:1017.71
[1701]	train-rmse:0.431527	train-mae:1017.69
[1702]	train-r

[1864]	train-rmse:0.429921	train-mae:1013.28
[1865]	train-rmse:0.429897	train-mae:1013.22
[1866]	train-rmse:0.429895	train-mae:1013.21
[1867]	train-rmse:0.429888	train-mae:1013.19
[1868]	train-rmse:0.429876	train-mae:1013.16
[1869]	train-rmse:0.429872	train-mae:1013.14
[1870]	train-rmse:0.429867	train-mae:1013.12
[1871]	train-rmse:0.429855	train-mae:1013.09
[1872]	train-rmse:0.429848	train-mae:1013.07
[1873]	train-rmse:0.429835	train-mae:1013.03
[1874]	train-rmse:0.429826	train-mae:1013.01
[1875]	train-rmse:0.429813	train-mae:1012.97
[1876]	train-rmse:0.429802	train-mae:1012.94
[1877]	train-rmse:0.429799	train-mae:1012.93
[1878]	train-rmse:0.429786	train-mae:1012.89
[1879]	train-rmse:0.42978	train-mae:1012.88
[1880]	train-rmse:0.429762	train-mae:1012.83
[1881]	train-rmse:0.429753	train-mae:1012.8
[1882]	train-rmse:0.429745	train-mae:1012.77
[1883]	train-rmse:0.429729	train-mae:1012.73
[1884]	train-rmse:0.429714	train-mae:1012.68
[1885]	train-rmse:0.429706	train-mae:1012.66
[1886]	train

[2048]	train-rmse:0.428246	train-mae:1008.73
[2049]	train-rmse:0.428224	train-mae:1008.67
[2050]	train-rmse:0.428219	train-mae:1008.65
[2051]	train-rmse:0.428204	train-mae:1008.6
[2052]	train-rmse:0.428195	train-mae:1008.59
[2053]	train-rmse:0.42819	train-mae:1008.57
[2054]	train-rmse:0.428168	train-mae:1008.51
[2055]	train-rmse:0.428162	train-mae:1008.49
[2056]	train-rmse:0.428156	train-mae:1008.47
[2057]	train-rmse:0.428142	train-mae:1008.43
[2058]	train-rmse:0.428137	train-mae:1008.42
[2059]	train-rmse:0.428129	train-mae:1008.41
[2060]	train-rmse:0.428108	train-mae:1008.34
[2061]	train-rmse:0.428102	train-mae:1008.32
[2062]	train-rmse:0.428093	train-mae:1008.3
[2063]	train-rmse:0.428088	train-mae:1008.29
[2064]	train-rmse:0.428075	train-mae:1008.26
[2065]	train-rmse:0.428064	train-mae:1008.23
[2066]	train-rmse:0.428054	train-mae:1008.2
[2067]	train-rmse:0.428046	train-mae:1008.18
[2068]	train-rmse:0.428043	train-mae:1008.17
[2069]	train-rmse:0.42803	train-mae:1008.13
[2070]	train-rm

[2231]	train-rmse:0.426778	train-mae:1004.76
[2232]	train-rmse:0.42677	train-mae:1004.74
[2233]	train-rmse:0.42676	train-mae:1004.71
[2234]	train-rmse:0.42676	train-mae:1004.71
