In [1]:
import pandas as pd
import numpy as np

from catboost import CatBoostClassifier

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

train['completion_date'] = pd.to_datetime(train['completion_date'])
test['completion_date'] = pd.to_datetime(test['completion_date'])

train['dayofweek'] = train['completion_date'].map(lambda x: x.dayofweek)
train['dayofmonth'] = train['completion_date'].map(lambda x: x.day)
train['dayofyear'] = train['completion_date'].map(lambda x: x.dayofyear)

test['dayofweek'] = test['completion_date'].map(lambda x: x.dayofweek)
test['dayofmonth'] = test['completion_date'].map(lambda x: x.day)
test['dayofyear'] = test['completion_date'].map(lambda x: x.dayofyear)

weekly_N = 3
weekly_P = 7
for n in range(1, weekly_N + 1):
    col_name = 'weekly_' + str(n)
    train[col_name + '_sin'] = np.sin((2*np.pi*n*train['dayofweek']) / weekly_P)
    train[col_name + '_cos'] = np.cos((2*np.pi*n*train['dayofweek']) / weekly_P)
    test[col_name + '_sin'] = np.sin((2*np.pi*n*test['dayofweek']) / weekly_P)
    test[col_name + '_cos'] = np.cos((2*np.pi*n*test['dayofweek']) / weekly_P)

monthly_N = 5
monthly_P = 30.5
for n in range(1, monthly_N + 1):
    col_name = 'monthly_' + str(n)
    train[col_name + '_sin'] = np.sin((2*np.pi*n*train['dayofmonth']) / monthly_P)
    train[col_name + '_cos'] = np.cos((2*np.pi*n*train['dayofmonth']) / monthly_P)
    test[col_name + '_sin'] = np.sin((2*np.pi*n*test['dayofmonth']) / monthly_P)
    test[col_name + '_cos'] = np.cos((2*np.pi*n*test['dayofmonth']) / monthly_P)
    
yearly_N = 10
yearly_P = 365.25
for n in range(1, yearly_N + 1):
    col_name = 'yearly_' + str(n)
    train[col_name + '_sin'] = np.sin((2*np.pi*n*train['dayofyear']) / yearly_P)
    train[col_name + '_cos'] = np.cos((2*np.pi*n*train['dayofyear']) / yearly_P)
    test[col_name + '_sin'] = np.sin((2*np.pi*n*test['dayofyear']) / yearly_P)
    test[col_name + '_cos'] = np.cos((2*np.pi*n*test['dayofyear']) / yearly_P)

In [3]:
train['speed_load'] = train['speed_category'] * train['load_category']
train['speed_floor'] = train['speed_category'] * train['floors_category']
train['load_floor'] = train['load_category'] * train['floors_category']

test['speed_load'] = test['speed_category'] * test['load_category']
test['speed_floor'] = test['speed_category'] * test['floors_category']
test['load_floor'] = test['load_category'] * test['floors_category']

In [8]:
from catboost import CatBoostClassifier

cat_cols = ['case_id', 'equipment_id', 'action_recommendation_id', 'action_recommendation_type', 
            'action_recommendation_category', 'equipment_area', 'usage_type', 'equipment_category']

model = CatBoostClassifier(cat_features=cat_cols, iterations=600)

In [9]:
X = train.drop(['feedback', 'completion_date'], axis=1)
y = train['feedback']

In [10]:
model.fit(X, y)

Learning rate set to 0.125041
0:	learn: 0.5794543	total: 418ms	remaining: 4m 10s
1:	learn: 0.5070102	total: 750ms	remaining: 3m 44s
2:	learn: 0.4511276	total: 1.16s	remaining: 3m 50s
3:	learn: 0.4035702	total: 1.48s	remaining: 3m 40s
4:	learn: 0.3740646	total: 1.72s	remaining: 3m 25s
5:	learn: 0.3463863	total: 2s	remaining: 3m 17s
6:	learn: 0.3260955	total: 2.18s	remaining: 3m 4s
7:	learn: 0.3121557	total: 2.55s	remaining: 3m 8s
8:	learn: 0.3016522	total: 2.96s	remaining: 3m 14s
9:	learn: 0.2928574	total: 3.25s	remaining: 3m 11s
10:	learn: 0.2854522	total: 3.61s	remaining: 3m 13s
11:	learn: 0.2804075	total: 4.13s	remaining: 3m 22s
12:	learn: 0.2743740	total: 4.75s	remaining: 3m 34s
13:	learn: 0.2701159	total: 5.16s	remaining: 3m 36s
14:	learn: 0.2668377	total: 5.44s	remaining: 3m 32s
15:	learn: 0.2634302	total: 5.81s	remaining: 3m 32s
16:	learn: 0.2610921	total: 6.09s	remaining: 3m 28s
17:	learn: 0.2592412	total: 6.45s	remaining: 3m 28s
18:	learn: 0.2578636	total: 6.77s	remaining: 3m 2

158:	learn: 0.2215655	total: 1m 8s	remaining: 3m 8s
159:	learn: 0.2214355	total: 1m 8s	remaining: 3m 8s
160:	learn: 0.2213326	total: 1m 8s	remaining: 3m 7s
161:	learn: 0.2212231	total: 1m 9s	remaining: 3m 7s
162:	learn: 0.2211088	total: 1m 9s	remaining: 3m 6s
163:	learn: 0.2210344	total: 1m 10s	remaining: 3m 6s
164:	learn: 0.2209708	total: 1m 10s	remaining: 3m 5s
165:	learn: 0.2209161	total: 1m 10s	remaining: 3m 4s
166:	learn: 0.2208293	total: 1m 11s	remaining: 3m 4s
167:	learn: 0.2207550	total: 1m 11s	remaining: 3m 4s
168:	learn: 0.2206232	total: 1m 11s	remaining: 3m 3s
169:	learn: 0.2204839	total: 1m 12s	remaining: 3m 2s
170:	learn: 0.2204146	total: 1m 12s	remaining: 3m 2s
171:	learn: 0.2203648	total: 1m 12s	remaining: 3m 1s
172:	learn: 0.2203392	total: 1m 13s	remaining: 3m 1s
173:	learn: 0.2202608	total: 1m 13s	remaining: 3m
174:	learn: 0.2201515	total: 1m 14s	remaining: 3m
175:	learn: 0.2200366	total: 1m 14s	remaining: 2m 59s
176:	learn: 0.2199515	total: 1m 15s	remaining: 2m 59s
17

311:	learn: 0.2106433	total: 2m 17s	remaining: 2m 6s
312:	learn: 0.2105991	total: 2m 17s	remaining: 2m 6s
313:	learn: 0.2105217	total: 2m 17s	remaining: 2m 5s
314:	learn: 0.2105101	total: 2m 18s	remaining: 2m 5s
315:	learn: 0.2104051	total: 2m 18s	remaining: 2m 4s
316:	learn: 0.2103636	total: 2m 19s	remaining: 2m 4s
317:	learn: 0.2102082	total: 2m 20s	remaining: 2m 4s
318:	learn: 0.2101654	total: 2m 20s	remaining: 2m 3s
319:	learn: 0.2101319	total: 2m 21s	remaining: 2m 3s
320:	learn: 0.2100697	total: 2m 21s	remaining: 2m 2s
321:	learn: 0.2100161	total: 2m 21s	remaining: 2m 2s
322:	learn: 0.2099778	total: 2m 22s	remaining: 2m 1s
323:	learn: 0.2098573	total: 2m 22s	remaining: 2m 1s
324:	learn: 0.2097829	total: 2m 23s	remaining: 2m 1s
325:	learn: 0.2097607	total: 2m 24s	remaining: 2m 1s
326:	learn: 0.2097055	total: 2m 24s	remaining: 2m 1s
327:	learn: 0.2096806	total: 2m 25s	remaining: 2m
328:	learn: 0.2096652	total: 2m 26s	remaining: 2m
329:	learn: 0.2096206	total: 2m 26s	remaining: 2m
33

465:	learn: 0.2026237	total: 3m 23s	remaining: 58.4s
466:	learn: 0.2025685	total: 3m 23s	remaining: 58s
467:	learn: 0.2025284	total: 3m 23s	remaining: 57.5s
468:	learn: 0.2024800	total: 3m 24s	remaining: 57.1s
469:	learn: 0.2023909	total: 3m 24s	remaining: 56.7s
470:	learn: 0.2023354	total: 3m 25s	remaining: 56.3s
471:	learn: 0.2022601	total: 3m 25s	remaining: 55.8s
472:	learn: 0.2022515	total: 3m 26s	remaining: 55.3s
473:	learn: 0.2022287	total: 3m 26s	remaining: 54.9s
474:	learn: 0.2021557	total: 3m 26s	remaining: 54.5s
475:	learn: 0.2021167	total: 3m 27s	remaining: 54s
476:	learn: 0.2020775	total: 3m 27s	remaining: 53.6s
477:	learn: 0.2020513	total: 3m 28s	remaining: 53.2s
478:	learn: 0.2019995	total: 3m 28s	remaining: 52.7s
479:	learn: 0.2019758	total: 3m 29s	remaining: 52.3s
480:	learn: 0.2019355	total: 3m 29s	remaining: 51.8s
481:	learn: 0.2018706	total: 3m 29s	remaining: 51.4s
482:	learn: 0.2018105	total: 3m 30s	remaining: 50.9s
483:	learn: 0.2017405	total: 3m 30s	remaining: 50.

<catboost.core.CatBoostClassifier at 0x14a6f8ff688>

In [12]:
test = test.drop(['completion_date'], axis=1)

In [54]:
mode = test['action_recommendation_id'].mode().values[0]

In [55]:
test['action_recommendation_id'] = test['action_recommendation_id'].fillna(mode)

In [56]:
mode = test['equipment_area'].mode().values[0]

In [57]:
test['equipment_area'] = test['equipment_area'].fillna(mode)

In [58]:
model.predict(test)

array([1, 1, 1, ..., 1, 1, 1], dtype=int64)

In [59]:
model.predict_proba(test)

array([[0.06282752, 0.93717248],
       [0.0410078 , 0.9589922 ],
       [0.19364587, 0.80635413],
       ...,
       [0.37855746, 0.62144254],
       [0.01226621, 0.98773379],
       [0.03923917, 0.96076083]])