In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import xgboost as xgb
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the training CSV into `train`; the path is relative to the working directory.
# NOTE(review): 'daata' may be a typo for 'data' — confirm the directory name on disk.
train = pd.read_csv('daata/xaa')

In [3]:
def get_begin(ind):
    """Locate, for each coordinate axis, the bin that most exceeds a cubic trend.

    For each of the columns 'X', 'Y' and 'Z' of ``ind``:
      1. build a 200-bin histogram of the column;
      2. discard the three outermost bins on either side;
      3. fit a cubic polynomial to the remaining counts, using the left bin
         edges as abscissae;
      4. pick the left edge of the bin whose count exceeds the fitted value
         by the largest margin.

    Parameters
    ----------
    ind : mapping of column name -> 1-D numeric data (e.g. a DataFrame)
        Must provide the keys 'X', 'Y' and 'Z'.

    Returns
    -------
    list
        Three selected bin edges, in X, Y, Z order.
    """
    trim = slice(3, -3)  # drop the three outermost bins on each side
    peaks = []
    for axis in ('X', 'Y', 'Z'):
        counts, edges = np.histogram(ind[axis], bins=200)
        # edges has one more entry than counts; keep only the left edges.
        left_edges = edges[:-1][trim]
        counts = counts[trim]
        coeffs = np.polyfit(left_edges, counts, 3)
        # Evaluate the fitted cubic at every retained left edge.
        trend = np.polyval(coeffs, left_edges)
        peaks.append(left_edges[np.argmax(counts - trend)])
    return peaks

def prepare(df):
    """Return a copy of ``df`` with per-group offset columns dX, dY and dZ.

    Rows are grouped by ``data_ind``. For each group, ``get_begin`` supplies
    one reference value per axis, and that value is subtracted from the
    group's X, Y and Z columns to form dX, dY and dZ.

    Groups are emitted in order of first appearance of their ``data_ind``
    value; rows keep their original index labels and their relative order
    within each group. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'data_ind', 'X', 'Y' and 'Z'.

    Returns
    -------
    pandas.DataFrame
        The regrouped rows with the extra columns 'dX', 'dY', 'dZ'. When
        there are no groups at all (e.g. an empty input), an empty frame
        with those columns is returned instead of letting ``pd.concat``
        raise ``ValueError`` on an empty list.
    """
    pieces = []
    # A single groupby pass replaces one full boolean scan of the frame per
    # data_ind value; sort=False keeps first-appearance order, matching the
    # order produced by iterating over unique().
    for _, group in df.groupby(df.data_ind, sort=False):
        begin = get_begin(group)
        # assign() returns a new frame, so neither df nor the group is mutated.
        pieces.append(group.assign(
            dX=group.X - begin[0],
            dY=group.Y - begin[1],
            dZ=group.Z - begin[2],
        ))

    if not pieces:
        # Nothing to concatenate: hand back an empty frame with the same schema.
        empty = df.iloc[0:0].copy()
        for axis in ('X', 'Y', 'Z'):
            empty['d' + axis] = empty[axis].astype(float)
        return empty

    return pd.concat(pieces)

In [4]:
# Add the per-data_ind offset columns dX, dY, dZ to the training frame.
df = prepare(train)

In [5]:
# Feature matrix: every column except the target and the two identifier columns.
x_train = df.drop(columns=['signal', 'id', 'data_ind'])
# Target vector: the 'signal' column.
y_train = df.loc[:, 'signal']

In [6]:
# Wrap the training features and labels in a LightGBM Dataset.
lgb_train = lgb.Dataset(data=x_train, label=y_train)

In [7]:
# Training parameters handed to lgb.train.
params = dict(
    boosting_type='gbdt',
    objective='binary',
    metric='auc',
    num_leaves=31,
    learning_rate=0.05,
    max_depth=15,
    feature_fraction=0.7,
    bagging_fraction=0.7,
    bagging_freq=4,
)

In [8]:
# Number of boosting iterations.
num_round = 100
# Train the booster on the training Dataset with the parameters defined above.
model = lgb.train(
    params=params,
    train_set=lgb_train,
    num_boost_round=num_round,
)

In [9]:
# Unbind the training-only names; none of them is referenced by the cells below.
# NOTE(review): `df` (the prepared training frame) is still bound after this cell —
# confirm whether it should be released as well.
del lgb_train
del x_train
del y_train
del train

In [10]:
# Read the test CSV into `test`; the path is relative to the working directory.
# NOTE(review): 'daata' may be a typo for 'data' — confirm the directory name on disk.
test = pd.read_csv('daata/test_50.csv')

In [11]:
# Add the per-data_ind offset columns dX, dY, dZ to the test frame (same transform as for training).
prepared_test = prepare(test)

In [12]:
# Test feature matrix: drop the identifier columns, keep everything else.
# `columns=` is used instead of a positional axis argument: passing the axis
# positionally to DataFrame.drop was deprecated in pandas 1.3 and raises
# TypeError from pandas 2.0 onwards.
x_test = prepared_test.drop(columns=['id', 'data_ind'])

In [13]:
# Run the trained model on the test features; the output is kept in `answer`.
answer = model.predict(x_test)

In [16]:
# Assemble the submission table: integer ids next to the model output.
# NOTE(review): the predictions are narrowed to float16 here, which keeps only
# about three significant decimal digits — confirm this precision is intended.
submission_columns = {
    'id': prepared_test['id'].astype(int),
    'signal': answer.astype('float16'),
}
result = pd.DataFrame(data=submission_columns)

In [17]:
# Write the submission table to 'submit.csv' in the working directory, omitting the index column.
result.to_csv('submit.csv', index=False)