In [1]:
import numpy as np
import pandas as pd
import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
import math
from sklearn import metrics
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import KFold, StratifiedKFold
import matplotlib.pyplot as plt
import os
import glob

In [2]:

# Directory holding the project CSV files. Set the ML_PROJECT_DATA_DIR
# environment variable to point somewhere else without editing the notebook;
# the default is the location this notebook was originally written against.
data_path = os.environ.get('ML_PROJECT_DATA_DIR', '/home/madnisal/Documents/ML_Project/datasets/')
# Rows are indexed by the `tripid` column of train.csv.
training_df = pd.read_csv(os.path.join(data_path, 'train.csv'), index_col="tripid")

In [3]:
def dist_from_coordinates(lat1, lon1, lat2, lon2):
    """Great-circle (haversine) distance in kilometres between two points.

    All four arguments are latitudes/longitudes in degrees. Only numpy
    ufuncs are used, so scalars and equally shaped arrays/Series both work
    and the result has the broadcast shape of the inputs.

    Returns the distance in km, using an Earth radius of 6371 km.
    """
    R = 6371  # Earth radius in km

    # Differences and latitudes converted to radians.
    d_lat = np.radians(lat2 - lat1)
    d_lon = np.radians(lon2 - lon1)
    r_lat1 = np.radians(lat1)
    r_lat2 = np.radians(lat2)

    # Haversine term. Mathematically it lies in [0, 1], but rounding can push
    # it marginally outside for (near-)antipodal points, which would make
    # arcsin(sqrt(a)) return NaN; clamp it back into range.
    a = np.sin(d_lat / 2.) ** 2 + np.cos(r_lat1) * np.cos(r_lat2) * np.sin(d_lon / 2.) ** 2
    a = np.clip(a, 0.0, 1.0)

    haversine = 2 * R * np.arcsin(np.sqrt(a))

    return haversine

In [4]:
# Parse both trip timestamps from "month/day/year hour:minute" text into
# pandas datetimes so the .dt accessor and subtraction work downstream.
for time_column in ('pickup_time', 'drop_time'):
    training_df[time_column] = pd.to_datetime(training_df[time_column], format="%m/%d/%Y %H:%M")

In [5]:
# Bucket the pickup hour into right-closed bins:
# (-1, 8] -> 'dawn', (8, 20] -> 'day', (20, 24] -> 'night'.
hour_bin_edges = [-1, 8, 20, 24]
hour_bin_labels = ['dawn', 'day', 'night']
pickup_hour = training_df.pickup_time.dt.hour
training_df = training_df.assign(timeOfDay=pd.cut(pickup_hour, hour_bin_edges, labels=hour_bin_labels))

In [6]:
# 1.0 for trips picked up in the 'day' bucket, 0.0 for every other row
# (including rows whose timeOfDay is missing). Stored as float, as before.
training_df['isNormalCharge'] = (training_df['timeOfDay'] == 'day').astype(float)

In [7]:
# Cleaned trip duration in seconds.
# Use the provided `duration` when it is a positive number; otherwise fall
# back to drop_time - pickup_time. `total_seconds()` is used instead of
# `.seconds`, because `.seconds` drops the days component of the difference
# and wraps negative differences to a large positive value.
elapsed_seconds = (training_df['drop_time'] - training_df['pickup_time']).dt.total_seconds()
# A zero or negative elapsed time carries no information -> treat as missing.
elapsed_seconds = elapsed_seconds.where(elapsed_seconds > 0)

provided_duration = training_df['duration']
duration_is_usable = provided_duration > 0  # False for NaN as well as <= 0
durations = provided_duration.where(duration_is_usable, elapsed_seconds)

training_df.insert(4, "time_dif", durations.to_numpy())

In [8]:
# Straight-line (haversine) distance in km between pickup and drop-off.
# dist_from_coordinates is built from numpy ufuncs, so it is applied to whole
# columns at once instead of iterating row by row with iterrows().
new_column = dist_from_coordinates(
    training_df['pick_lat'].to_numpy(),
    training_df['pick_lon'].to_numpy(),
    training_df['drop_lat'].to_numpy(),
    training_df['drop_lon'].to_numpy(),
)

training_df.insert(4, "distance", new_column)

In [9]:
# Time spent moving = reported duration minus metered waiting time.
# NOTE(review): this uses the raw `duration` column, not the cleaned
# `time_dif` - confirm that is intended.
training_df['time_driven'] = training_df['duration'].sub(training_df['meter_waiting'])

In [10]:
# Waiting fare expressed per hour of metered waiting (meter_waiting is
# multiplied up by 3600, i.e. treated as seconds). Rows with zero waiting
# time get NaN rather than a division-by-zero artefact. Computed on whole
# columns instead of a Python-level iterrows() loop.
waiting_time = training_df['meter_waiting']
chargeperhours = (training_df['meter_waiting_fare'] / waiting_time * 3600).where(waiting_time != 0)

training_df.insert(4, 'charge_per_hour', chargeperhours.to_numpy())


In [11]:
# Portion of the fare attributable to driving: total fare with the waiting
# fare and the additional fare removed.
training_df['driving_fare'] = (
    training_df['fare']
    .sub(training_df['meter_waiting_fare'])
    .sub(training_df['additional_fare'])
)

In [12]:
# Average speed: distance divided by time_driven, scaled by 3600 (distance is
# in km; time_driven is treated as seconds, giving km/h). Rows with zero
# driving time get NaN. Computed on whole columns instead of iterrows().
driving_time = training_df['time_driven']
avgspeeds = (training_df['distance'] / driving_time * 3600).where(driving_time != 0)

training_df.insert(4, "avg_speed", avgspeeds.to_numpy())



In [13]:
# Driving fare per kilometre of straight-line distance. Rows with zero
# distance get NaN instead of an infinite rate. Computed on whole columns
# instead of a Python-level iterrows() loop.
trip_distance = training_df['distance']
costsperkm = (training_df['driving_fare'] / trip_distance).where(trip_distance != 0)

training_df.insert(4, "cost_per_km", costsperkm.to_numpy())

In [14]:
# Encode the text target as integers: 'incorrect' -> 0, 'correct' -> 1.
label_encoding = {'incorrect': 0, 'correct': 1}
training_df = training_df.replace({'label': label_encoding})

In [15]:
# Display the column index to check the layout after feature engineering.
training_df.columns

Index(['additional_fare', 'duration', 'meter_waiting', 'meter_waiting_fare',
       'cost_per_km', 'avg_speed', 'charge_per_hour', 'distance', 'time_dif',
       'meter_waiting_till_pickup', 'pickup_time', 'drop_time', 'pick_lat',
       'pick_lon', 'drop_lat', 'drop_lon', 'fare', 'label', 'timeOfDay',
       'isNormalCharge', 'time_driven', 'driving_fare'],
      dtype='object')

In [16]:
#training_columns = ['duration','meter_waiting','meter_waiting_fare','fare','additional_fare']

In [17]:
# Feature columns fed to the model, in this exact order: raw meter/fare
# fields, then the engineered features, then the raw coordinates.
raw_fare_columns = ['meter_waiting', 'meter_waiting_fare', 'fare', 'additional_fare']
engineered_columns = [
    'distance', 'cost_per_km', 'avg_speed', 'time_dif',
    'time_driven', 'charge_per_hour', 'driving_fare', 'isNormalCharge',
]
coordinate_columns = ['pick_lat', 'pick_lon', 'drop_lat', 'drop_lon']
training_columns = raw_fare_columns + engineered_columns + coordinate_columns

In [18]:
#training_columns = ['additional_fare', 'meter_waiting','cost_per_km', 'avg_speed', 'charge_per_hour', 'time_dif','isNormalCharge', 'time_driven']

In [19]:
#training_columns = ['duration','meter_waiting','meter_waiting_fare','fare','additional_fare', 'distance','cost_per_km', 'avg_speed',  'time_dif','time_driven', 'charge_per_hour', 'driving_fare', 'isNormalCharge']

In [20]:
# Name of the target column, kept as a one-element list; selecting with it
# therefore yields a one-column DataFrame rather than a Series.
target_column = ['label']

In [21]:
# Trip id removed from the training data before modelling.
# NOTE(review): the reason for excluding this trip is not recorded in the
# notebook - presumably a known bad record; please document it here.
EXCLUDED_TRIP_ID = 190167541
# errors='ignore' keeps the cell re-runnable: a second execution would
# otherwise raise KeyError because the row is already gone.
training_df = training_df.drop(EXCLUDED_TRIP_ID, errors='ignore')

In [22]:
# Feature matrix, one row per trip and one column per entry of training_columns.
x = training_df[training_columns].values
# target_column is a one-element list, so the selection is an (n, 1) array.
# Flatten it to shape (n,): estimators expect 1-D labels and otherwise emit
# a column_or_1d DataConversionWarning on every fit.
y = training_df[target_column].values.ravel()

In [23]:
# Stratified 80/20 hold-out split.
# The inputs are re-derived from training_df instead of reusing x / y, because
# this cell rebinds x and y to the training portion: splitting "x, y" again on
# a re-run would silently shrink the training set each time.
all_features = training_df[training_columns].values
all_labels = training_df[target_column].values.ravel()  # 1-D labels
x, x_test, y, y_test = train_test_split(
    all_features, all_labels, test_size=0.2, random_state=42, stratify=all_labels
)

In [24]:
# Class balance of the encoded target (1 = 'correct', 0 = 'incorrect').
training_df['label'].value_counts()

1    15494
0     1681
Name: label, dtype: int64

<h2>Hyperparameter tuning</h2>

In [None]:
# Search space for GridSearchCV over LGBMClassifier.
# `num_boosting_rounds` was removed: it is not a recognised LightGBM parameter
# or alias (the number of boosting rounds is controlled by `n_estimators`,
# which is already searched), so it only doubled the grid without changing
# any model.
# NOTE(review): `random_state` is searched like a hyper-parameter, which
# multiplies the grid by 4 and rewards a lucky seed - consider fixing it.
param_grid = {
    'num_leaves': [15, 31, 63],
    'learning_rate': [0.1, 0.01],
    'class_weight': [{0: 4, 1: 1}, {0: 3, 1: 1}],  # up-weight the minority class 0
    'max_bins': [10, 100, 1000],
    'n_estimators': [50, 100, 200],
    'reg_alpha': [0.1, 0.5],
    'random_state': [1, 8, 16, 64],
    'min_data_in_leaf': [30, 100, 400],
    'lambda_l2': [0, 1]
    }

In [None]:
# LGBMClassifier with library-default settings.
model = lightgbm.LGBMClassifier()

In [None]:
# 5-fold stratified, shuffled splitter with a fixed seed.
# The splitter object itself is stored rather than the generator returned by
# .split(...): a generator is exhausted after one pass, so re-running the
# search with the same `gkf` would find no folds. GridSearchCV accepts a
# splitter for `cv` and calls .split on the data it is fitted with.
gkf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
# Exhaustive grid search scored by macro-averaged F1, using the folds in
# `gkf` and three worker processes.
search_settings = dict(
    estimator=model,
    param_grid=param_grid,
    cv=gkf,
    scoring='f1_macro',
    verbose=True,
    n_jobs=3,
)
gs = GridSearchCV(**search_settings)

In [None]:
# Run the grid search on the training split; ravel() hands the labels over
# as a flat 1-D array.
gs.fit(x, y.ravel())

In [None]:
# Report the best cross-validated score and the parameters that produced it.
print(f'Best score reached: {gs.best_score_} with params: {gs.best_params_} ')

In [None]:
# Predictions on the training split (to gauge over-fitting) and on the
# held-out split.
train_pred_y = gs.predict(x)
predicted_y = gs.predict(x_test)

In [None]:
# For the training split and then the held-out split, print a classification
# report followed by a confusion matrix, each preceded by a blank line.
for true_labels, predicted_labels in ((y, train_pred_y), (y_test, predicted_y)):
    for build_summary in (metrics.classification_report, metrics.confusion_matrix):
        print()
        print(build_summary(true_labels, predicted_labels))

<h2>Choosing the best model</h2>

In [25]:
import math

In [26]:
# Weight for the positive class (label 1): sqrt(#label-0 rows / #label-1 rows).
# The counts are read from the data instead of being typed in by hand, so the
# weight stays right if rows are added or dropped; for the counts displayed
# earlier (1681 zeros, 15494 ones) this is the same value as before.
label_counts = training_df['label'].value_counts()
scale_pos_weight = math.sqrt(label_counts.loc[0] / label_counts.loc[1])

In [27]:
scale_pos_weight

0.32938368270125923

In [28]:
from sklearn.metrics import f1_score
def evaluate_macroF1_lgb(y_true, y_pred):
    """Custom evaluation metric: macro-averaged F1 at a 0.5 threshold.

    Scores strictly below 0.5 become class 0 and everything else class 1.
    Returns a ``(name, value, is_higher_better)`` tuple with name 'macroF1'
    and is_higher_better set to True.
    """
    below_threshold = y_pred < 0.5
    hard_labels = np.where(below_threshold, 0, 1)
    macro_f1 = f1_score(y_true, hard_labels, average='macro')
    return ('macroF1', macro_f1, True)

In [29]:
def focal_loss_lgb_eval_error(y_true, y_pred, alpha=.25, gamma=2.):
    """Mean binary focal loss, shaped as a custom evaluation metric.

    Parameters
    ----------
    y_true : array-like of 0/1 labels.
    y_pred : array-like of raw scores; a sigmoid is applied here, so do not
        pass values that are already probabilities.
    alpha : weight given to positive samples (negatives get ``1 - alpha``).
    gamma : focusing exponent applied to ``1 - p_t``.

    Returns
    -------
    tuple ``('focal_loss', mean_loss, False)``; the final False marks the
    metric as lower-is-better.
    """
    y_true = np.asarray(y_true, dtype=float)
    raw_score = np.asarray(y_pred, dtype=float)

    # exp() overflows to inf for very negative scores; the resulting
    # probability of 0 is the correct limit, so only the warning is silenced.
    with np.errstate(over='ignore'):
        p = 1 / (1 + np.exp(-raw_score))
    # Keep p strictly inside (0, 1): otherwise log(0) = -inf gets multiplied
    # by 0 below and the whole mean becomes NaN for saturated scores.
    eps = 1e-12
    p = np.clip(p, eps, 1 - eps)

    class_weight = alpha * y_true + (1 - alpha) * (1 - y_true)
    p_t = y_true * p + (1 - y_true) * (1 - p)  # probability assigned to the true class
    log_likelihood = y_true * np.log(p) + (1 - y_true) * np.log(1 - p)
    loss = -class_weight * ((1 - p_t) ** gamma) * log_likelihood
    return 'focal_loss', np.mean(loss), False

In [30]:
# 5-fold stratified, shuffled splitter with a fixed seed for repeatable folds.
gkf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [31]:
# Chosen configuration.
# The original call passed both n_estimators=100 and num_iterations=1000,
# which are aliases for the same LightGBM setting. The training log runs well
# past 100 rounds, so the 1000 cap is the one that took effect; it is kept
# here as the single n_estimators value to remove the conflict.
# Early stopping (250 rounds without improvement on the eval set) normally
# ends training before the cap.
model = lightgbm.LGBMClassifier(
    boosting_type='gbdt',
    scale_pos_weight=scale_pos_weight,
    learning_rate=0.01,
    max_bins=10,
    min_data_in_leaf=60,
    n_estimators=1000,
    num_leaves=63,
    random_state=1,
    reg_alpha=0.1,
    metric=["custom", 'binary_logloss'],
    early_stopping_rounds=250,
)

In [None]:
# Simpler class-weighted alternative kept for comparison.
# It is bound to its own name: assigning it to `model` here would silently
# replace the tuned classifier defined just above whenever the notebook is
# run top to bottom, so the cross-validation below would train this one.
baseline_model = lightgbm.LGBMClassifier(class_weight={0:3,1:1}, learning_rate=0.1)

In [32]:
# Stratified cross-validation of `model` on the training split.
# Each fold refits the same estimator with early stopping monitored on that
# fold's validation part, so after the loop `model` holds the last fold's fit.
# The per-fold outcome is collected in `fold_results`, because it is otherwise
# lost when the next fold overwrites the model.
fold_results = []
for fold_number, (training_index, testing_index) in enumerate(gkf.split(X=x, y=y), start=1):
    # np.ravel gives 1-D labels, avoiding the column_or_1d
    # DataConversionWarning raised for (n, 1) label arrays.
    x_train_fold, y_train_fold = x[training_index], np.ravel(y[training_index])
    x_test_fold, y_test_fold = x[testing_index], np.ravel(y[testing_index])
    model.fit(
        x_train_fold, y_train_fold,
        eval_set=[(x_test_fold, y_test_fold)],
        # The metric function already has the (y_true, y_pred) signature, so
        # it is passed directly instead of through a wrapping lambda.
        eval_metric=evaluate_macroF1_lgb,
    )
    fold_results.append({
        'fold': fold_number,
        'best_iteration': model.best_iteration_,
        'best_score': model.best_score_,
    })

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	valid_0's binary_logloss: 0.315027	valid_0's macroF1: 0.474268
Training until validation scores don't improve for 250 rounds
[2]	valid_0's binary_logloss: 0.309945	valid_0's macroF1: 0.474268
[3]	valid_0's binary_logloss: 0.305411	valid_0's macroF1: 0.474268
[4]	valid_0's binary_logloss: 0.301144	valid_0's macroF1: 0.474268
[5]	valid_0's binary_logloss: 0.297285	valid_0's macroF1: 0.474268
[6]	valid_0's binary_logloss: 0.29356	valid_0's macroF1: 0.474268
[7]	valid_0's binary_logloss: 0.290109	valid_0's macroF1: 0.474268
[8]	valid_0's binary_logloss: 0.286888	valid_0's macroF1: 0.474268
[9]	valid_0's binary_logloss: 0.283737	valid_0's macroF1: 0.474268
[10]	valid_0's binary_logloss: 0.280807	valid_0's macroF1: 0.474268
[11]	valid_0's binary_logloss: 0.278013	valid_0's macroF1: 0.474268
[12]	valid_0's binary_logloss: 0.275339	valid_0's macroF1: 0.474268
[13]	valid_0's binary_logloss: 0.272753	valid_0's macroF1: 0.474268
[14]	valid_0's binary_logloss: 0.270256	valid_0's macroF1: 0.474

[123]	valid_0's binary_logloss: 0.182835	valid_0's macroF1: 0.827
[124]	valid_0's binary_logloss: 0.182571	valid_0's macroF1: 0.827663
[125]	valid_0's binary_logloss: 0.182225	valid_0's macroF1: 0.826173
[126]	valid_0's binary_logloss: 0.181964	valid_0's macroF1: 0.827663
[127]	valid_0's binary_logloss: 0.181728	valid_0's macroF1: 0.82832
[128]	valid_0's binary_logloss: 0.181469	valid_0's macroF1: 0.827497
[129]	valid_0's binary_logloss: 0.181226	valid_0's macroF1: 0.827497
[130]	valid_0's binary_logloss: 0.180893	valid_0's macroF1: 0.827497
[131]	valid_0's binary_logloss: 0.180679	valid_0's macroF1: 0.827497
[132]	valid_0's binary_logloss: 0.180443	valid_0's macroF1: 0.827497
[133]	valid_0's binary_logloss: 0.18021	valid_0's macroF1: 0.826676
[134]	valid_0's binary_logloss: 0.180021	valid_0's macroF1: 0.82815
[135]	valid_0's binary_logloss: 0.179771	valid_0's macroF1: 0.82815
[136]	valid_0's binary_logloss: 0.179528	valid_0's macroF1: 0.82815
[137]	valid_0's binary_logloss: 0.179304	v

[259]	valid_0's binary_logloss: 0.159453	valid_0's macroF1: 0.84173
[260]	valid_0's binary_logloss: 0.159374	valid_0's macroF1: 0.84173
[261]	valid_0's binary_logloss: 0.159322	valid_0's macroF1: 0.84173
[262]	valid_0's binary_logloss: 0.159186	valid_0's macroF1: 0.84173
[263]	valid_0's binary_logloss: 0.159152	valid_0's macroF1: 0.843324
[264]	valid_0's binary_logloss: 0.159043	valid_0's macroF1: 0.843324
[265]	valid_0's binary_logloss: 0.15896	valid_0's macroF1: 0.842525
[266]	valid_0's binary_logloss: 0.158866	valid_0's macroF1: 0.843324
[267]	valid_0's binary_logloss: 0.158802	valid_0's macroF1: 0.843324
[268]	valid_0's binary_logloss: 0.158675	valid_0's macroF1: 0.844125
[269]	valid_0's binary_logloss: 0.158564	valid_0's macroF1: 0.846811
[270]	valid_0's binary_logloss: 0.158448	valid_0's macroF1: 0.846811
[271]	valid_0's binary_logloss: 0.158337	valid_0's macroF1: 0.846811
[272]	valid_0's binary_logloss: 0.158241	valid_0's macroF1: 0.846811
[273]	valid_0's binary_logloss: 0.15814

[388]	valid_0's binary_logloss: 0.150851	valid_0's macroF1: 0.850512
[389]	valid_0's binary_logloss: 0.150829	valid_0's macroF1: 0.850512
[390]	valid_0's binary_logloss: 0.150794	valid_0's macroF1: 0.850512
[391]	valid_0's binary_logloss: 0.150717	valid_0's macroF1: 0.850512
[392]	valid_0's binary_logloss: 0.150697	valid_0's macroF1: 0.850512
[393]	valid_0's binary_logloss: 0.150683	valid_0's macroF1: 0.850512
[394]	valid_0's binary_logloss: 0.150644	valid_0's macroF1: 0.850512
[395]	valid_0's binary_logloss: 0.15063	valid_0's macroF1: 0.850512
[396]	valid_0's binary_logloss: 0.150623	valid_0's macroF1: 0.850512
[397]	valid_0's binary_logloss: 0.150611	valid_0's macroF1: 0.850512
[398]	valid_0's binary_logloss: 0.150559	valid_0's macroF1: 0.850512
[399]	valid_0's binary_logloss: 0.150532	valid_0's macroF1: 0.850512
[400]	valid_0's binary_logloss: 0.150464	valid_0's macroF1: 0.850512
[401]	valid_0's binary_logloss: 0.150452	valid_0's macroF1: 0.850512
[402]	valid_0's binary_logloss: 0.1

[520]	valid_0's binary_logloss: 0.148102	valid_0's macroF1: 0.854443
[521]	valid_0's binary_logloss: 0.148084	valid_0's macroF1: 0.854443
[522]	valid_0's binary_logloss: 0.148068	valid_0's macroF1: 0.854443
[523]	valid_0's binary_logloss: 0.148025	valid_0's macroF1: 0.854443
[524]	valid_0's binary_logloss: 0.148029	valid_0's macroF1: 0.854443
[525]	valid_0's binary_logloss: 0.148025	valid_0's macroF1: 0.854443
[526]	valid_0's binary_logloss: 0.148027	valid_0's macroF1: 0.854443
[527]	valid_0's binary_logloss: 0.148002	valid_0's macroF1: 0.854443
[528]	valid_0's binary_logloss: 0.148014	valid_0's macroF1: 0.854443
[529]	valid_0's binary_logloss: 0.14801	valid_0's macroF1: 0.853137
[530]	valid_0's binary_logloss: 0.148006	valid_0's macroF1: 0.853137
[531]	valid_0's binary_logloss: 0.147999	valid_0's macroF1: 0.851827
[532]	valid_0's binary_logloss: 0.147991	valid_0's macroF1: 0.851827
[533]	valid_0's binary_logloss: 0.148017	valid_0's macroF1: 0.851827
[534]	valid_0's binary_logloss: 0.1

[640]	valid_0's binary_logloss: 0.14796	valid_0's macroF1: 0.848953
[641]	valid_0's binary_logloss: 0.147962	valid_0's macroF1: 0.848953
[642]	valid_0's binary_logloss: 0.14797	valid_0's macroF1: 0.848953
[643]	valid_0's binary_logloss: 0.147965	valid_0's macroF1: 0.848146
[644]	valid_0's binary_logloss: 0.147968	valid_0's macroF1: 0.848953
[645]	valid_0's binary_logloss: 0.147924	valid_0's macroF1: 0.848146
[646]	valid_0's binary_logloss: 0.147924	valid_0's macroF1: 0.848953
[647]	valid_0's binary_logloss: 0.147905	valid_0's macroF1: 0.848953
[648]	valid_0's binary_logloss: 0.147914	valid_0's macroF1: 0.849763
[649]	valid_0's binary_logloss: 0.147914	valid_0's macroF1: 0.849763
[650]	valid_0's binary_logloss: 0.147924	valid_0's macroF1: 0.849763
[651]	valid_0's binary_logloss: 0.147939	valid_0's macroF1: 0.850575
[652]	valid_0's binary_logloss: 0.147939	valid_0's macroF1: 0.850575
[653]	valid_0's binary_logloss: 0.147934	valid_0's macroF1: 0.850575
[654]	valid_0's binary_logloss: 0.14

[782]	valid_0's binary_logloss: 0.148293	valid_0's macroF1: 0.852509
[783]	valid_0's binary_logloss: 0.148278	valid_0's macroF1: 0.852509
[784]	valid_0's binary_logloss: 0.148278	valid_0's macroF1: 0.852509
[785]	valid_0's binary_logloss: 0.148296	valid_0's macroF1: 0.852509
[786]	valid_0's binary_logloss: 0.148292	valid_0's macroF1: 0.853334
[787]	valid_0's binary_logloss: 0.148304	valid_0's macroF1: 0.852509
[788]	valid_0's binary_logloss: 0.148299	valid_0's macroF1: 0.853334
[789]	valid_0's binary_logloss: 0.148302	valid_0's macroF1: 0.852509
[790]	valid_0's binary_logloss: 0.148305	valid_0's macroF1: 0.852509
[791]	valid_0's binary_logloss: 0.148331	valid_0's macroF1: 0.852509
[792]	valid_0's binary_logloss: 0.148336	valid_0's macroF1: 0.853334
[793]	valid_0's binary_logloss: 0.148349	valid_0's macroF1: 0.853334
[794]	valid_0's binary_logloss: 0.148327	valid_0's macroF1: 0.853334
[795]	valid_0's binary_logloss: 0.148337	valid_0's macroF1: 0.854162
[796]	valid_0's binary_logloss: 0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	valid_0's binary_logloss: 0.31505	valid_0's macroF1: 0.474268
Training until validation scores don't improve for 250 rounds
[2]	valid_0's binary_logloss: 0.310112	valid_0's macroF1: 0.474268
[3]	valid_0's binary_logloss: 0.305562	valid_0's macroF1: 0.474268
[4]	valid_0's binary_logloss: 0.301469	valid_0's macroF1: 0.474268
[5]	valid_0's binary_logloss: 0.297558	valid_0's macroF1: 0.474268
[6]	valid_0's binary_logloss: 0.293912	valid_0's macroF1: 0.474268
[7]	valid_0's binary_logloss: 0.290547	valid_0's macroF1: 0.474268
[8]	valid_0's binary_logloss: 0.287333	valid_0's macroF1: 0.474268
[9]	valid_0's binary_logloss: 0.284374	valid_0's macroF1: 0.474268
[10]	valid_0's binary_logloss: 0.281531	valid_0's macroF1: 0.474268
[11]	valid_0's binary_logloss: 0.27885	valid_0's macroF1: 0.474268
[12]	valid_0's binary_logloss: 0.276232	valid_0's macroF1: 0.474268
[13]	valid_0's binary_logloss: 0.273846	valid_0's macroF1: 0.474268
[14]	valid_0's binary_logloss: 0.271428	valid_0's macroF1: 0.4742

[126]	valid_0's binary_logloss: 0.187374	valid_0's macroF1: 0.82649
[127]	valid_0's binary_logloss: 0.187102	valid_0's macroF1: 0.82649
[128]	valid_0's binary_logloss: 0.186833	valid_0's macroF1: 0.82649
[129]	valid_0's binary_logloss: 0.186573	valid_0's macroF1: 0.825657
[130]	valid_0's binary_logloss: 0.186305	valid_0's macroF1: 0.825657
[131]	valid_0's binary_logloss: 0.186084	valid_0's macroF1: 0.824827
[132]	valid_0's binary_logloss: 0.185828	valid_0's macroF1: 0.825503
[133]	valid_0's binary_logloss: 0.185603	valid_0's macroF1: 0.824678
[134]	valid_0's binary_logloss: 0.185354	valid_0's macroF1: 0.823856
[135]	valid_0's binary_logloss: 0.185119	valid_0's macroF1: 0.823036
[136]	valid_0's binary_logloss: 0.184883	valid_0's macroF1: 0.823036
[137]	valid_0's binary_logloss: 0.18465	valid_0's macroF1: 0.823036
[138]	valid_0's binary_logloss: 0.184411	valid_0's macroF1: 0.822897
[139]	valid_0's binary_logloss: 0.184187	valid_0's macroF1: 0.822897
[140]	valid_0's binary_logloss: 0.1840

[260]	valid_0's binary_logloss: 0.168482	valid_0's macroF1: 0.830077
[261]	valid_0's binary_logloss: 0.168444	valid_0's macroF1: 0.830077
[262]	valid_0's binary_logloss: 0.168391	valid_0's macroF1: 0.830077
[263]	valid_0's binary_logloss: 0.168336	valid_0's macroF1: 0.830847
[264]	valid_0's binary_logloss: 0.168264	valid_0's macroF1: 0.829309
[265]	valid_0's binary_logloss: 0.168202	valid_0's macroF1: 0.830077
[266]	valid_0's binary_logloss: 0.168083	valid_0's macroF1: 0.830077
[267]	valid_0's binary_logloss: 0.168013	valid_0's macroF1: 0.830077
[268]	valid_0's binary_logloss: 0.167892	valid_0's macroF1: 0.830077
[269]	valid_0's binary_logloss: 0.167839	valid_0's macroF1: 0.829309
[270]	valid_0's binary_logloss: 0.167722	valid_0's macroF1: 0.830077
[271]	valid_0's binary_logloss: 0.167613	valid_0's macroF1: 0.830077
[272]	valid_0's binary_logloss: 0.167488	valid_0's macroF1: 0.828543
[273]	valid_0's binary_logloss: 0.16738	valid_0's macroF1: 0.828543
[274]	valid_0's binary_logloss: 0.1

[394]	valid_0's binary_logloss: 0.160624	valid_0's macroF1: 0.836232
[395]	valid_0's binary_logloss: 0.160519	valid_0's macroF1: 0.837567
[396]	valid_0's binary_logloss: 0.160461	valid_0's macroF1: 0.836232
[397]	valid_0's binary_logloss: 0.160433	valid_0's macroF1: 0.836232
[398]	valid_0's binary_logloss: 0.160397	valid_0's macroF1: 0.836232
[399]	valid_0's binary_logloss: 0.160326	valid_0's macroF1: 0.836232
[400]	valid_0's binary_logloss: 0.160248	valid_0's macroF1: 0.83701
[401]	valid_0's binary_logloss: 0.160251	valid_0's macroF1: 0.836232
[402]	valid_0's binary_logloss: 0.160205	valid_0's macroF1: 0.836232
[403]	valid_0's binary_logloss: 0.160136	valid_0's macroF1: 0.836232
[404]	valid_0's binary_logloss: 0.16008	valid_0's macroF1: 0.836232
[405]	valid_0's binary_logloss: 0.160066	valid_0's macroF1: 0.836232
[406]	valid_0's binary_logloss: 0.160076	valid_0's macroF1: 0.836232
[407]	valid_0's binary_logloss: 0.160047	valid_0's macroF1: 0.836232
[408]	valid_0's binary_logloss: 0.15

[540]	valid_0's binary_logloss: 0.157052	valid_0's macroF1: 0.829852
[541]	valid_0's binary_logloss: 0.157077	valid_0's macroF1: 0.829852
[542]	valid_0's binary_logloss: 0.157056	valid_0's macroF1: 0.829852
[543]	valid_0's binary_logloss: 0.157044	valid_0's macroF1: 0.829852
[544]	valid_0's binary_logloss: 0.157043	valid_0's macroF1: 0.829852
[545]	valid_0's binary_logloss: 0.157028	valid_0's macroF1: 0.830632
[546]	valid_0's binary_logloss: 0.157013	valid_0's macroF1: 0.830632
[547]	valid_0's binary_logloss: 0.157009	valid_0's macroF1: 0.829852
[548]	valid_0's binary_logloss: 0.157068	valid_0's macroF1: 0.830632
[549]	valid_0's binary_logloss: 0.157033	valid_0's macroF1: 0.830632
[550]	valid_0's binary_logloss: 0.157017	valid_0's macroF1: 0.830632
[551]	valid_0's binary_logloss: 0.157008	valid_0's macroF1: 0.830632
[552]	valid_0's binary_logloss: 0.157009	valid_0's macroF1: 0.832006
[553]	valid_0's binary_logloss: 0.156989	valid_0's macroF1: 0.830632
[554]	valid_0's binary_logloss: 0.

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	valid_0's binary_logloss: 0.315179	valid_0's macroF1: 0.474268
Training until validation scores don't improve for 250 rounds
[2]	valid_0's binary_logloss: 0.310368	valid_0's macroF1: 0.474268
[3]	valid_0's binary_logloss: 0.30599	valid_0's macroF1: 0.474268
[4]	valid_0's binary_logloss: 0.301941	valid_0's macroF1: 0.474268
[5]	valid_0's binary_logloss: 0.298083	valid_0's macroF1: 0.474268
[6]	valid_0's binary_logloss: 0.294573	valid_0's macroF1: 0.474268
[7]	valid_0's binary_logloss: 0.291271	valid_0's macroF1: 0.474268
[8]	valid_0's binary_logloss: 0.288127	valid_0's macroF1: 0.474268
[9]	valid_0's binary_logloss: 0.285059	valid_0's macroF1: 0.474268
[10]	valid_0's binary_logloss: 0.282212	valid_0's macroF1: 0.474268
[11]	valid_0's binary_logloss: 0.279438	valid_0's macroF1: 0.474268
[12]	valid_0's binary_logloss: 0.276821	valid_0's macroF1: 0.474268
[13]	valid_0's binary_logloss: 0.274256	valid_0's macroF1: 0.474268
[14]	valid_0's binary_logloss: 0.271792	valid_0's macroF1: 0.474

[130]	valid_0's binary_logloss: 0.186369	valid_0's macroF1: 0.831268
[131]	valid_0's binary_logloss: 0.186192	valid_0's macroF1: 0.830441
[132]	valid_0's binary_logloss: 0.186001	valid_0's macroF1: 0.829618
[133]	valid_0's binary_logloss: 0.185799	valid_0's macroF1: 0.827981
[134]	valid_0's binary_logloss: 0.185571	valid_0's macroF1: 0.827981
[135]	valid_0's binary_logloss: 0.185394	valid_0's macroF1: 0.827981
[136]	valid_0's binary_logloss: 0.185145	valid_0's macroF1: 0.827981
[137]	valid_0's binary_logloss: 0.184944	valid_0's macroF1: 0.827981
[138]	valid_0's binary_logloss: 0.184746	valid_0's macroF1: 0.827166
[139]	valid_0's binary_logloss: 0.184536	valid_0's macroF1: 0.827981
[140]	valid_0's binary_logloss: 0.184344	valid_0's macroF1: 0.827166
[141]	valid_0's binary_logloss: 0.18412	valid_0's macroF1: 0.827166
[142]	valid_0's binary_logloss: 0.183948	valid_0's macroF1: 0.827812
[143]	valid_0's binary_logloss: 0.183777	valid_0's macroF1: 0.827812
[144]	valid_0's binary_logloss: 0.1

[253]	valid_0's binary_logloss: 0.168943	valid_0's macroF1: 0.842324
[254]	valid_0's binary_logloss: 0.168853	valid_0's macroF1: 0.843108
[255]	valid_0's binary_logloss: 0.16879	valid_0's macroF1: 0.843108
[256]	valid_0's binary_logloss: 0.16872	valid_0's macroF1: 0.843108
[257]	valid_0's binary_logloss: 0.1686	valid_0's macroF1: 0.843108
[258]	valid_0's binary_logloss: 0.168538	valid_0's macroF1: 0.841542
[259]	valid_0's binary_logloss: 0.168425	valid_0's macroF1: 0.842324
[260]	valid_0's binary_logloss: 0.168322	valid_0's macroF1: 0.842324
[261]	valid_0's binary_logloss: 0.168264	valid_0's macroF1: 0.842324
[262]	valid_0's binary_logloss: 0.168114	valid_0's macroF1: 0.842324
[263]	valid_0's binary_logloss: 0.168021	valid_0's macroF1: 0.842324
[264]	valid_0's binary_logloss: 0.16792	valid_0's macroF1: 0.842324
[265]	valid_0's binary_logloss: 0.167807	valid_0's macroF1: 0.842324
[266]	valid_0's binary_logloss: 0.167711	valid_0's macroF1: 0.843641
[267]	valid_0's binary_logloss: 0.16758

[398]	valid_0's binary_logloss: 0.160579	valid_0's macroF1: 0.84336
[399]	valid_0's binary_logloss: 0.160549	valid_0's macroF1: 0.84336
[400]	valid_0's binary_logloss: 0.160513	valid_0's macroF1: 0.84336
[401]	valid_0's binary_logloss: 0.160483	valid_0's macroF1: 0.84336
[402]	valid_0's binary_logloss: 0.160471	valid_0's macroF1: 0.84336
[403]	valid_0's binary_logloss: 0.160472	valid_0's macroF1: 0.84336
[404]	valid_0's binary_logloss: 0.160458	valid_0's macroF1: 0.842572
[405]	valid_0's binary_logloss: 0.160428	valid_0's macroF1: 0.84336
[406]	valid_0's binary_logloss: 0.160426	valid_0's macroF1: 0.844685
[407]	valid_0's binary_logloss: 0.160406	valid_0's macroF1: 0.846005
[408]	valid_0's binary_logloss: 0.160391	valid_0's macroF1: 0.845214
[409]	valid_0's binary_logloss: 0.160401	valid_0's macroF1: 0.84732
[410]	valid_0's binary_logloss: 0.160396	valid_0's macroF1: 0.844685
[411]	valid_0's binary_logloss: 0.160378	valid_0's macroF1: 0.845214
[412]	valid_0's binary_logloss: 0.160352	v

[545]	valid_0's binary_logloss: 0.158122	valid_0's macroF1: 0.84547
[546]	valid_0's binary_logloss: 0.158106	valid_0's macroF1: 0.84547
[547]	valid_0's binary_logloss: 0.15809	valid_0's macroF1: 0.84547
[548]	valid_0's binary_logloss: 0.158081	valid_0's macroF1: 0.844668
[549]	valid_0's binary_logloss: 0.158082	valid_0's macroF1: 0.844668
[550]	valid_0's binary_logloss: 0.158069	valid_0's macroF1: 0.843324
[551]	valid_0's binary_logloss: 0.158073	valid_0's macroF1: 0.844125
[552]	valid_0's binary_logloss: 0.158079	valid_0's macroF1: 0.844125
[553]	valid_0's binary_logloss: 0.158099	valid_0's macroF1: 0.844125
[554]	valid_0's binary_logloss: 0.158052	valid_0's macroF1: 0.844125
[555]	valid_0's binary_logloss: 0.158047	valid_0's macroF1: 0.844125
[556]	valid_0's binary_logloss: 0.158049	valid_0's macroF1: 0.844125
[557]	valid_0's binary_logloss: 0.158065	valid_0's macroF1: 0.844125
[558]	valid_0's binary_logloss: 0.158052	valid_0's macroF1: 0.844125
[559]	valid_0's binary_logloss: 0.1580

[667]	valid_0's binary_logloss: 0.157792	valid_0's macroF1: 0.851687
[668]	valid_0's binary_logloss: 0.157803	valid_0's macroF1: 0.851687
[669]	valid_0's binary_logloss: 0.15779	valid_0's macroF1: 0.851687
[670]	valid_0's binary_logloss: 0.157796	valid_0's macroF1: 0.851687
[671]	valid_0's binary_logloss: 0.157787	valid_0's macroF1: 0.851687
[672]	valid_0's binary_logloss: 0.157797	valid_0's macroF1: 0.853028
[673]	valid_0's binary_logloss: 0.157809	valid_0's macroF1: 0.853028
[674]	valid_0's binary_logloss: 0.157804	valid_0's macroF1: 0.853028
[675]	valid_0's binary_logloss: 0.157806	valid_0's macroF1: 0.853028
[676]	valid_0's binary_logloss: 0.157792	valid_0's macroF1: 0.853028
[677]	valid_0's binary_logloss: 0.157796	valid_0's macroF1: 0.853028
[678]	valid_0's binary_logloss: 0.15777	valid_0's macroF1: 0.853028
[679]	valid_0's binary_logloss: 0.157767	valid_0's macroF1: 0.853028
[680]	valid_0's binary_logloss: 0.157753	valid_0's macroF1: 0.851687
[681]	valid_0's binary_logloss: 0.15

[791]	valid_0's binary_logloss: 0.158418	valid_0's macroF1: 0.85531
[792]	valid_0's binary_logloss: 0.158402	valid_0's macroF1: 0.85531
[793]	valid_0's binary_logloss: 0.1584	valid_0's macroF1: 0.85531
[794]	valid_0's binary_logloss: 0.158367	valid_0's macroF1: 0.85531
[795]	valid_0's binary_logloss: 0.158401	valid_0's macroF1: 0.856148
[796]	valid_0's binary_logloss: 0.158413	valid_0's macroF1: 0.856148
[797]	valid_0's binary_logloss: 0.158467	valid_0's macroF1: 0.856148
[798]	valid_0's binary_logloss: 0.15848	valid_0's macroF1: 0.856148
[799]	valid_0's binary_logloss: 0.1585	valid_0's macroF1: 0.856148
[800]	valid_0's binary_logloss: 0.158521	valid_0's macroF1: 0.856148
[801]	valid_0's binary_logloss: 0.158544	valid_0's macroF1: 0.856148
[802]	valid_0's binary_logloss: 0.158548	valid_0's macroF1: 0.854789
[803]	valid_0's binary_logloss: 0.158567	valid_0's macroF1: 0.854789
[804]	valid_0's binary_logloss: 0.158585	valid_0's macroF1: 0.854789
[805]	valid_0's binary_logloss: 0.158596	va

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	valid_0's binary_logloss: 0.314913	valid_0's macroF1: 0.474268
Training until validation scores don't improve for 250 rounds
[2]	valid_0's binary_logloss: 0.309828	valid_0's macroF1: 0.474268
[3]	valid_0's binary_logloss: 0.305109	valid_0's macroF1: 0.474268
[4]	valid_0's binary_logloss: 0.300743	valid_0's macroF1: 0.474268
[5]	valid_0's binary_logloss: 0.296627	valid_0's macroF1: 0.474268
[6]	valid_0's binary_logloss: 0.29262	valid_0's macroF1: 0.474268
[7]	valid_0's binary_logloss: 0.289012	valid_0's macroF1: 0.474268
[8]	valid_0's binary_logloss: 0.285441	valid_0's macroF1: 0.474268
[9]	valid_0's binary_logloss: 0.282179	valid_0's macroF1: 0.474268
[10]	valid_0's binary_logloss: 0.278947	valid_0's macroF1: 0.474268
[11]	valid_0's binary_logloss: 0.275992	valid_0's macroF1: 0.474268
[12]	valid_0's binary_logloss: 0.273024	valid_0's macroF1: 0.474268
[13]	valid_0's binary_logloss: 0.270248	valid_0's macroF1: 0.474268
[14]	valid_0's binary_logloss: 0.267488	valid_0's macroF1: 0.474

[130]	valid_0's binary_logloss: 0.174908	valid_0's macroF1: 0.831366
[131]	valid_0's binary_logloss: 0.174677	valid_0's macroF1: 0.831366
[132]	valid_0's binary_logloss: 0.174455	valid_0's macroF1: 0.828835
[133]	valid_0's binary_logloss: 0.174186	valid_0's macroF1: 0.831834
[134]	valid_0's binary_logloss: 0.173955	valid_0's macroF1: 0.831834
[135]	valid_0's binary_logloss: 0.173697	valid_0's macroF1: 0.832483
[136]	valid_0's binary_logloss: 0.173476	valid_0's macroF1: 0.832483
[137]	valid_0's binary_logloss: 0.173233	valid_0's macroF1: 0.833966
[138]	valid_0's binary_logloss: 0.172991	valid_0's macroF1: 0.832483
[139]	valid_0's binary_logloss: 0.172775	valid_0's macroF1: 0.833966
[140]	valid_0's binary_logloss: 0.172552	valid_0's macroF1: 0.833966
[141]	valid_0's binary_logloss: 0.172348	valid_0's macroF1: 0.832483
[142]	valid_0's binary_logloss: 0.172124	valid_0's macroF1: 0.833966
[143]	valid_0's binary_logloss: 0.171936	valid_0's macroF1: 0.833966
[144]	valid_0's binary_logloss: 0.

[251]	valid_0's binary_logloss: 0.15544	valid_0's macroF1: 0.849763
[252]	valid_0's binary_logloss: 0.155288	valid_0's macroF1: 0.849763
[253]	valid_0's binary_logloss: 0.155184	valid_0's macroF1: 0.849763
[254]	valid_0's binary_logloss: 0.155025	valid_0's macroF1: 0.849763
[255]	valid_0's binary_logloss: 0.15489	valid_0's macroF1: 0.849763
[256]	valid_0's binary_logloss: 0.154782	valid_0's macroF1: 0.849763
[257]	valid_0's binary_logloss: 0.154622	valid_0's macroF1: 0.849763
[258]	valid_0's binary_logloss: 0.154523	valid_0's macroF1: 0.849763
[259]	valid_0's binary_logloss: 0.154368	valid_0's macroF1: 0.848953
[260]	valid_0's binary_logloss: 0.154233	valid_0's macroF1: 0.848953
[261]	valid_0's binary_logloss: 0.154057	valid_0's macroF1: 0.851096
[262]	valid_0's binary_logloss: 0.153959	valid_0's macroF1: 0.849477
[263]	valid_0's binary_logloss: 0.153799	valid_0's macroF1: 0.848953
[264]	valid_0's binary_logloss: 0.153618	valid_0's macroF1: 0.850285
[265]	valid_0's binary_logloss: 0.15

[373]	valid_0's binary_logloss: 0.144945	valid_0's macroF1: 0.854753
[374]	valid_0's binary_logloss: 0.144914	valid_0's macroF1: 0.856061
[375]	valid_0's binary_logloss: 0.144883	valid_0's macroF1: 0.856061
[376]	valid_0's binary_logloss: 0.144841	valid_0's macroF1: 0.856061
[377]	valid_0's binary_logloss: 0.144801	valid_0's macroF1: 0.856061
[378]	valid_0's binary_logloss: 0.144732	valid_0's macroF1: 0.856061
[379]	valid_0's binary_logloss: 0.144691	valid_0's macroF1: 0.856061
[380]	valid_0's binary_logloss: 0.144684	valid_0's macroF1: 0.856061
[381]	valid_0's binary_logloss: 0.144624	valid_0's macroF1: 0.857364
[382]	valid_0's binary_logloss: 0.144583	valid_0's macroF1: 0.857364
[383]	valid_0's binary_logloss: 0.144568	valid_0's macroF1: 0.857364
[384]	valid_0's binary_logloss: 0.144528	valid_0's macroF1: 0.857364
[385]	valid_0's binary_logloss: 0.144469	valid_0's macroF1: 0.857364
[386]	valid_0's binary_logloss: 0.144429	valid_0's macroF1: 0.856061
[387]	valid_0's binary_logloss: 0.

[495]	valid_0's binary_logloss: 0.140451	valid_0's macroF1: 0.853747
[496]	valid_0's binary_logloss: 0.140419	valid_0's macroF1: 0.853747
[497]	valid_0's binary_logloss: 0.1404	valid_0's macroF1: 0.853747
[498]	valid_0's binary_logloss: 0.140374	valid_0's macroF1: 0.853747
[499]	valid_0's binary_logloss: 0.14034	valid_0's macroF1: 0.853747
[500]	valid_0's binary_logloss: 0.140328	valid_0's macroF1: 0.853747
[501]	valid_0's binary_logloss: 0.140273	valid_0's macroF1: 0.852424
[502]	valid_0's binary_logloss: 0.140222	valid_0's macroF1: 0.853747
[503]	valid_0's binary_logloss: 0.140198	valid_0's macroF1: 0.853747
[504]	valid_0's binary_logloss: 0.140181	valid_0's macroF1: 0.853747
[505]	valid_0's binary_logloss: 0.140137	valid_0's macroF1: 0.853747
[506]	valid_0's binary_logloss: 0.140099	valid_0's macroF1: 0.853747
[507]	valid_0's binary_logloss: 0.140073	valid_0's macroF1: 0.853747
[508]	valid_0's binary_logloss: 0.140017	valid_0's macroF1: 0.853747
[509]	valid_0's binary_logloss: 0.139

[615]	valid_0's binary_logloss: 0.138618	valid_0's macroF1: 0.853028
[616]	valid_0's binary_logloss: 0.13859	valid_0's macroF1: 0.853028
[617]	valid_0's binary_logloss: 0.138588	valid_0's macroF1: 0.853028
[618]	valid_0's binary_logloss: 0.138603	valid_0's macroF1: 0.852208
[619]	valid_0's binary_logloss: 0.138579	valid_0's macroF1: 0.852208
[620]	valid_0's binary_logloss: 0.13855	valid_0's macroF1: 0.853028
[621]	valid_0's binary_logloss: 0.138525	valid_0's macroF1: 0.853028
[622]	valid_0's binary_logloss: 0.138524	valid_0's macroF1: 0.853028
[623]	valid_0's binary_logloss: 0.138518	valid_0's macroF1: 0.853028
[624]	valid_0's binary_logloss: 0.138476	valid_0's macroF1: 0.853028
[625]	valid_0's binary_logloss: 0.138472	valid_0's macroF1: 0.853028
[626]	valid_0's binary_logloss: 0.138493	valid_0's macroF1: 0.853028
[627]	valid_0's binary_logloss: 0.138451	valid_0's macroF1: 0.853028
[628]	valid_0's binary_logloss: 0.138449	valid_0's macroF1: 0.853028
[629]	valid_0's binary_logloss: 0.13

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


[1]	valid_0's binary_logloss: 0.315288	valid_0's macroF1: 0.474268
Training until validation scores don't improve for 250 rounds
[2]	valid_0's binary_logloss: 0.310614	valid_0's macroF1: 0.474268
[3]	valid_0's binary_logloss: 0.30613	valid_0's macroF1: 0.474268
[4]	valid_0's binary_logloss: 0.302012	valid_0's macroF1: 0.474268
[5]	valid_0's binary_logloss: 0.298158	valid_0's macroF1: 0.474268
[6]	valid_0's binary_logloss: 0.294431	valid_0's macroF1: 0.474268
[7]	valid_0's binary_logloss: 0.291004	valid_0's macroF1: 0.474268
[8]	valid_0's binary_logloss: 0.287742	valid_0's macroF1: 0.474268
[9]	valid_0's binary_logloss: 0.284591	valid_0's macroF1: 0.474268
[10]	valid_0's binary_logloss: 0.281614	valid_0's macroF1: 0.474268
[11]	valid_0's binary_logloss: 0.278811	valid_0's macroF1: 0.474268
[12]	valid_0's binary_logloss: 0.276073	valid_0's macroF1: 0.474268
[13]	valid_0's binary_logloss: 0.27353	valid_0's macroF1: 0.474268
[14]	valid_0's binary_logloss: 0.271116	valid_0's macroF1: 0.4742

[125]	valid_0's binary_logloss: 0.183567	valid_0's macroF1: 0.840691
[126]	valid_0's binary_logloss: 0.18329	valid_0's macroF1: 0.839842
[127]	valid_0's binary_logloss: 0.183017	valid_0's macroF1: 0.838155
[128]	valid_0's binary_logloss: 0.182827	valid_0's macroF1: 0.838155
[129]	valid_0's binary_logloss: 0.18259	valid_0's macroF1: 0.838766
[130]	valid_0's binary_logloss: 0.182352	valid_0's macroF1: 0.838997
[131]	valid_0's binary_logloss: 0.182091	valid_0's macroF1: 0.84045
[132]	valid_0's binary_logloss: 0.181841	valid_0's macroF1: 0.842147
[133]	valid_0's binary_logloss: 0.181604	valid_0's macroF1: 0.842147
[134]	valid_0's binary_logloss: 0.181374	valid_0's macroF1: 0.841297
[135]	valid_0's binary_logloss: 0.181151	valid_0's macroF1: 0.841053
[136]	valid_0's binary_logloss: 0.180928	valid_0's macroF1: 0.840211
[137]	valid_0's binary_logloss: 0.180716	valid_0's macroF1: 0.839373
[138]	valid_0's binary_logloss: 0.180505	valid_0's macroF1: 0.840811
[139]	valid_0's binary_logloss: 0.180

[272]	valid_0's binary_logloss: 0.162981	valid_0's macroF1: 0.843578
[273]	valid_0's binary_logloss: 0.162889	valid_0's macroF1: 0.843578
[274]	valid_0's binary_logloss: 0.162796	valid_0's macroF1: 0.844929
[275]	valid_0's binary_logloss: 0.162689	valid_0's macroF1: 0.843578
[276]	valid_0's binary_logloss: 0.162615	valid_0's macroF1: 0.844929
[277]	valid_0's binary_logloss: 0.162531	valid_0's macroF1: 0.844929
[278]	valid_0's binary_logloss: 0.162411	valid_0's macroF1: 0.843578
[279]	valid_0's binary_logloss: 0.162298	valid_0's macroF1: 0.843578
[280]	valid_0's binary_logloss: 0.162181	valid_0's macroF1: 0.841664
[281]	valid_0's binary_logloss: 0.162081	valid_0's macroF1: 0.84247
[282]	valid_0's binary_logloss: 0.161972	valid_0's macroF1: 0.841664
[283]	valid_0's binary_logloss: 0.161861	valid_0's macroF1: 0.841664
[284]	valid_0's binary_logloss: 0.161768	valid_0's macroF1: 0.843026
[285]	valid_0's binary_logloss: 0.161673	valid_0's macroF1: 0.841664
[286]	valid_0's binary_logloss: 0.1

[414]	valid_0's binary_logloss: 0.153258	valid_0's macroF1: 0.854678
[415]	valid_0's binary_logloss: 0.153198	valid_0's macroF1: 0.854678
[416]	valid_0's binary_logloss: 0.153149	valid_0's macroF1: 0.854678
[417]	valid_0's binary_logloss: 0.153093	valid_0's macroF1: 0.854678
[418]	valid_0's binary_logloss: 0.153039	valid_0's macroF1: 0.854678
[419]	valid_0's binary_logloss: 0.152991	valid_0's macroF1: 0.854678
[420]	valid_0's binary_logloss: 0.152979	valid_0's macroF1: 0.854678
[421]	valid_0's binary_logloss: 0.152939	valid_0's macroF1: 0.854678
[422]	valid_0's binary_logloss: 0.152937	valid_0's macroF1: 0.854678
[423]	valid_0's binary_logloss: 0.152897	valid_0's macroF1: 0.854678
[424]	valid_0's binary_logloss: 0.152907	valid_0's macroF1: 0.854678
[425]	valid_0's binary_logloss: 0.152843	valid_0's macroF1: 0.855507
[426]	valid_0's binary_logloss: 0.15281	valid_0's macroF1: 0.855507
[427]	valid_0's binary_logloss: 0.152781	valid_0's macroF1: 0.855507
[428]	valid_0's binary_logloss: 0.1

[535]	valid_0's binary_logloss: 0.149605	valid_0's macroF1: 0.856663
[536]	valid_0's binary_logloss: 0.149574	valid_0's macroF1: 0.856663
[537]	valid_0's binary_logloss: 0.149553	valid_0's macroF1: 0.856663
[538]	valid_0's binary_logloss: 0.149509	valid_0's macroF1: 0.856663
[539]	valid_0's binary_logloss: 0.149498	valid_0's macroF1: 0.856663
[540]	valid_0's binary_logloss: 0.149504	valid_0's macroF1: 0.856663
[541]	valid_0's binary_logloss: 0.149483	valid_0's macroF1: 0.856663
[542]	valid_0's binary_logloss: 0.149441	valid_0's macroF1: 0.856663
[543]	valid_0's binary_logloss: 0.149418	valid_0's macroF1: 0.856663
[544]	valid_0's binary_logloss: 0.149418	valid_0's macroF1: 0.856663
[545]	valid_0's binary_logloss: 0.149367	valid_0's macroF1: 0.856663
[546]	valid_0's binary_logloss: 0.14934	valid_0's macroF1: 0.856663
[547]	valid_0's binary_logloss: 0.149347	valid_0's macroF1: 0.856663
[548]	valid_0's binary_logloss: 0.14936	valid_0's macroF1: 0.856663
[549]	valid_0's binary_logloss: 0.14

[680]	valid_0's binary_logloss: 0.14806	valid_0's macroF1: 0.857319
[681]	valid_0's binary_logloss: 0.148082	valid_0's macroF1: 0.857319
[682]	valid_0's binary_logloss: 0.148116	valid_0's macroF1: 0.857319
[683]	valid_0's binary_logloss: 0.148149	valid_0's macroF1: 0.857319
[684]	valid_0's binary_logloss: 0.148178	valid_0's macroF1: 0.857319
[685]	valid_0's binary_logloss: 0.148136	valid_0's macroF1: 0.857319
[686]	valid_0's binary_logloss: 0.148117	valid_0's macroF1: 0.857319
[687]	valid_0's binary_logloss: 0.148069	valid_0's macroF1: 0.857319
[688]	valid_0's binary_logloss: 0.148076	valid_0's macroF1: 0.857319
[689]	valid_0's binary_logloss: 0.148026	valid_0's macroF1: 0.857319
[690]	valid_0's binary_logloss: 0.148006	valid_0's macroF1: 0.855952
[691]	valid_0's binary_logloss: 0.147961	valid_0's macroF1: 0.855952
[692]	valid_0's binary_logloss: 0.147996	valid_0's macroF1: 0.857319
[693]	valid_0's binary_logloss: 0.14798	valid_0's macroF1: 0.855952
[694]	valid_0's binary_logloss: 0.14

In [33]:
# Predict labels for x_test and for x; both prediction arrays feed the
# classification reports printed further down (x is presumably the training
# split — TODO confirm against the train_test_split cell).
predicted_y = model.predict(x_test)
train_pred_y = model.predict(x)

In [34]:
# NOTE(review): this repeats the train_pred_y prediction already made in the
# previous cell; one of the two statements is redundant.
train_pred_y = model.predict(x)

In [35]:
# Scores on (y, train_pred_y) first, then on (y_test, predicted_y).
# Each item is printed after one blank line, report before confusion matrix.
print("", metrics.classification_report(y, train_pred_y), sep="\n")
print("", metrics.confusion_matrix(y, train_pred_y), sep="\n")

print("", metrics.classification_report(y_test, predicted_y), sep="\n")
print("", metrics.confusion_matrix(y_test, predicted_y), sep="\n")


              precision    recall  f1-score   support

           0       0.87      0.91      0.89      1345
           1       0.99      0.99      0.99     12395

    accuracy                           0.98     13740
   macro avg       0.93      0.95      0.94     13740
weighted avg       0.98      0.98      0.98     13740


[[ 1225   120]
 [  178 12217]]

              precision    recall  f1-score   support

           0       0.82      0.75      0.78       336
           1       0.97      0.98      0.98      3099

    accuracy                           0.96      3435
   macro avg       0.90      0.87      0.88      3435
weighted avg       0.96      0.96      0.96      3435


[[ 252   84]
 [  55 3044]]


In [None]:
# NOTE(review): this cell has no execution count and repeats the two
# (y, train_pred_y) report statements of the cell above; candidate for removal.
print(); print(metrics.classification_report(y,train_pred_y))
print(); print(metrics.confusion_matrix(y, train_pred_y))

<h2>Testing</h2>

In [36]:

# Load test.csv from data_path, using the tripid column as the row index.
test_set = pd.read_csv(data_path+'test.csv', index_col="tripid")


In [37]:
# Parse the pickup and drop-off timestamps ("month/day/year hour:minute" text)
# into datetime columns.
test_set = test_set.assign(
    pickup_time=pd.to_datetime(test_set['pickup_time'], format="%m/%d/%Y %H:%M"),
    drop_time=pd.to_datetime(test_set['drop_time'], format="%m/%d/%Y %H:%M"),
)

In [38]:
# Bucket the pickup hour into three labelled bands:
# (-1, 8] -> 'dawn', (8, 20] -> 'day', (20, 24] -> 'night'.
test_set = test_set.assign(
    timeOfDay=pd.cut(
        test_set.pickup_time.dt.hour,
        bins=[-1, 8, 20, 24],
        labels=['dawn','day', 'night'],
    )
)

In [39]:
# 1.0 for trips whose timeOfDay band is 'day', 0.0 for every other trip
# (including rows where timeOfDay is missing).
test_set['isNormalCharge'] = (test_set['timeOfDay'] == 'day').astype(float)

In [40]:
# Build `time_dif`: the provided `duration` where it is usable, otherwise the
# time between pickup and drop-off. Computed on whole columns instead of a
# Python-level iterrows() loop.
provided_duration = test_set['duration']

# `.dt.seconds` (not `.dt.total_seconds()`) is kept on purpose so the values
# match what the per-row `timedelta.seconds` lookup produced; it ignores the
# whole-day part of the difference.
# NOTE(review): confirm that is acceptable for trips spanning more than a day.
elapsed_seconds = (test_set['drop_time'] - test_set['pickup_time']).dt.seconds
# A zero-second gap is treated as missing.
elapsed_seconds = elapsed_seconds.where(elapsed_seconds != 0)

# Missing or non-positive durations fall back to the elapsed time.
needs_fallback = provided_duration.isna() | (provided_duration <= 0)
durations = provided_duration.where(~needs_fallback, elapsed_seconds)

# `.values` keeps the insertion positional, as it was with the original list.
test_set.insert(4,"time_dif",durations.values)

In [41]:
# Haversine distance between the pickup and drop-off coordinates of each trip.
# dist_from_coordinates is built from NumPy ufuncs, so it is applied to the
# four coordinate columns in one call rather than once per row.
new_column = dist_from_coordinates(
    test_set['pick_lat'], test_set['pick_lon'],
    test_set['drop_lat'], test_set['drop_lon'],
)

# np.asarray keeps the insertion positional, as it was with the original list.
test_set.insert(4,"distance",np.asarray(new_column))

In [42]:
# Time spent moving: trip duration minus metered waiting time.
# NOTE(review): this uses the raw `duration` column, not the cleaned `time_dif`
# built above — confirm it matches how the training feature was derived.
test_set['time_driven'] = test_set['duration']  - test_set['meter_waiting']

In [43]:
# Waiting fare per unit of waiting time, scaled by 3600 (a per-hour rate if
# meter_waiting is in seconds — TODO confirm the unit). Computed on whole
# columns instead of a Python-level iterrows() loop.
# Trips with zero waiting time get NaN rather than a division by zero.
nonzero_waiting = test_set['meter_waiting'].where(test_set['meter_waiting'] != 0)
chargeperhours = test_set['meter_waiting_fare'] / nonzero_waiting * 3600

# `.values` keeps the insertion positional, as it was with the original list.
test_set.insert(4,'charge_per_hour',chargeperhours.values)


In [44]:
# Fare left after removing the waiting and additional components from the total.
test_set['driving_fare'] = test_set['fare']  - test_set['meter_waiting_fare'] - test_set['additional_fare']

In [45]:
# Distance per unit of driving time, scaled by 3600 (km/h if distance is in km
# and time_driven in seconds — TODO confirm the units). Computed on whole
# columns instead of a Python-level iterrows() loop.
# Trips with zero driving time get NaN rather than a division by zero.
nonzero_time_driven = test_set['time_driven'].where(test_set['time_driven'] != 0)
avgspeeds = test_set['distance'] / nonzero_time_driven * 3600

# `.values` keeps the insertion positional, as it was with the original list.
test_set.insert(4,"avg_speed",avgspeeds.values)



In [46]:
# Driving fare divided by trip distance. Computed on whole columns instead of
# a Python-level iterrows() loop.
# Trips with zero distance get NaN rather than a division by zero.
nonzero_distance = test_set['distance'].where(test_set['distance'] != 0)
costsperkm = test_set['driving_fare'] / nonzero_distance

# `.values` keeps the insertion positional, as it was with the original list.
test_set.insert(4,"cost_per_km",costsperkm.values)

In [47]:
# Keep only the columns named in training_columns (defined earlier in the
# notebook), in that order.
test_features = test_set[training_columns]

In [48]:
# Number of missing values in each selected feature column.
test_features.isna().sum()

meter_waiting           0
meter_waiting_fare      0
fare                    0
additional_fare         0
distance                0
cost_per_km            25
avg_speed               9
time_dif                0
time_driven             0
charge_per_hour       298
driving_fare            0
isNormalCharge          0
pick_lat                0
pick_lon                0
drop_lat                0
drop_lon                0
dtype: int64

In [49]:
# Predict labels for the prepared test-set features.
predicted_labels = model.predict(test_features)

In [50]:
# Wrap the predictions in a DataFrame (default integer index and column names).
predicted_labels_df = pd.DataFrame(predicted_labels )

In [51]:
# Load the sample submission, indexed by tripid, as the frame the predictions
# will be written into.
# os.path.join is given the directory and the file name separately, so the
# result is well-formed whether or not data_path ends with a separator (the
# previous string concatenation produced a doubled '/').
sub_path = os.path.abspath(os.path.join(data_path, 'sample_submission.csv'))
submission_set = pd.read_csv(sub_path, index_col="tripid")

In [52]:
# Write the first column of the predictions into the submission frame.
# `.values` is a bare array, so the assignment is positional (no index alignment).
# NOTE(review): this assumes sample_submission.csv lists trips in the same
# order as test.csv — confirm.
submission_set['prediction']= predicted_labels_df.values[:,0]

In [53]:
%%javascript
var kernel = IPython.notebook.kernel;
var thename = window.document.getElementById("notebook_name").innerHTML;
var command = "theNotebook = " + "'"+thename+"'";
kernel.execute(command);

<IPython.core.display.Javascript object>

In [55]:
# Save the submission as ../../submissions/<notebook>/<notebook>_<n>.csv, where
# <n> is the first version number (starting at 1) whose file does not exist yet.
dirname = '../../submissions/'+theNotebook
filename = dirname+'/'+theNotebook+'_{%i}.csv'   # '{%i}' is the version placeholder
fileversion = 1

# exist_ok=True creates the folder when needed without a separate existence check.
os.makedirs(dirname, exist_ok=True)
# os.path.exists is used instead of glob.glob: the candidate is a literal path,
# and glob would treat characters such as [ ] * ? in the notebook name as a pattern.
while os.path.exists(filename.replace('{%i}',str(fileversion))):
    fileversion+=1
submission_set.to_csv(filename.replace('{%i}',str(fileversion)), index=True)
print("Completed!")

Completed!


In [54]:
# How many trips were assigned to each predicted label.
submission_set['prediction'].value_counts()

1    8022
0     554
Name: prediction, dtype: int64

<h2>Testing against other submissions</h2>

In [56]:
# Load a stored submission file (lgbm3_23.csv in the grid-lgbm folder),
# indexed by tripid. Note that this rebinds submission_set.
sub_path = os.path.abspath('/home/madnisal/Documents/ML_Project/submissions/grid-lgbm/lgbm3_23.csv')
submission_set = pd.read_csv(sub_path, index_col="tripid")

In [57]:
# Label counts of the submission file loaded above, for comparison with the
# counts of the predictions produced in this notebook.
submission_set['prediction'].value_counts()

1    8022
0     554
Name: prediction, dtype: int64