# PROBLEM STATEMENT : 
#### URL : 

https://machinehack.com/hackathon/predict_accident_risk_score_for_unique_postcode/overview

According to IBEF “Domestic automobiles production increased at 2.36% CAGR between FY16-20 with 26.36 million vehicles being manufactured in the country in FY20.Overall, domestic automobiles sales increased at 1.29% CAGR between FY16-FY20 with 21.55 million vehicles being sold in FY20”.The rise in vehicles on the road will also lead to multiple challenges and the road will be more vulnerable to accidents.

Increased accident rates also leads to more insurance claims and payouts rise for insurance companies.

In order to pre-emptively plan for the losses, the insurance firms leverage accident data to understand the risk across the geographical units e.g. Postal code/district etc. In this challenge, we are providing you the dataset to predict the “Accident_Risk_Index” against the postcodes.

Accident_Risk_Index (mean casualties at a postcode) = sum(Number_of_casualities)/count(Accident_ID)

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from scipy import stats

In [3]:
train=pd.read_csv("train.csv")
test=pd.read_csv("test.csv")
population=pd.read_csv("population.csv")
sample_sub=pd.read_csv("sample_submission.csv")
roads_network=pd.read_csv("roads_network.csv")

In [4]:
roads_network=roads_network.dropna()
roads_network=roads_network.reset_index(drop=True)
roads_network.isnull().sum().sum()

0

In [5]:
print('train : ', train.shape)
print('test : ', test.shape)
print('population : ', population.shape)
print('roads_network : ',roads_network.shape)

train :  (478741, 27)
test :  (121259, 27)
population :  (8035, 10)
roads_network :  (90352, 8)


In [6]:
print('train : ', train.postcode.nunique())
print('test : ', test.postcode.nunique())
print('population : ', population.postcode.nunique())
print('roads_network : ',roads_network.postcode.nunique())

train :  95625
test :  49772
population :  8035
roads_network :  74681


In [8]:
train.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Accident_ID,478741.0,299892.954673,173261.27495,1.0,149718.0,299791.0,449990.0,599999.0
Police_Force,478741.0,31.610178,24.94166,1.0,12.0,33.0,45.0,98.0
Number_of_Vehicles,478741.0,1.745695,0.626458,1.0,1.0,2.0,2.0,4.0
Number_of_Casualties,478741.0,1.463528,0.816019,1.0,1.0,1.0,2.0,5.0
Day_of_Week,478741.0,4.367898,1.872983,1.0,3.0,5.0,6.0,7.0
Local_Authority_(District),478741.0,357.457132,256.530105,1.0,103.0,314.0,521.0,941.0
1st_Road_Class,478741.0,4.381542,1.369579,1.0,3.0,4.0,6.0,6.0
1st_Road_Number,478741.0,946.663375,1730.926876,0.0,10.0,336.0,569.0,9832.0
Speed_limit,478741.0,37.035976,12.854299,20.0,30.0,30.0,40.0,70.0
2nd_Road_Class,478741.0,2.430068,3.268563,-1.0,-1.0,3.0,6.0,6.0


In [9]:
roads_network.head()

Unnamed: 0,WKT,roadClassi,roadFuncti,formOfWay,length,primaryRou,distance to the nearest point on rd,postcode
0,POINT (-2.3501 56.603923),A Road,A Road,Single Carriageway,2643.0,1.0,1.256769,AB1
1,POINT (-2.021334 57.130142),A Road,A Road,Single Carriageway,2643.0,1.0,1.834101,AB1 9NN
2,POINT (-2.108598 57.146338),A Road,A Road,Single Carriageway,2643.0,1.0,1.830243,AB10 1UH
3,POINT (-2.093928 57.148218),A Road,A Road,Single Carriageway,2643.0,1.0,1.835092,AB10 1YL
4,POINT (-2.116089 57.131671),A Road,A Road,Single Carriageway,2643.0,1.0,1.814373,AB10 6AT


In [10]:
population.head()

Unnamed: 0,postcode,Rural Urban,Variable: All usual residents; measures: Value,Variable: Males; measures: Value,Variable: Females; measures: Value,Variable: Lives in a household; measures: Value,Variable: Lives in a communal establishment; measures: Value,Variable: Schoolchild or full-time student aged 4 and over at their non term-time address; measures: Value,Variable: Area (Hectares); measures: Value,Variable: Density (number of persons per hectare); measures: Value
0,AL1 1,Total,5453,2715,2738,5408,45,75,225.63,24.2
1,AL1 2,Total,6523,3183,3340,6418,105,77,286.59,22.8
2,AL1 3,Total,4179,2121,2058,4100,79,46,97.12,43.0
3,AL1 4,Total,9799,4845,4954,9765,34,285,244.75,40.0
4,AL1 5,Total,10226,5129,5097,10211,15,133,200.93,50.9


In [11]:
population.drop(['Rural Urban','Variable: Females; measures: Value'],axis=1,inplace=True)

In [12]:
population.columns=['postcode','residents','males','ll','CE','schoolchild','area','density']


In [13]:
population.males=population.males/population.residents
population.head()

Unnamed: 0,postcode,residents,males,ll,CE,schoolchild,area,density
0,AL1 1,5453,0.497891,5408,45,75,225.63,24.2
1,AL1 2,6523,0.487966,6418,105,77,286.59,22.8
2,AL1 3,4179,0.507538,4100,79,46,97.12,43.0
3,AL1 4,9799,0.494438,9765,34,285,244.75,40.0
4,AL1 5,10226,0.501565,10211,15,133,200.93,50.9


In [15]:
roads_network.head()

Unnamed: 0,WKT,roadClassi,roadFuncti,formOfWay,length,primaryRou,distance to the nearest point on rd,postcode,lat,log
0,-2.3501 56.603923,A Road,A Road,Single Carriageway,2643.0,1.0,1.256769,AB1,-2.3501,56.603923
1,-2.021334 57.130142,A Road,A Road,Single Carriageway,2643.0,1.0,1.834101,AB1 9NN,-2.021334,57.130142
2,-2.108598 57.146338,A Road,A Road,Single Carriageway,2643.0,1.0,1.830243,AB10 1UH,-2.108598,57.146338
3,-2.093928 57.148218,A Road,A Road,Single Carriageway,2643.0,1.0,1.835092,AB10 1YL,-2.093928,57.148218
4,-2.116089 57.131671,A Road,A Road,Single Carriageway,2643.0,1.0,1.814373,AB10 6AT,-2.116089,57.131671


In [14]:
roads_network.WKT=roads_network['WKT'].apply(lambda x:x.split('(')[1].split(')')[0])
roads_network['lat']=roads_network['WKT'].apply(lambda x:float(x.split(' ')[0]))
roads_network['log']=roads_network['WKT'].apply(lambda x:float(x.split(' ')[1]))

In [20]:
rn=roads_network[['lat','log','postcode']]
rn.head()

Unnamed: 0,lat,log,postcode
0,-2.3501,56.603923,AB1
1,-2.021334,57.130142,AB1 9NN
2,-2.108598,57.146338,AB10 1UH
3,-2.093928,57.148218,AB10 1YL
4,-2.116089,57.131671,AB10 6AT


# EDA

# Accident_Risk_Index (mean casualties at a postcode) = sum(Number_of_casualities)/count(Accident_ID)

In [21]:
sum_casualties = train.groupby([train['postcode']])['Number_of_Casualties'].sum()
count_acc = train.groupby([train['postcode']])['Accident_ID'].count()

Accident_Risk_Index = sum_casualties / count_acc
df  = pd.merge(train, Accident_Risk_Index.reset_index(name='Accident_Risk_Index'), how='left', on=['postcode'])

In [22]:
df.head()

Unnamed: 0,Accident_ID,Police_Force,Number_of_Vehicles,Number_of_Casualties,Date,Day_of_Week,Time,Local_Authority_(District),Local_Authority_(Highway),1st_Road_Class,...,Weather_Conditions,Road_Surface_Conditions,Special_Conditions_at_Site,Carriageway_Hazards,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,state,postcode,country,Accident_Risk_Index
0,1,34,2,1,19/12/12,7,13:20,344,E10000032,4,...,Fine without high winds,Dry,Ol or diesel,,1,Yes,England,OX3 9UP,United Kingdom,1.833333
1,2,5,2,1,02/11/12,4,7:53,102,E09000026,3,...,Raining without high winds,Dry,,,1,No,England,S35 4EZ,United Kingdom,1.333333
2,3,1,2,1,02/11/12,4,16:00,531,E10000016,6,...,Fine without high winds,Dry,,,1,No,England,BN21 2XR,United Kingdom,1.333333
3,4,1,1,1,06/05/12,1,16:50,7,E08000035,6,...,Fine without high winds,Dry,Roadworks,,1,Yes,England,TA20 3PT,United Kingdom,1.666667
4,5,46,1,1,30/06/12,3,13:25,519,E10000031,3,...,Fine without high winds,Dry,,,1,No,England,DN20 0QF,United Kingdom,1.5


# Concating train and test data into single Dataframe for preprocessing¶

In [23]:
df['train_or_test']='train'
test['train_or_test']='test'
df1=pd.concat([df,test])

# Missing value Imputation

In [24]:
df1['Time'].fillna(df1['Time'].mode().values[0],inplace=True)
df1['Road_Surface_Conditions'].fillna(df1['Road_Surface_Conditions'].mode().values[0],inplace=True)
df1['Special_Conditions_at_Site'].fillna(df1['Special_Conditions_at_Site'].mode().values[0],inplace=True)

In [25]:
cols_dropped = ['country','Local_Authority_(Highway)','Local_Authority_(District)','Accident_ID','1st_Road_Number',
                'Number_of_Casualties','2nd_Road_Number']

In [26]:
df1.drop(cols_dropped,axis=1,inplace=True)

In [27]:
df1['Time']=pd.to_datetime(df1['Time'], format='%H:%M').dt.hour

In [28]:
df1['Year'] = pd.to_datetime(df1['Date']).dt.year
df1['Month'] = pd.to_datetime(df1['Date']).dt.month
df1['Day'] = pd.to_datetime(df1['Date']).dt.day
df1['weekend']=df1['Day_of_Week'].apply(lambda x: 0 if x<6 else 1)

In [29]:
def time_of_day(time):
    hour = int(time)
    if hour<=6:
        return 0
    elif hour<=9:
        return 1
    elif hour<=5:
        return 2
    elif hour<21:
        return 3
    else:
        return 4

In [30]:
df1['Time']=df1['Time'].apply(time_of_day)

In [40]:
def county_code(postcode):
    postcode=postcode.split(' ')[0]
    only_alpha=''
    for char in postcode:
        if ord(char)>=65 and ord(char)<=90:
            only_alpha+=char
        elif ord(char)>=97 and ord(char)<=122:
            only_alpha+=char
    return only_alpha.upper()

In [31]:
df1.head(2)

Unnamed: 0,Police_Force,Number_of_Vehicles,Date,Day_of_Week,Time,1st_Road_Class,Road_Type,Speed_limit,2nd_Road_Class,Pedestrian_Crossing-Human_Control,...,Urban_or_Rural_Area,Did_Police_Officer_Attend_Scene_of_Accident,state,postcode,Accident_Risk_Index,train_or_test,Year,Month,Day,weekend
0,34,2,19/12/12,7,3,4,Single carriageway,30,-1,None within 50 metres,...,1,Yes,England,OX3 9UP,1.833333,train,2012,12,19,1
1,5,2,02/11/12,4,1,3,One way street,30,-1,None within 50 metres,...,1,No,England,S35 4EZ,1.333333,train,2012,2,11,0


In [32]:
rn['postcode']=rn['postcode'].apply(lambda x : x[0:-2])
rn['postcode']=rn['postcode'].apply(lambda x : x.upper())
rn.rename(columns={'postcode':'postcode_merge'},inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rn['postcode']=rn['postcode'].apply(lambda x : x[0:-2])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rn['postcode']=rn['postcode'].apply(lambda x : x.upper())
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [34]:
rn=rn.groupby('postcode_merge',as_index=False).agg({'lat' : 'mean','log' : 'mean'})
rn

Unnamed: 0,postcode_merge,lat,log
0,AB,-2.704352,56.901920
1,AB1,-2.070907,56.809341
2,AB1 9,-2.021334,57.130142
3,AB10 1,-2.101263,57.147278
4,AB10 6,-2.118697,57.135466
...,...,...,...
10724,YO8 9,-1.107228,53.772852
10725,YO89 9,-1.075875,53.786824
10726,YO91 1,-1.079029,53.975455
10727,YS2,-0.557006,54.456617


In [36]:
df1['postcode_merge']=df1['postcode'].apply(lambda x : x[0:-2])
df1  = pd.merge(df1, rn, how='left',on=['postcode_merge'])
df1.head()

Unnamed: 0,Police_Force,Number_of_Vehicles,Date,Day_of_Week,Time,1st_Road_Class,Road_Type,Speed_limit,2nd_Road_Class,Pedestrian_Crossing-Human_Control,...,postcode,Accident_Risk_Index,train_or_test,Year,Month,Day,weekend,postcode_merge,lat,log
0,34,2,19/12/12,7,3,4,Single carriageway,30,-1,None within 50 metres,...,OX3 9UP,1.833333,train,2012,12,19,1,OX3 9,-1.235937,51.757932
1,5,2,02/11/12,4,1,3,One way street,30,-1,None within 50 metres,...,S35 4EZ,1.333333,train,2012,2,11,0,S35 4,-1.486937,53.477343
2,1,2,02/11/12,4,3,6,Roundabout,40,6,None within 50 metres,...,BN21 2XR,1.333333,train,2012,2,11,0,BN21 2,0.141196,50.852716
3,1,1,06/05/12,1,3,6,Single carriageway,30,6,None within 50 metres,...,TA20 3PT,1.666667,train,2012,6,5,0,TA20 3,-3.013006,50.880963
4,46,1,30/06/12,3,3,3,Dual carriageway,30,6,None within 50 metres,...,DN20 0QF,1.5,train,2012,6,30,0,DN20 0,-0.502351,53.569844


In [46]:
df1[df1.train_or_test=='test'].isnull().sum()

Police_Force                                        0
Number_of_Vehicles                                  0
Date                                                0
Day_of_Week                                         0
Time                                                0
1st_Road_Class                                      0
Road_Type                                           0
Speed_limit                                         0
2nd_Road_Class                                      0
Pedestrian_Crossing-Human_Control                   0
Pedestrian_Crossing-Physical_Facilities             0
Light_Conditions                                    0
Weather_Conditions                                  0
Road_Surface_Conditions                             0
Special_Conditions_at_Site                          0
Carriageway_Hazards                                 0
Urban_or_Rural_Area                                 0
Did_Police_Officer_Attend_Scene_of_Accident         0
state                       

In [44]:
df1.isnull().sum()

Police_Force                                        0
Number_of_Vehicles                                  0
Date                                                0
Day_of_Week                                         0
Time                                                0
1st_Road_Class                                      0
Road_Type                                           0
Speed_limit                                         0
2nd_Road_Class                                      0
Pedestrian_Crossing-Human_Control                   0
Pedestrian_Crossing-Physical_Facilities             0
Light_Conditions                                    0
Weather_Conditions                                  0
Road_Surface_Conditions                             0
Special_Conditions_at_Site                          0
Carriageway_Hazards                                 0
Urban_or_Rural_Area                                 0
Did_Police_Officer_Attend_Scene_of_Accident         0
state                       

In [41]:
df1['county_code']=df1['postcode'].apply(county_code)

In [42]:
cols= ['lat', 'log']

for i in cols:
    df1[i] = df1[i].fillna(df1.groupby('county_code')[i].transform('mean'))

In [45]:
for i in cols:
    df1[i] = df1[i].fillna(df1.groupby('state')[i].transform('mean'))

In [52]:
df1.columns

Index(['Police_Force', 'Number_of_Vehicles', 'Day_of_Week', 'Time',
       '1st_Road_Class', 'Road_Type', 'Speed_limit', '2nd_Road_Class',
       'Pedestrian_Crossing-Human_Control',
       'Pedestrian_Crossing-Physical_Facilities', 'Light_Conditions',
       'Weather_Conditions', 'Road_Surface_Conditions',
       'Special_Conditions_at_Site', 'Carriageway_Hazards',
       'Urban_or_Rural_Area', 'state', 'Accident_Risk_Index', 'train_or_test',
       'Year', 'Month', 'Day', 'weekend', 'lat', 'log'],
      dtype='object')

In [57]:
df1.iloc[1]

Police_Force                          5
Number_of_Vehicles                    2
Day_of_Week                           4
Time                                  1
1st_Road_Class                        3
Road_Type                             1
Speed_limit                          30
2nd_Road_Class                       -1
Light_Conditions                      4
Weather_Conditions                    5
Road_Surface_Conditions               0
Special_Conditions_at_Site            3
Carriageway_Hazards                   3
Urban_or_Rural_Area                   1
state                                 2
Accident_Risk_Index            1.333333
train_or_test                     train
Year                               2012
Month                                 2
Day                                  11
weekend                               0
lat                           -1.486937
log                           53.477343
pc                                   12
lw                                   40


In [50]:
df1.drop(['postcode','county_code','postcode_merge','Date','Did_Police_Officer_Attend_Scene_of_Accident'],axis=1,inplace=True)

In [53]:
df1['pc']=df1['Pedestrian_Crossing-Human_Control']+df1['Pedestrian_Crossing-Physical_Facilities']
df1['lw']=df1['Light_Conditions']+df1['Weather_Conditions']
df1['rc']=df1['Road_Surface_Conditions']+df1['Special_Conditions_at_Site']+df1['Carriageway_Hazards']

In [55]:
df1.drop(['Pedestrian_Crossing-Human_Control','Pedestrian_Crossing-Physical_Facilities'],axis=1,inplace=True)

In [56]:
from sklearn.preprocessing import LabelEncoder
categ_col=['Road_Type', 'Light_Conditions','pc','lw','rc',
       'Weather_Conditions', 'Road_Surface_Conditions',
       'Special_Conditions_at_Site', 'Carriageway_Hazards','state']
for c in categ_col:
    le = LabelEncoder()
    df1[c] = le.fit_transform(df1[c])

In [59]:
df2=df1.drop(['Accident_Risk_Index','train_or_test'],axis=1)

In [64]:
df1[cols].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
lat,600000.0,-1.4122,1.356557,-6.30624,-2.317185,-1.401829,-0.231591,1.742991
log,600000.0,52.689252,1.439188,49.919436,51.510356,52.480112,53.583483,58.010929


In [65]:
df1["lat"] = pd.qcut(df1["lat"], 10, labels=False)
df1["log"] = pd.qcut(df1["log"], 10, labels=False)
#df1['zone'] = df1['lat'].astype(str)+'_'+df1['long'].astype(str)
#df1 = df1.drop(['lat','log'],axis=1)

In [67]:
df1.head(2)

Unnamed: 0,Police_Force,Number_of_Vehicles,Day_of_Week,Time,1st_Road_Class,Road_Type,Speed_limit,2nd_Road_Class,Light_Conditions,Weather_Conditions,...,train_or_test,Year,Month,Day,weekend,lat,log,pc,lw,rc
0,34,2,7,3,4,3,30,-1,4,1,...,train,2012,12,19,1,5,3,12,36,18
1,5,2,4,1,3,1,30,-1,4,5,...,train,2012,2,11,0,4,7,12,40,14


In [68]:
train1=df1[df1.train_or_test=='train']
test1=df1[df1.train_or_test=='test']
train1.drop('train_or_test',axis=1,inplace=True)
test1.drop('train_or_test',axis=1,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [69]:
target = train1['Accident_Risk_Index']

train_df = train1.drop(columns =['Accident_Risk_Index'])

test_df = test1.drop(columns=['Accident_Risk_Index'])

# MODEL

In [70]:
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

In [81]:
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import make_scorer, mean_squared_error, r2_score

In [79]:
def objective(trial, data=train_df, target=target):
    X_train, X_val, y_train, y_val = train_test_split(data, target, test_size=0.2, random_state=42)

    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 32),
        'learning_rate': trial.suggest_categorical('learning_rate', [0.005, 0.02, 0.05, 0.08, 0.1]),
        'n_estimators': trial.suggest_int('n_estimators', 2000, 8000),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
        'gamma': trial.suggest_float('gamma', 0.0001, 1.0, log = True),
        'alpha': trial.suggest_float('alpha', 0.0001, 10.0, log = True),
        'lambda': trial.suggest_float('lambda', 0.0001, 10.0, log = True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.1, 0.8),
        'subsample': trial.suggest_float('subsample', 0.1, 0.8),
        'booster': 'gblinear',
        'random_state': 42,
        'eval_metric': 'rmse'

    }
    
    model = xgb.XGBRegressor(**params)  
    model.fit(X_train, y_train, eval_set = [(X_val,y_val)], early_stopping_rounds = 333, verbose = False)
    y_pred = model.predict(X_val)
    mse = metric(y_val, y_pred)

    return mse

In [83]:
study = optuna.create_study()
study.optimize(objective, n_trials = 50)
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Best value:', study.best_value)

[32m[I 2022-03-28 18:07:17,040][0m A new study created in memory with name: no-name-bb94be3d-f33c-47fa-a85d-946f9446f6b3[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:07:26,471][0m Trial 0 finished with value: 0.3617102110255804 and parameters: {'max_depth': 16, 'learning_rate': 0.08, 'n_estimators': 5136, 'min_child_weight': 50, 'gamma': 0.0317364093363946, 'alpha': 3.6324148467840165, 'lambda': 0.01242773891568861, 'colsample_bytree': 0.37918945249870417, 'subsample': 0.4288030236098468}. Best is trial 0 with value: 0.3617102110255804.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:08:23,624][0m Trial 1 finished with value: 0.3616120023988419 and parameters: {'max_depth': 29, 'learning_rate': 0.08, 'n_estimators': 4607, 'min_child_weight': 67, 'gamma': 0.15571692151052208, 'alpha': 0.054242045578479545, 'lambda': 0.01693909522634645, 'colsample_bytree': 0.18840042224483478, 'subsample': 0.6235096295570068}. Best is trial 1 with value: 0.3616120023988419.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:12:26,042][0m Trial 2 finished with value: 0.3612390713217165 and parameters: {'max_depth': 15, 'learning_rate': 0.08, 'n_estimators': 7007, 'min_child_weight': 28, 'gamma': 0.517515593484536, 'alpha': 0.0008668580945217386, 'lambda': 0.008094393502709338, 'colsample_bytree': 0.5265968095820165, 'subsample': 0.7190712007836293}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:12:43,294][0m Trial 3 finished with value: 0.36170467108209625 and parameters: {'max_depth': 32, 'learning_rate': 0.08, 'n_estimators': 5664, 'min_child_weight': 40, 'gamma': 0.000534437334220305, 'alpha': 0.2826486118727411, 'lambda': 0.0016788817953108603, 'colsample_bytree': 0.2625598627187579, 'subsample': 0.25904441191660565}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:13:46,093][0m Trial 4 finished with value: 0.36127649553502306 and parameters: {'max_depth': 11, 'learning_rate': 0.1, 'n_estimators': 2636, 'min_child_weight': 138, 'gamma': 0.007087487619854724, 'alpha': 0.00019854972735638748, 'lambda': 0.06450984693773483, 'colsample_bytree': 0.10463928840691201, 'subsample': 0.40559775213439453}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:15:59,811][0m Trial 5 finished with value: 0.36321522665725525 and parameters: {'max_depth': 32, 'learning_rate': 0.005, 'n_estimators': 7335, 'min_child_weight': 209, 'gamma': 0.12070337116818822, 'alpha': 0.0076098184064290406, 'lambda': 0.14152718308726772, 'colsample_bytree': 0.6551118103778522, 'subsample': 0.7434971617725615}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:16:32,327][0m Trial 6 finished with value: 0.36166674816316197 and parameters: {'max_depth': 23, 'learning_rate': 0.1, 'n_estimators': 6124, 'min_child_weight': 89, 'gamma': 0.005676162833330357, 'alpha': 0.09542162870209438, 'lambda': 1.1929036130412265, 'colsample_bytree': 0.7373916680793415, 'subsample': 0.784201934370596}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:16:40,961][0m Trial 7 finished with value: 0.361708745935371 and parameters: {'max_depth': 4, 'learning_rate': 0.08, 'n_estimators': 3266, 'min_child_weight': 145, 'gamma': 0.22937773801375896, 'alpha': 2.857938123529125, 'lambda': 0.09574339262776235, 'colsample_bytree': 0.6874907105715028, 'subsample': 0.5143096810895056}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:19:07,854][0m Trial 8 finished with value: 0.3629600536355745 and parameters: {'max_depth': 30, 'learning_rate': 0.005, 'n_estimators': 7972, 'min_child_weight': 270, 'gamma': 0.00047915364840938533, 'alpha': 0.0032774446977253814, 'lambda': 2.7122599836457217, 'colsample_bytree': 0.20040285327613483, 'subsample': 0.5904044405823834}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:19:25,794][0m Trial 9 finished with value: 0.36170930446995425 and parameters: {'max_depth': 27, 'learning_rate': 0.02, 'n_estimators': 3903, 'min_child_weight': 128, 'gamma': 0.004482735446659862, 'alpha': 3.2604728846400284, 'lambda': 0.2743719076802413, 'colsample_bytree': 0.4106749190248834, 'subsample': 0.5648212130422886}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:21:41,041][0m Trial 10 finished with value: 0.3613028000305067 and parameters: {'max_depth': 14, 'learning_rate': 0.05, 'n_estimators': 6712, 'min_child_weight': 5, 'gamma': 0.5730247036799355, 'alpha': 0.00014465902543527305, 'lambda': 0.0002589263188110412, 'colsample_bytree': 0.5041631181048561, 'subsample': 0.17540447385793745}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:22:21,410][0m Trial 11 finished with value: 0.36142006276870753 and parameters: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 2137, 'min_child_weight': 211, 'gamma': 0.023544564958769, 'alpha': 0.0002552936762565455, 'lambda': 0.0029816598066495013, 'colsample_bytree': 0.5512617982176279, 'subsample': 0.32013580164624483}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:23:16,235][0m Trial 12 finished with value: 0.3613651602178991 and parameters: {'max_depth': 11, 'learning_rate': 0.1, 'n_estimators': 2331, 'min_child_weight': 193, 'gamma': 0.0018034175627594014, 'alpha': 0.001064567056094974, 'lambda': 0.004027570136475622, 'colsample_bytree': 0.10810350210059261, 'subsample': 0.3866838589502621}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:24:18,027][0m Trial 13 finished with value: 0.3615828441739153 and parameters: {'max_depth': 21, 'learning_rate': 0.05, 'n_estimators': 3079, 'min_child_weight': 294, 'gamma': 0.00012266582832937473, 'alpha': 0.0008744289969287757, 'lambda': 0.00032362903332064814, 'colsample_bytree': 0.32046790666805547, 'subsample': 0.693144188847007}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:25:51,683][0m Trial 14 finished with value: 0.3619160710902286 and parameters: {'max_depth': 6, 'learning_rate': 0.02, 'n_estimators': 4441, 'min_child_weight': 104, 'gamma': 0.9129719155850229, 'alpha': 0.00741011287941996, 'lambda': 0.036331580939102076, 'colsample_bytree': 0.5813207889911929, 'subsample': 0.4825304216345405}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:26:41,570][0m Trial 15 finished with value: 0.3614628060403331 and parameters: {'max_depth': 19, 'learning_rate': 0.1, 'n_estimators': 6432, 'min_child_weight': 6, 'gamma': 0.03141146904358259, 'alpha': 0.00011732627723583629, 'lambda': 7.652081733003675, 'colsample_bytree': 0.47077147506141825, 'subsample': 0.35260716510211076}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:28:28,478][0m Trial 16 finished with value: 0.3613120366658135 and parameters: {'max_depth': 10, 'learning_rate': 0.1, 'n_estimators': 5422, 'min_child_weight': 167, 'gamma': 0.014651861500407605, 'alpha': 0.0010889473983000165, 'lambda': 0.522255718317253, 'colsample_bytree': 0.7931896688412554, 'subsample': 0.10790403119775621}. Best is trial 2 with value: 0.3612390713217165.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:31:26,412][0m Trial 17 finished with value: 0.3612260417821249 and parameters: {'max_depth': 14, 'learning_rate': 0.08, 'n_estimators': 7394, 'min_child_weight': 246, 'gamma': 0.06732911303981783, 'alpha': 0.0004101244873955208, 'lambda': 0.0009058648405874197, 'colsample_bytree': 0.5988898442545675, 'subsample': 0.6940182392809582}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:33:40,067][0m Trial 18 finished with value: 0.361586584622314 and parameters: {'max_depth': 15, 'learning_rate': 0.08, 'n_estimators': 7225, 'min_child_weight': 267, 'gamma': 0.07030338202153903, 'alpha': 0.01848843553722974, 'lambda': 0.0007893779746478103, 'colsample_bytree': 0.6121451791055772, 'subsample': 0.6671164968445201}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:36:53,511][0m Trial 19 finished with value: 0.3612269809894618 and parameters: {'max_depth': 24, 'learning_rate': 0.08, 'n_estimators': 7803, 'min_child_weight': 243, 'gamma': 0.3643693856777806, 'alpha': 0.0005368277561186646, 'lambda': 0.00010448729140581447, 'colsample_bytree': 0.5222142055075876, 'subsample': 0.7978445581094498}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:40:31,929][0m Trial 20 finished with value: 0.3613288939666214 and parameters: {'max_depth': 25, 'learning_rate': 0.08, 'n_estimators': 7909, 'min_child_weight': 227, 'gamma': 0.06088124441833183, 'alpha': 0.0028013485711504524, 'lambda': 0.00013586922777302619, 'colsample_bytree': 0.42473656764024187, 'subsample': 0.7978283133966075}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:43:25,938][0m Trial 21 finished with value: 0.3612288124047564 and parameters: {'max_depth': 19, 'learning_rate': 0.08, 'n_estimators': 7038, 'min_child_weight': 245, 'gamma': 0.41886019884165526, 'alpha': 0.0004999998087296349, 'lambda': 0.0006324137813027628, 'colsample_bytree': 0.5186271712930692, 'subsample': 0.7066635815354909}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:46:28,719][0m Trial 22 finished with value: 0.3612275524197875 and parameters: {'max_depth': 19, 'learning_rate': 0.08, 'n_estimators': 7446, 'min_child_weight': 245, 'gamma': 0.31232389682278555, 'alpha': 0.0005158865304474763, 'lambda': 0.0001066430248150985, 'colsample_bytree': 0.6231976473901057, 'subsample': 0.6578311672648273}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:51:33,714][0m Trial 23 finished with value: 0.36136104252781637 and parameters: {'max_depth': 24, 'learning_rate': 0.08, 'n_estimators': 7684, 'min_child_weight': 177, 'gamma': 0.24990735510533568, 'alpha': 0.0036147402593759044, 'lambda': 0.00010137577906301424, 'colsample_bytree': 0.6313057155244408, 'subsample': 0.6542684996187386}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:54:00,893][0m Trial 24 finished with value: 0.3612365547319483 and parameters: {'max_depth': 21, 'learning_rate': 0.08, 'n_estimators': 6068, 'min_child_weight': 249, 'gamma': 0.08395954532824601, 'alpha': 0.00029611712227329607, 'lambda': 0.0007802136209453933, 'colsample_bytree': 0.7060312631640083, 'subsample': 0.5511817438282092}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:56:24,414][0m Trial 25 finished with value: 0.36331133363468626 and parameters: {'max_depth': 18, 'learning_rate': 0.005, 'n_estimators': 7523, 'min_child_weight': 288, 'gamma': 0.275522045932879, 'alpha': 0.0001000995289606605, 'lambda': 0.00026919283208468373, 'colsample_bytree': 0.5798541817080574, 'subsample': 0.748249759698365}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 18:56:59,886][0m Trial 26 finished with value: 0.36170450542943566 and parameters: {'max_depth': 13, 'learning_rate': 0.02, 'n_estimators': 6619, 'min_child_weight': 235, 'gamma': 0.13975269444809058, 'alpha': 0.31642439200022465, 'lambda': 0.00010294997620047247, 'colsample_bytree': 0.4731825489997388, 'subsample': 0.6229581204017358}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:00:34,325][0m Trial 27 finished with value: 0.3612856367909741 and parameters: {'max_depth': 21, 'learning_rate': 0.08, 'n_estimators': 6120, 'min_child_weight': 269, 'gamma': 0.9802795105380809, 'alpha': 0.00187805820054893, 'lambda': 0.0014573434809111603, 'colsample_bytree': 0.764873476866755, 'subsample': 0.791385217990656}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:03:35,527][0m Trial 28 finished with value: 0.3612677879504493 and parameters: {'max_depth': 26, 'learning_rate': 0.05, 'n_estimators': 7530, 'min_child_weight': 210, 'gamma': 0.046501067534578576, 'alpha': 0.00042141563430006285, 'lambda': 0.00034739269372700664, 'colsample_bytree': 0.675607116554215, 'subsample': 0.641909083579672}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:06:02,661][0m Trial 29 finished with value: 0.3615817012462798 and parameters: {'max_depth': 16, 'learning_rate': 0.08, 'n_estimators': 6850, 'min_child_weight': 299, 'gamma': 0.01634146185504153, 'alpha': 0.015687682071381187, 'lambda': 0.005565598373451915, 'colsample_bytree': 0.34501562351855775, 'subsample': 0.4882970010329588}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:08:08,167][0m Trial 30 finished with value: 0.36125226111770165 and parameters: {'max_depth': 17, 'learning_rate': 0.08, 'n_estimators': 5107, 'min_child_weight': 179, 'gamma': 0.4256283835755329, 'alpha': 0.0005176737414529361, 'lambda': 0.00019176267834612908, 'colsample_bytree': 0.600906188992181, 'subsample': 0.752115850884647}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:10:59,844][0m Trial 31 finished with value: 0.36122756785157983 and parameters: {'max_depth': 20, 'learning_rate': 0.08, 'n_estimators': 7109, 'min_child_weight': 245, 'gamma': 0.367068605633052, 'alpha': 0.0004330756093214202, 'lambda': 0.000526507121289756, 'colsample_bytree': 0.5482910949410541, 'subsample': 0.6891780970498977}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:11:08,699][0m Trial 32 finished with value: 0.3617240280963928 and parameters: {'max_depth': 22, 'learning_rate': 0.08, 'n_estimators': 7930, 'min_child_weight': 227, 'gamma': 0.1866170093137396, 'alpha': 9.203370675876899, 'lambda': 0.0005131682618323403, 'colsample_bytree': 0.5430008887909364, 'subsample': 0.6066217486898555}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:14:52,832][0m Trial 33 finished with value: 0.36129088755693445 and parameters: {'max_depth': 28, 'learning_rate': 0.08, 'n_estimators': 7199, 'min_child_weight': 259, 'gamma': 0.11529264308531775, 'alpha': 0.0019937777009273344, 'lambda': 0.0016231116384871208, 'colsample_bytree': 0.6305694151269583, 'subsample': 0.6798681360603964}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:17:30,069][0m Trial 34 finished with value: 0.3612325371511268 and parameters: {'max_depth': 20, 'learning_rate': 0.08, 'n_estimators': 6394, 'min_child_weight': 278, 'gamma': 0.3895010515618624, 'alpha': 0.0005428077718550739, 'lambda': 0.010926119144705362, 'colsample_bytree': 0.4601012225695761, 'subsample': 0.7041109140335146}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:21:01,005][0m Trial 35 finished with value: 0.36140471405598157 and parameters: {'max_depth': 17, 'learning_rate': 0.08, 'n_estimators': 7508, 'min_child_weight': 247, 'gamma': 0.6013164477106506, 'alpha': 0.0060884465600827544, 'lambda': 0.02222666919469234, 'colsample_bytree': 0.5638502263324424, 'subsample': 0.7282086584835473}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:23:38,479][0m Trial 36 finished with value: 0.36122837876019454 and parameters: {'max_depth': 13, 'learning_rate': 0.08, 'n_estimators': 6989, 'min_child_weight': 194, 'gamma': 0.04430369207773467, 'alpha': 0.00023186351369013043, 'lambda': 0.000979252688195769, 'colsample_bytree': 0.4974556628743642, 'subsample': 0.5527200751719646}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:24:30,011][0m Trial 37 finished with value: 0.36162241872934936 and parameters: {'max_depth': 23, 'learning_rate': 0.08, 'n_estimators': 5736, 'min_child_weight': 224, 'gamma': 0.11059352624828496, 'alpha': 0.06465424798264194, 'lambda': 0.0027364314550677527, 'colsample_bytree': 0.7219213773155745, 'subsample': 0.7615995522936411}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:26:54,973][0m Trial 38 finished with value: 0.36313691519674374 and parameters: {'max_depth': 25, 'learning_rate': 0.005, 'n_estimators': 7723, 'min_child_weight': 198, 'gamma': 0.29949055696664584, 'alpha': 0.0014464267994372187, 'lambda': 0.00048486951036363354, 'colsample_bytree': 0.6511021645328151, 'subsample': 0.6406366247111284}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:27:07,362][0m Trial 39 finished with value: 0.36170485535135716 and parameters: {'max_depth': 19, 'learning_rate': 0.08, 'n_estimators': 7235, 'min_child_weight': 257, 'gamma': 0.6771290293322637, 'alpha': 0.4194113363490923, 'lambda': 0.00016023184779704163, 'colsample_bytree': 0.41218083138607325, 'subsample': 0.44309950789243835}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:28:31,420][0m Trial 40 finished with value: 0.3619968153002285 and parameters: {'max_depth': 23, 'learning_rate': 0.02, 'n_estimators': 4450, 'min_child_weight': 281, 'gamma': 0.14544515230514674, 'alpha': 0.0006863158210507651, 'lambda': 0.00019899067167084868, 'colsample_bytree': 0.36350958877085704, 'subsample': 0.7195132803157045}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:31:14,862][0m Trial 41 finished with value: 0.36122915224853436 and parameters: {'max_depth': 13, 'learning_rate': 0.08, 'n_estimators': 6873, 'min_child_weight': 238, 'gamma': 0.044164003522713914, 'alpha': 0.00023374861931800683, 'lambda': 0.001039073193770106, 'colsample_bytree': 0.5046604613098994, 'subsample': 0.5683634641053974}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




[32m[I 2022-03-28 19:33:53,718][0m Trial 42 finished with value: 0.36122841244257003 and parameters: {'max_depth': 8, 'learning_rate': 0.08, 'n_estimators': 7017, 'min_child_weight': 160, 'gamma': 0.009871826831051008, 'alpha': 0.00020329970053225435, 'lambda': 0.001210583901347616, 'colsample_bytree': 0.4874780627615151, 'subsample': 0.5317568074915056}. Best is trial 17 with value: 0.3612260417821249.[0m


Parameters: { "colsample_bytree", "gamma", "max_depth", "min_child_weight", "subsample" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




KeyboardInterrupt: 

In [72]:
def k_fold_cross_valid(model,x_train,y_train,n_splits=5):
    
    X = x_train.copy()
    y = y_train.copy()

    from sklearn.model_selection import KFold
    kf = KFold(n_splits=5)
    kf.get_n_splits(X)
    res = []

    for train_index, test_index in kf.split(X):
        
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        model.fit(X_train,y_train)
        y_pred = model.predict(X_test)
    
        res.append(metric(y_test,y_pred))
        
    print("RMSE:",np.array(res).mean())

In [71]:
def metric(y_test, y_pred):
    rmse = mean_squared_error(y_test, y_pred , squared=False)
    return rmse

In [87]:
%%time
xgb_reg = xgb.XGBRegressor(n_estimators=100,max_depth=6,
                                learning_rate=0.08,
                                subsample=0.7,gamma = 0.067,alpha=0.0004,
                                colsample_bytree=0.6,min_child_weight=240,
                                random_state = 42)
k_fold_cross_valid(xgb_reg,train_df,target,n_splits=3)

RMSE: 0.3598296035265301
Wall time: 1min 21s


In [88]:
xgb_pred = xgb_reg.predict(test_df)
xgbp = pd.DataFrame(xgb_pred,columns=['Accident_risk_index'])
final1=pd.concat([test['postcode'],xgbp['Accident_risk_index']],axis=1)
final1=final1.groupby('postcode',as_index=False).agg({'Accident_risk_index' : 'mean'})
final1.to_csv("xgb_pred.csv",index=False)

In [89]:
final1

Unnamed: 0,postcode,Accident_risk_index
0,AB10 1AU,1.455465
1,AB10 1PG,1.460550
2,AB10 1TT,1.503684
3,AB10 1YP,1.458755
4,AB10 6LQ,1.425075
...,...,...
49767,ZE2 9LZ,1.429066
49768,ZE2 9RE,1.435767
49769,ZE2 9RJ,1.412882
49770,ZE2 9SB,1.486296
