In [1]:
from google.colab import files
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
import time

In [2]:
uploaded = files.upload()

Saving test.csv to test.csv
Saving train.csv to train.csv
Saving sample_submission.csv to sample_submission.csv


In [2]:
ls

column_mul.csv     [0m[01;34msample_data[0m/           test.csv
my_submission.csv  sample_submission.csv  train.csv


In [3]:
train = pd.read_csv("train.csv")
test  = pd.read_csv("test.csv")

In [4]:
train.shape, test.shape

((58592, 44), (39063, 43))

In [None]:
train.dtypes

In [5]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [6]:
# Expanding "max_torque" feature
train["torque"] = train["max_torque"].str.split("Nm@|rpm", expand=True)[0].astype(float)
train["rpm"]    = train["max_torque"].str.split("Nm@|rpm", expand=True)[1].astype(float)
train           = train.drop(["max_torque"], axis=1)

test["torque"] = test["max_torque"].str.split("Nm@|rpm", expand=True)[0].astype(float)
test["rpm"]    = test["max_torque"].str.split("Nm@|rpm", expand=True)[1].astype(float)
test           = test.drop(["max_torque"], axis=1)

# Expanding "max_power" feature
train["power"] = train["max_power"].str.split("bhp@|rpm", expand=True)[0].astype(float)
train["rpm_2"] = train["max_power"].str.split("bhp@|rpm", expand=True)[1].astype(float)
train          = train.drop(["max_power"], axis=1)

test["power"] = test["max_power"].str.split("bhp@|rpm", expand=True)[0].astype(float)
test["rpm_2"] = test["max_power"].str.split("bhp@|rpm", expand=True)[1].astype(float)
test          = test.drop(["max_power"], axis=1)

In [7]:
train.shape, test.shape

((58592, 46), (39063, 45))

In [8]:
# Heuristic approach to create new features by multiplying
# existing columns. This way, you can introduce non-linearity to your data set.
from itertools import combinations
# Should include numerical and One Hot Encoded columns. 
# Be careful before including label encoded columns.
used_cols      = ['policy_tenure', 'age_of_car', 'age_of_policyholder','population_density',
                  'airbags','displacement', 'cylinder','gear_box','turning_radius',
                  'length', 'width', 'height', 'gross_weight','torque', 'rpm','power', 'rpm_2']

df             = train.copy()
cc             = list(combinations(used_cols,2))
df_new         = pd.concat([df[c[1]].multiply(df[c[0]]) for c in cc], axis=1, keys=cc)
df_new.columns = df_new.columns.map('_'.join)
df             = pd.concat((df, df_new), axis=1)
train          = df.copy()
del [[df, df_new]]

df             = test.copy()
cc             = list(combinations(used_cols,2))
df_new         = pd.concat([df[c[1]].multiply(df[c[0]]) for c in cc], axis=1, keys=cc)
df_new.columns = df_new.columns.map('_'.join)
df             = pd.concat((df, df_new), axis=1)
test           = df.copy()
del [[df, df_new]]

In [9]:
train.shape, test.shape

((58592, 182), (39063, 181))

In [10]:
train.head(2)

Unnamed: 0,policy_id,policy_tenure,age_of_car,age_of_policyholder,area_cluster,population_density,make,segment,model,fuel_type,engine_type,airbags,is_esc,is_adjustable_steering,is_tpms,is_parking_sensors,is_parking_camera,rear_brakes_type,displacement,cylinder,transmission_type,gear_box,steering_type,turning_radius,length,width,height,gross_weight,is_front_fog_lights,is_rear_window_wiper,is_rear_window_washer,is_rear_window_defogger,is_brake_assist,is_power_door_locks,is_central_locking,is_power_steering,is_driver_seat_height_adjustable,is_day_night_rear_view_mirror,is_ecw,is_speed_alert,ncap_rating,is_claim,torque,rpm,power,rpm_2,policy_tenure_age_of_car,policy_tenure_age_of_policyholder,policy_tenure_population_density,policy_tenure_airbags,policy_tenure_displacement,policy_tenure_cylinder,policy_tenure_gear_box,policy_tenure_turning_radius,policy_tenure_length,policy_tenure_width,policy_tenure_height,policy_tenure_gross_weight,policy_tenure_torque,policy_tenure_rpm,policy_tenure_power,policy_tenure_rpm_2,age_of_car_age_of_policyholder,age_of_car_population_density,age_of_car_airbags,age_of_car_displacement,age_of_car_cylinder,age_of_car_gear_box,age_of_car_turning_radius,age_of_car_length,age_of_car_width,age_of_car_height,age_of_car_gross_weight,age_of_car_torque,age_of_car_rpm,age_of_car_power,age_of_car_rpm_2,age_of_policyholder_population_density,age_of_policyholder_airbags,age_of_policyholder_displacement,age_of_policyholder_cylinder,age_of_policyholder_gear_box,age_of_policyholder_turning_radius,age_of_policyholder_length,age_of_policyholder_width,age_of_policyholder_height,age_of_policyholder_gross_weight,age_of_policyholder_torque,age_of_policyholder_rpm,age_of_policyholder_power,age_of_policyholder_rpm_2,population_density_airbags,population_density_displacement,population_density_cylinder,population_density_gear_box,population_density_turning_radius,population_density_length,population_density_width,population_density_height,population_density_gross_weight,population_density_torque,population_density_rpm,population_density_power,population_density_rpm_2,airbags_displacement,airbags_cylinder,airbags_gear_box,airbags_turning_radius,airbags_length,airbags_width,airbags_height,airbags_gross_weight,airbags_torque,airbags_rpm,airbags_power,airbags_rpm_2,displacement_cylinder,displacement_gear_box,displacement_turning_radius,displacement_length,displacement_width,displacement_height,displacement_gross_weight,displacement_torque,displacement_rpm,displacement_power,displacement_rpm_2,cylinder_gear_box,cylinder_turning_radius,cylinder_length,cylinder_width,cylinder_height,cylinder_gross_weight,cylinder_torque,cylinder_rpm,cylinder_power,cylinder_rpm_2,gear_box_turning_radius,gear_box_length,gear_box_width,gear_box_height,gear_box_gross_weight,gear_box_torque,gear_box_rpm,gear_box_power,gear_box_rpm_2,turning_radius_length,turning_radius_width,turning_radius_height,turning_radius_gross_weight,turning_radius_torque,turning_radius_rpm,turning_radius_power,turning_radius_rpm_2,length_width,length_height,length_gross_weight,length_torque,length_rpm,length_power,length_rpm_2,width_height,width_gross_weight,width_torque,width_rpm,width_power,width_rpm_2,height_gross_weight,height_torque,height_rpm,height_power,height_rpm_2,gross_weight_torque,gross_weight_rpm,gross_weight_power,gross_weight_rpm_2,torque_rpm,torque_power,torque_rpm_2,rpm_power,rpm_rpm_2,power_rpm_2
0,ID00001,0.515874,0.05,0.644231,C1,4990,1,A,M1,CNG,F8D Petrol Engine,2,No,No,No,Yes,No,Drum,796,3,Manual,5,Power,4.6,3445,1515,1475,1185,No,No,No,No,No,No,No,Yes,No,No,No,Yes,0,0,60.0,3500.0,40.36,6000.0,0.025794,0.332342,2574.209214,1.031747,410.635378,1.547621,2.579368,2.373019,1777.184517,781.548489,760.913545,611.310204,30.952415,1805.557565,20.820658,3095.24154,0.032212,249.5,0.1,39.8,0.15,0.25,0.23,172.25,75.75,73.75,59.25,3.0,175.0,2.018,300.0,3214.711538,1.288462,512.807692,1.932692,3.221154,2.963462,2219.375,976.009615,950.240385,763.413462,38.653846,2254.807692,26.001154,3865.384615,9980,3972040,14970,24950,22954.0,17190550,7559850,7360250,5913150,299400.0,17465000.0,201396.4,29940000.0,1592,6,10,9.2,6890,3030,2950,2370,120.0,7000.0,80.72,12000.0,2388,3980,3661.6,2742220,1205940,1174100,943260,47760.0,2786000.0,32126.56,4776000.0,15,13.8,10335,4545,4425,3555,180.0,10500.0,121.08,18000.0,23.0,17225,7575,7375,5925,300.0,17500.0,201.8,30000.0,15847.0,6969.0,6785.0,5451.0,276.0,16100.0,185.656,27600.0,5219175,5081375,4082325,206700.0,12057500.0,139040.2,20670000.0,2234625,1795275,90900.0,5302500.0,61145.4,9090000.0,1747875,88500.0,5162500.0,59531.0,8850000.0,71100.0,4147500.0,47826.6,7110000.0,210000.0,2421.6,360000.0,141260.0,21000000.0,242160.0
1,ID00002,0.672619,0.02,0.375,C2,27003,1,A,M1,CNG,F8D Petrol Engine,2,No,No,No,Yes,No,Drum,796,3,Manual,5,Power,4.6,3445,1515,1475,1185,No,No,No,No,No,No,No,Yes,No,No,No,Yes,0,0,60.0,3500.0,40.36,6000.0,0.013452,0.252232,18162.717721,1.345237,535.404337,2.017856,3.363093,3.094045,2317.170779,1019.017048,992.112307,797.052939,40.357111,2354.164797,27.146883,4035.711081,0.0075,540.06,0.04,15.92,0.06,0.1,0.092,68.9,30.3,29.5,23.7,1.2,70.0,0.8072,120.0,10126.125,0.75,298.5,1.125,1.875,1.725,1291.875,568.125,553.125,444.375,22.5,1312.5,15.135,2250.0,54006,21494388,81009,135015,124213.8,93025335,40909545,39829425,31998555,1620180.0,94510500.0,1089841.08,162018000.0,1592,6,10,9.2,6890,3030,2950,2370,120.0,7000.0,80.72,12000.0,2388,3980,3661.6,2742220,1205940,1174100,943260,47760.0,2786000.0,32126.56,4776000.0,15,13.8,10335,4545,4425,3555,180.0,10500.0,121.08,18000.0,23.0,17225,7575,7375,5925,300.0,17500.0,201.8,30000.0,15847.0,6969.0,6785.0,5451.0,276.0,16100.0,185.656,27600.0,5219175,5081375,4082325,206700.0,12057500.0,139040.2,20670000.0,2234625,1795275,90900.0,5302500.0,61145.4,9090000.0,1747875,88500.0,5162500.0,59531.0,8850000.0,71100.0,4147500.0,47826.6,7110000.0,210000.0,2421.6,360000.0,141260.0,21000000.0,242160.0


In [11]:
train.isnull().sum().sum(), test.isnull().sum().sum() 

(0, 0)

In [12]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder

lb_columns = ["area_cluster", "segment", "model", "fuel_type", 
              "engine_type", "is_esc",
              "is_adjustable_steering", "is_tpms", "is_parking_sensors",
              "is_parking_camera", "rear_brakes_type", "transmission_type",
              "steering_type", "is_front_fog_lights", "is_rear_window_wiper",
              "is_rear_window_washer", "is_rear_window_defogger", "is_brake_assist",
              "is_power_door_locks", "is_central_locking", "is_power_steering",
              "is_driver_seat_height_adjustable", "is_day_night_rear_view_mirror",
              "is_ecw", "is_speed_alert"]

for col in lb_columns:
  lb = LabelEncoder() 
  train[col] = lb.fit_transform(train[col])
  test[col]  = lb.transform(test[col])

In [13]:
train.head(2)

Unnamed: 0,policy_id,policy_tenure,age_of_car,age_of_policyholder,area_cluster,population_density,make,segment,model,fuel_type,engine_type,airbags,is_esc,is_adjustable_steering,is_tpms,is_parking_sensors,is_parking_camera,rear_brakes_type,displacement,cylinder,transmission_type,gear_box,steering_type,turning_radius,length,width,height,gross_weight,is_front_fog_lights,is_rear_window_wiper,is_rear_window_washer,is_rear_window_defogger,is_brake_assist,is_power_door_locks,is_central_locking,is_power_steering,is_driver_seat_height_adjustable,is_day_night_rear_view_mirror,is_ecw,is_speed_alert,ncap_rating,is_claim,torque,rpm,power,rpm_2,policy_tenure_age_of_car,policy_tenure_age_of_policyholder,policy_tenure_population_density,policy_tenure_airbags,policy_tenure_displacement,policy_tenure_cylinder,policy_tenure_gear_box,policy_tenure_turning_radius,policy_tenure_length,policy_tenure_width,policy_tenure_height,policy_tenure_gross_weight,policy_tenure_torque,policy_tenure_rpm,policy_tenure_power,policy_tenure_rpm_2,age_of_car_age_of_policyholder,age_of_car_population_density,age_of_car_airbags,age_of_car_displacement,age_of_car_cylinder,age_of_car_gear_box,age_of_car_turning_radius,age_of_car_length,age_of_car_width,age_of_car_height,age_of_car_gross_weight,age_of_car_torque,age_of_car_rpm,age_of_car_power,age_of_car_rpm_2,age_of_policyholder_population_density,age_of_policyholder_airbags,age_of_policyholder_displacement,age_of_policyholder_cylinder,age_of_policyholder_gear_box,age_of_policyholder_turning_radius,age_of_policyholder_length,age_of_policyholder_width,age_of_policyholder_height,age_of_policyholder_gross_weight,age_of_policyholder_torque,age_of_policyholder_rpm,age_of_policyholder_power,age_of_policyholder_rpm_2,population_density_airbags,population_density_displacement,population_density_cylinder,population_density_gear_box,population_density_turning_radius,population_density_length,population_density_width,population_density_height,population_density_gross_weight,population_density_torque,population_density_rpm,population_density_power,population_density_rpm_2,airbags_displacement,airbags_cylinder,airbags_gear_box,airbags_turning_radius,airbags_length,airbags_width,airbags_height,airbags_gross_weight,airbags_torque,airbags_rpm,airbags_power,airbags_rpm_2,displacement_cylinder,displacement_gear_box,displacement_turning_radius,displacement_length,displacement_width,displacement_height,displacement_gross_weight,displacement_torque,displacement_rpm,displacement_power,displacement_rpm_2,cylinder_gear_box,cylinder_turning_radius,cylinder_length,cylinder_width,cylinder_height,cylinder_gross_weight,cylinder_torque,cylinder_rpm,cylinder_power,cylinder_rpm_2,gear_box_turning_radius,gear_box_length,gear_box_width,gear_box_height,gear_box_gross_weight,gear_box_torque,gear_box_rpm,gear_box_power,gear_box_rpm_2,turning_radius_length,turning_radius_width,turning_radius_height,turning_radius_gross_weight,turning_radius_torque,turning_radius_rpm,turning_radius_power,turning_radius_rpm_2,length_width,length_height,length_gross_weight,length_torque,length_rpm,length_power,length_rpm_2,width_height,width_gross_weight,width_torque,width_rpm,width_power,width_rpm_2,height_gross_weight,height_torque,height_rpm,height_power,height_rpm_2,gross_weight_torque,gross_weight_rpm,gross_weight_power,gross_weight_rpm_2,torque_rpm,torque_power,torque_rpm_2,rpm_power,rpm_rpm_2,power_rpm_2
0,ID00001,0.515874,0.05,0.644231,0,4990,1,0,0,0,6,2,0,0,0,1,0,1,796,3,1,5,2,4.6,3445,1515,1475,1185,0,0,0,0,0,0,0,1,0,0,0,1,0,0,60.0,3500.0,40.36,6000.0,0.025794,0.332342,2574.209214,1.031747,410.635378,1.547621,2.579368,2.373019,1777.184517,781.548489,760.913545,611.310204,30.952415,1805.557565,20.820658,3095.24154,0.032212,249.5,0.1,39.8,0.15,0.25,0.23,172.25,75.75,73.75,59.25,3.0,175.0,2.018,300.0,3214.711538,1.288462,512.807692,1.932692,3.221154,2.963462,2219.375,976.009615,950.240385,763.413462,38.653846,2254.807692,26.001154,3865.384615,9980,3972040,14970,24950,22954.0,17190550,7559850,7360250,5913150,299400.0,17465000.0,201396.4,29940000.0,1592,6,10,9.2,6890,3030,2950,2370,120.0,7000.0,80.72,12000.0,2388,3980,3661.6,2742220,1205940,1174100,943260,47760.0,2786000.0,32126.56,4776000.0,15,13.8,10335,4545,4425,3555,180.0,10500.0,121.08,18000.0,23.0,17225,7575,7375,5925,300.0,17500.0,201.8,30000.0,15847.0,6969.0,6785.0,5451.0,276.0,16100.0,185.656,27600.0,5219175,5081375,4082325,206700.0,12057500.0,139040.2,20670000.0,2234625,1795275,90900.0,5302500.0,61145.4,9090000.0,1747875,88500.0,5162500.0,59531.0,8850000.0,71100.0,4147500.0,47826.6,7110000.0,210000.0,2421.6,360000.0,141260.0,21000000.0,242160.0
1,ID00002,0.672619,0.02,0.375,11,27003,1,0,0,0,6,2,0,0,0,1,0,1,796,3,1,5,2,4.6,3445,1515,1475,1185,0,0,0,0,0,0,0,1,0,0,0,1,0,0,60.0,3500.0,40.36,6000.0,0.013452,0.252232,18162.717721,1.345237,535.404337,2.017856,3.363093,3.094045,2317.170779,1019.017048,992.112307,797.052939,40.357111,2354.164797,27.146883,4035.711081,0.0075,540.06,0.04,15.92,0.06,0.1,0.092,68.9,30.3,29.5,23.7,1.2,70.0,0.8072,120.0,10126.125,0.75,298.5,1.125,1.875,1.725,1291.875,568.125,553.125,444.375,22.5,1312.5,15.135,2250.0,54006,21494388,81009,135015,124213.8,93025335,40909545,39829425,31998555,1620180.0,94510500.0,1089841.08,162018000.0,1592,6,10,9.2,6890,3030,2950,2370,120.0,7000.0,80.72,12000.0,2388,3980,3661.6,2742220,1205940,1174100,943260,47760.0,2786000.0,32126.56,4776000.0,15,13.8,10335,4545,4425,3555,180.0,10500.0,121.08,18000.0,23.0,17225,7575,7375,5925,300.0,17500.0,201.8,30000.0,15847.0,6969.0,6785.0,5451.0,276.0,16100.0,185.656,27600.0,5219175,5081375,4082325,206700.0,12057500.0,139040.2,20670000.0,2234625,1795275,90900.0,5302500.0,61145.4,9090000.0,1747875,88500.0,5162500.0,59531.0,8850000.0,71100.0,4147500.0,47826.6,7110000.0,210000.0,2421.6,360000.0,141260.0,21000000.0,242160.0


In [14]:
#One hot encoding
# This can be done through the pipeline but I preferred to follow the tutorial
ohe_columns = ['area_cluster','make','segment','model','fuel_type',
               'engine_type','steering_type']

train = pd.get_dummies(train, columns = ohe_columns, drop_first=True)
test  = pd.get_dummies(test,  columns = ohe_columns, drop_first=True)

In [15]:
train.shape, test.shape

((58592, 229), (39063, 228))

In [16]:
# Chi square test for feature selection
from sklearn.feature_selection import chi2

# Label Encoding Needed for chi2 module to work properly
lb_df    = train.copy()   # Copy is important

X = lb_df.drop(['policy_id','is_claim'],axis=1)
y = lb_df['is_claim']

chi_scores = chi2(X,y)
p_val      = pd.Series(chi_scores[1],index = X.columns)

# Dependent features according to Chi-Squared Test
# we are using 95% confidence interval for this test
#print("Important fetures are:")
#print("-----------------------------------")
imp_features   = []
unimp_features = []
for col in X.columns:
  if p_val.loc[col] <= 0.05:
    #print(col, "p value:", round(p_val.loc[col],3))
    imp_features.append(col)
  else:
    unimp_features.append(col)

In [17]:
len(unimp_features)

74

In [18]:
train = train.drop(unimp_features, axis =1)
test  = test.drop(unimp_features,  axis =1)

In [19]:
train.shape, test.shape

((58592, 155), (39063, 154))

In [20]:
# Highly imbalanced
train["is_claim"].value_counts(normalize=True)

0    0.936032
1    0.063968
Name: is_claim, dtype: float64

In [36]:
# Definig the features and target
X_train = train.drop(["is_claim", "policy_id"], axis =1)
y_train = train["is_claim"]

X_test  = test.drop(["policy_id"], axis =1)

In [37]:
# Scaling The Features
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
y_train_scaled = y_train.copy()

X_test_scaled  = scaler.transform(X_test)

In [38]:
# SMOTEEN
from imblearn.combine import SMOTEENN
smt          = SMOTEENN(sampling_strategy= 0.5)
X_smt, y_smt = smt.fit_resample(X_train_scaled, y_train_scaled)

y_smt.value_counts(normalize=True)*100

0    69.429954
1    30.570046
Name: is_claim, dtype: float64

In [39]:
# All Classification Models After SMOTEEN
classifiers = [GaussianNB(), MultinomialNB(),
               SGDClassifier(), KNeighborsClassifier(), DecisionTreeClassifier(),
               RandomForestClassifier(), GradientBoostingClassifier(),
               LGBMClassifier(), XGBClassifier()]

score = []
for clf in classifiers:
  print("Solving Model:", str(clf)[:-2], end = ' ')
  start  = time.time()
  model  = clf
  model.fit(X_smt, y_smt)

  val_f1 = np.min(cross_val_score(model, X_smt, y_smt, 
                                   cv=5, scoring="f1"))
  end    = time.time()
  print("Execution Time:",int(end - start), " sec")
  score.append([val_f1,str(clf)[:-2]])
  
score = sorted(score,reverse=True)
for i in range(len(score)):
  print("%26s %15s %6.3f"  %(score[i][1],"Validation F1:",score[i][0]))

Solving Model: GaussianNB Execution Time: 1  sec
Solving Model: MultinomialNB Execution Time: 0  sec
Solving Model: SGDClassifier Execution Time: 14  sec
Solving Model: KNeighborsClassifier Execution Time: 63  sec
Solving Model: DecisionTreeClassifier Execution Time: 33  sec
Solving Model: RandomForestClassifier Execution Time: 168  sec
Solving Model: GradientBoostingClassifier Execution Time: 632  sec
Solving Model: LGBMClassifier Execution Time: 33  sec
Solving Model: XGBClassifier Execution Time: 117  sec
      KNeighborsClassifier  Validation F1:  0.897
    RandomForestClassifier  Validation F1:  0.740
    DecisionTreeClassifier  Validation F1:  0.646
                GaussianNB  Validation F1:  0.525
             MultinomialNB  Validation F1:  0.432
            LGBMClassifier  Validation F1:  0.408
             XGBClassifier  Validation F1:  0.335
GradientBoostingClassifier  Validation F1:  0.329
             SGDClassifier  Validation F1:  0.000


In [48]:
# Final Model Before Tuning
clf = RandomForestClassifier()
clf.fit(X_smt, y_smt)

submission = pd.read_csv('sample_submission.csv')
final_predictions = clf.predict(X_test_scaled)

submission['is_claim'] = final_predictions
submission.to_csv('my_submission.csv', index=False)

In [41]:
clf.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

In [None]:
# Random Forest Random Search
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

rs_params = {'max_depth':list(np.arange(10, 100, step=10)) + [None],
              'n_estimators':np.arange(10, 500, step=50),
              'max_features':randint(1,7),
              'criterion':['gini','entropy'],
              'min_samples_leaf':randint(1,4),
              'min_samples_split':np.arange(2, 10, step=2)
          }
rs_cv = RandomizedSearchCV(RandomForestClassifier(), rs_params,
                           cv= 5)

# Train on training data or SMOTEEN
rs_cv.fit(X_smt, y_smt)

# Print the best parameters
print(rs_cv.best_params_)

{'criterion': 'gini', 'max_depth': 80, 'max_features': 2, 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 210}


In [None]:
# Final Model

submission = pd.read_csv('sample_submission.csv')
final_predictions = rs_cv.predict(X_test_scaled)

submission['is_claim'] = final_predictions
submission.to_csv('my_submission.csv', index=False)