In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings as wr

In [2]:
df=pd.read_csv(r'C:\Users\sameer sheikh\OneDrive\Desktop\datasets\travel.csv')

In [3]:
df.head()

Unnamed: 0,CustomerID,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,200000,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Single,1.0,1,2,1,0.0,Manager,20993.0
1,200001,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,200002,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Single,7.0,1,3,0,0.0,Executive,17090.0
3,200003,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,200004,0,,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


In [4]:
df.shape

(4888, 20)

# data cleaning
### 1) handling missing values
### 2) handling duplicates
### 3) checking datatype
### 4) understand the dataset

In [5]:
df.isnull().sum()

CustomerID                    0
ProdTaken                     0
Age                         226
TypeofContact                25
CityTier                      0
DurationOfPitch             251
Occupation                    0
Gender                        0
NumberOfPersonVisiting        0
NumberOfFollowups            45
ProductPitched                0
PreferredPropertyStar        26
MaritalStatus                 0
NumberOfTrips               140
Passport                      0
PitchSatisfactionScore        0
OwnCar                        0
NumberOfChildrenVisiting     66
Designation                   0
MonthlyIncome               233
dtype: int64

In [6]:
df.columns

Index(['CustomerID', 'ProdTaken', 'Age', 'TypeofContact', 'CityTier',
       'DurationOfPitch', 'Occupation', 'Gender', 'NumberOfPersonVisiting',
       'NumberOfFollowups', 'ProductPitched', 'PreferredPropertyStar',
       'MaritalStatus', 'NumberOfTrips', 'Passport', 'PitchSatisfactionScore',
       'OwnCar', 'NumberOfChildrenVisiting', 'Designation', 'MonthlyIncome'],
      dtype='object')

In [7]:
#check all the categorical columns
print(df['TypeofContact'].value_counts())
print('-----------------------------------------------')
print(df['Occupation'].value_counts())
print('-----------------------------------------------')
print(df['Gender'].value_counts())
print('-----------------------------------------------')
print(df['ProductPitched'].value_counts())
print('-----------------------------------------------')
print(df['MaritalStatus'].value_counts())
print('-----------------------------------------------')
print(df['Designation'].value_counts())

TypeofContact
Self Enquiry       3444
Company Invited    1419
Name: count, dtype: int64
-----------------------------------------------
Occupation
Salaried          2368
Small Business    2084
Large Business     434
Free Lancer          2
Name: count, dtype: int64
-----------------------------------------------
Gender
Male       2916
Female     1817
Fe Male     155
Name: count, dtype: int64
-----------------------------------------------
ProductPitched
Basic           1842
Deluxe          1732
Standard         742
Super Deluxe     342
King             230
Name: count, dtype: int64
-----------------------------------------------
MaritalStatus
Married      2340
Divorced      950
Single        916
Unmarried     682
Name: count, dtype: int64
-----------------------------------------------
Designation
Executive         1842
Manager           1732
Senior Manager     742
AVP                342
VP                 230
Name: count, dtype: int64


In [8]:
df['Gender']=df['Gender'].replace('Fe Male','Female')
df['MaritalStatus']=df['MaritalStatus'].replace('Single','Unmarried')

In [9]:
print(df['TypeofContact'].value_counts())
print('-----------------------------------------------')
print(df['Occupation'].value_counts())
print('-----------------------------------------------')
print(df['Gender'].value_counts())
print('-----------------------------------------------')
print(df['ProductPitched'].value_counts())
print('-----------------------------------------------')
print(df['MaritalStatus'].value_counts())
print('-----------------------------------------------')
print(df['Designation'].value_counts())

TypeofContact
Self Enquiry       3444
Company Invited    1419
Name: count, dtype: int64
-----------------------------------------------
Occupation
Salaried          2368
Small Business    2084
Large Business     434
Free Lancer          2
Name: count, dtype: int64
-----------------------------------------------
Gender
Male      2916
Female    1972
Name: count, dtype: int64
-----------------------------------------------
ProductPitched
Basic           1842
Deluxe          1732
Standard         742
Super Deluxe     342
King             230
Name: count, dtype: int64
-----------------------------------------------
MaritalStatus
Married      2340
Unmarried    1598
Divorced      950
Name: count, dtype: int64
-----------------------------------------------
Designation
Executive         1842
Manager           1732
Senior Manager     742
AVP                342
VP                 230
Name: count, dtype: int64


In [10]:
#checking columns with missing values in %
features_with_na=[feature for feature in df.columns if df[feature].isnull().sum()>=1]
for feature in features_with_na:
    print(feature,'having',round((df[feature].isnull().sum()/len(df[feature]))*100,2),'%','missing values')

Age having 4.62 % missing values
TypeofContact having 0.51 % missing values
DurationOfPitch having 5.14 % missing values
NumberOfFollowups having 0.92 % missing values
PreferredPropertyStar having 0.53 % missing values
NumberOfTrips having 2.86 % missing values
NumberOfChildrenVisiting having 1.35 % missing values
MonthlyIncome having 4.77 % missing values


In [11]:
#checking if there is some outliers or not, if mean and median(50% percentile) are almost same that means there is hardly outliers
df[features_with_na].describe()

Unnamed: 0,Age,DurationOfPitch,NumberOfFollowups,PreferredPropertyStar,NumberOfTrips,NumberOfChildrenVisiting,MonthlyIncome
count,4662.0,4637.0,4843.0,4862.0,4748.0,4822.0,4655.0
mean,37.622265,15.490835,3.708445,3.581037,3.236521,1.187267,23619.853491
std,9.316387,8.519643,1.002509,0.798009,1.849019,0.857861,5380.698361
min,18.0,5.0,1.0,3.0,1.0,0.0,1000.0
25%,31.0,9.0,3.0,3.0,2.0,1.0,20346.0
50%,36.0,13.0,4.0,3.0,3.0,1.0,22347.0
75%,44.0,20.0,4.0,4.0,4.0,2.0,25571.0
max,61.0,127.0,6.0,5.0,22.0,3.0,98678.0


In [12]:
df[features_with_na].head()

Unnamed: 0,Age,TypeofContact,DurationOfPitch,NumberOfFollowups,PreferredPropertyStar,NumberOfTrips,NumberOfChildrenVisiting,MonthlyIncome
0,41.0,Self Enquiry,6.0,3.0,3.0,1.0,0.0,20993.0
1,49.0,Company Invited,14.0,4.0,4.0,2.0,2.0,20130.0
2,37.0,Self Enquiry,8.0,4.0,3.0,7.0,0.0,17090.0
3,33.0,Company Invited,9.0,3.0,3.0,2.0,1.0,17909.0
4,,Self Enquiry,8.0,3.0,4.0,1.0,0.0,18468.0


In [13]:
# imputing null values with median for continous values and mode for categorical values

In [15]:
df['Age'].fillna(df['Age'].median(),inplace=True)
df['TypeofContact'].fillna('Self Enquiry',inplace=True)
df['DurationOfPitch'].fillna(df['DurationOfPitch'].median(),inplace=True)
df['NumberOfFollowups'].fillna(4.0,inplace=True)
df['PreferredPropertyStar'].fillna(3.0,inplace=True)
df['NumberOfTrips'].fillna(0,inplace=True)
df['NumberOfChildrenVisiting'].fillna(1.0,inplace=True)
df['MonthlyIncome'].fillna(df['MonthlyIncome'].median(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].median(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['DurationOfPitch'].fillna(df['DurationOfPitch'].median(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object 

In [16]:
df.isnull().sum()

CustomerID                  0
ProdTaken                   0
Age                         0
TypeofContact               0
CityTier                    0
DurationOfPitch             0
Occupation                  0
Gender                      0
NumberOfPersonVisiting      0
NumberOfFollowups           0
ProductPitched              0
PreferredPropertyStar       0
MaritalStatus               0
NumberOfTrips               0
Passport                    0
PitchSatisfactionScore      0
OwnCar                      0
NumberOfChildrenVisiting    0
Designation                 0
MonthlyIncome               0
dtype: int64

In [17]:
df.drop('CustomerID',inplace=True,axis=1)
df.head()

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,0.0,Manager,20993.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Unmarried,7.0,1,3,0,0.0,Executive,17090.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,0,36.0,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


In [18]:
df['Totalvisitors']=df['NumberOfPersonVisiting']+df['NumberOfChildrenVisiting']
df.drop(columns=['NumberOfPersonVisiting','NumberOfChildrenVisiting'],inplace=True,axis=1)
df.head()

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,Designation,MonthlyIncome,Totalvisitors
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,Manager,20993.0,3.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,Manager,20130.0,5.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,4.0,Basic,3.0,Unmarried,7.0,1,3,0,Executive,17090.0,3.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,3.0,Basic,3.0,Divorced,2.0,1,5,1,Executive,17909.0,3.0
4,0,36.0,Self Enquiry,1,8.0,Small Business,Male,3.0,Basic,4.0,Divorced,1.0,0,5,1,Executive,18468.0,2.0


In [19]:
# get all numeric features
num_features=[feature for feature in df.columns if df[feature].dtype!='O']
print('total numbers of numerial features =',len(num_features))

total numbers of numerial features = 12


In [20]:
# get all categorical features
cat_features=[feature for feature in df.columns if df[feature].dtype=='O']
print('total numbers of categorical features =',len(cat_features))

total numbers of categorical features = 6


In [22]:
# get all discreet features
dis_features=[feature for feature in num_features if len(df[feature].unique())<=25]
print('total numbers of discreet features =',len(dis_features))

total numbers of discreet features = 9


In [23]:
# get all continous features
con_features=[feature for feature in num_features if len(df[feature].unique())>=25]
print('total numbers of continous features =',len(con_features))

total numbers of continous features = 3


## train test split and model trainning

In [25]:
x=df.drop(['ProdTaken'],axis=1)
y=df['ProdTaken']

In [26]:
y.head()

0    1
1    0
2    1
3    0
4    0
Name: ProdTaken, dtype: int64

In [27]:
x.head()

Unnamed: 0,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,Designation,MonthlyIncome,Totalvisitors
0,41.0,Self Enquiry,3,6.0,Salaried,Female,3.0,Deluxe,3.0,Unmarried,1.0,1,2,1,Manager,20993.0,3.0
1,49.0,Company Invited,1,14.0,Salaried,Male,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,Manager,20130.0,5.0
2,37.0,Self Enquiry,1,8.0,Free Lancer,Male,4.0,Basic,3.0,Unmarried,7.0,1,3,0,Executive,17090.0,3.0
3,33.0,Company Invited,1,9.0,Salaried,Female,3.0,Basic,3.0,Divorced,2.0,1,5,1,Executive,17909.0,3.0
4,36.0,Self Enquiry,1,8.0,Small Business,Male,3.0,Basic,4.0,Divorced,1.0,0,5,1,Executive,18468.0,2.0


In [28]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)

In [29]:
x_train.shape,x_test.shape

((3910, 17), (978, 17))

In [30]:
x.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4888 entries, 0 to 4887
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Age                     4888 non-null   float64
 1   TypeofContact           4888 non-null   object 
 2   CityTier                4888 non-null   int64  
 3   DurationOfPitch         4888 non-null   float64
 4   Occupation              4888 non-null   object 
 5   Gender                  4888 non-null   object 
 6   NumberOfFollowups       4888 non-null   float64
 7   ProductPitched          4888 non-null   object 
 8   PreferredPropertyStar   4888 non-null   float64
 9   MaritalStatus           4888 non-null   object 
 10  NumberOfTrips           4888 non-null   float64
 11  Passport                4888 non-null   int64  
 12  PitchSatisfactionScore  4888 non-null   int64  
 13  OwnCar                  4888 non-null   int64  
 14  Designation             4888 non-null   

# converting categorical columns into numerical

In [32]:
cat=x.select_dtypes(include='object').columns
num=x.select_dtypes(exclude='object').columns

In [33]:
cat

Index(['TypeofContact', 'Occupation', 'Gender', 'ProductPitched',
       'MaritalStatus', 'Designation'],
      dtype='object')

In [34]:
num

Index(['Age', 'CityTier', 'DurationOfPitch', 'NumberOfFollowups',
       'PreferredPropertyStar', 'NumberOfTrips', 'Passport',
       'PitchSatisfactionScore', 'OwnCar', 'MonthlyIncome', 'Totalvisitors'],
      dtype='object')

In [35]:
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.compose import ColumnTransformer
numeric_transformer=StandardScaler()
cat_transformer=OneHotEncoder(drop='first')
transformer=ColumnTransformer([('OneHotEncoder',cat_transformer,cat),
                                ('StandardScaler',numeric_transformer,num)])

In [36]:
transformer

In [37]:
#applying transformation in training(fit_transform)
x_train=transformer.fit_transform(x_train)
#applying transformation in testing(transform)
x_test=transformer.transform(x_test)

In [38]:
pd.DataFrame(x_train).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,-0.7214,-1.02035,1.284279,-0.725271,-0.078776,-0.632399,0.67969,0.782966,-0.382245,-0.774151
1,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,-0.7214,0.690023,0.282777,-0.725271,1.495396,-0.632399,0.67969,0.782966,-0.459799,0.643615
2,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.7214,-1.02035,0.282777,1.771041,0.445948,-0.632399,0.67969,0.782966,-0.245196,-0.065268
3,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,-0.7214,-1.02035,1.284279,-0.725271,-0.078776,-0.632399,1.408395,-1.277194,0.213475,-0.065268
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.7214,2.400396,-1.720227,-0.725271,1.495396,-0.632399,-0.049015,-1.277194,-0.024889,2.061382


# Adaboost classifier

In [40]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score,classification_report,ConfusionMatrixDisplay,precision_score,recall_score,f1_score,roc_auc_score,roc_curve

In [41]:
model=AdaBoostClassifier()
model.fit(x_train,y_train)

In [42]:
y_pred_train=model.predict(x_train)
#trainning set performance
train_accuracy=accuracy_score(y_pred_train,y_train)
train_f1_score=f1_score(y_pred_train,y_train,average='weighted')
train_precision_score=precision_score(y_pred_train,y_train)
train_recall_score=recall_score(y_pred_train,y_train)
train_roc_auc_score=roc_auc_score(y_pred_train,y_train)
y_pred_test=model.predict(x_test)
#testing set performance
test_accuracy=accuracy_score(y_pred_test,y_test)
test_f1_score=f1_score(y_pred_test,y_test,average='weighted')
test_precision_score=precision_score(y_pred_test,y_test)
test_recall_score=recall_score(y_pred_test,y_test)
test_roc_auc_score=roc_auc_score(y_pred_test,y_test)
print('model performance for trainning test')
print(f'accuracy= {train_accuracy}')
print(f'f1_score= {train_f1_score}')
print(f'precision_score= {train_precision_score}')
print(f'recall_score= {train_recall_score}')
print(f'roc_auc_score= {train_roc_auc_score}')
print('---------------------------------------------------------------')
print('model performance for test test')
print(f'accuracy= {test_accuracy}')
print(f'f1_score= {test_f1_score}')
print(f'precision_score= {test_precision_score}')
print(f'recall_score= {test_recall_score}')
print(f'roc_auc_score= {test_roc_auc_score}')

model performance for trainning test
accuracy= 0.8580562659846548
f1_score= 0.8779059842729264
precision_score= 0.36899862825788754
recall_score= 0.739010989010989
roc_auc_score= 0.8046436783746428
---------------------------------------------------------------
model performance for test test
accuracy= 0.8394683026584867
f1_score= 0.8634783943305546
precision_score= 0.32460732984293195
recall_score= 0.6888888888888889
roc_auc_score= 0.7718093093093092


# hyper parameter tuning

In [43]:
ada_params={'n_estimators':[50,60,70,80,90],
           'algorithm':['SAMME', 'SAMME.R']}

In [48]:
from sklearn.model_selection import RandomizedSearchCV
model=model=AdaBoostClassifier()
random=RandomizedSearchCV(estimator=model,param_distributions=ada_params,cv=3)
random.fit(x_train,y_train)
random.best_params_

{'n_estimators': 80, 'algorithm': 'SAMME'}

In [49]:
model=AdaBoostClassifier(n_estimators=80,algorithm='SAMME')
model.fit(x_train,y_train)

In [51]:
y_pred_train=model.predict(x_train)
#trainning set performance
train_accuracy=accuracy_score(y_pred_train,y_train)
train_f1_score=f1_score(y_pred_train,y_train,average='weighted')
train_precision_score=precision_score(y_pred_train,y_train)
train_recall_score=recall_score(y_pred_train,y_train)
train_roc_auc_score=roc_auc_score(y_pred_train,y_train)
y_pred_test=model.predict(x_test)
#testing set performance
test_accuracy=accuracy_score(y_pred_test,y_test)
test_f1_score=f1_score(y_pred_test,y_test,average='weighted')
test_precision_score=precision_score(y_pred_test,y_test)
test_recall_score=recall_score(y_pred_test,y_test)
test_roc_auc_score=roc_auc_score(y_pred_test,y_test)
print('model performance for trainning test')
print(f'accuracy= {train_accuracy}')
print(f'f1_score= {train_f1_score}')
print(f'precision_score= {train_precision_score}')
print(f'recall_score= {train_recall_score}')
print(f'roc_auc_score= {train_roc_auc_score}')
print('---------------------------------------------------------------')
print('model performance for test test')
print(f'accuracy= {test_accuracy}')
print(f'f1_score= {test_f1_score}')
print(f'precision_score= {test_precision_score}')
print(f'recall_score= {test_recall_score}')
print(f'roc_auc_score= {test_roc_auc_score}')

model performance for trainning test
accuracy= 0.8465473145780051
f1_score= 0.8798992107262371
precision_score= 0.252400548696845
recall_score= 0.7698744769874477
roc_auc_score= 0.8107067835768075
---------------------------------------------------------------
model performance for test test
accuracy= 0.83640081799591
f1_score= 0.8751176576452021
precision_score= 0.225130890052356
recall_score= 0.7818181818181819
roc_auc_score= 0.8107357431301094


# Gradient boost algo

In [54]:
gb_params={'loss':['log_loss', 'deviance', 'exponential'],
          'criterion':['friedman_mse', 'squared_error'],
          'min_samples_split':[2,8,15,20],
          'n_estimators':[100,300,500],
          'max_depth':[5,8,15,None]}

In [60]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
gbmodel=GradientBoostingClassifier()
gbgrid=RandomizedSearchCV(estimator=gbmodel,param_distributions=gb_params,n_jobs=-1)
gbgrid.fit(x_train,y_train)
gbgrid.best_params_



{'n_estimators': 100,
 'min_samples_split': 15,
 'max_depth': 15,
 'loss': 'deviance',
 'criterion': 'squared_error'}

In [61]:
gbmodelll=GradientBoostingClassifier(n_estimators=100,
 min_samples_split=15,
 max_depth=15,
 loss='deviance',
 criterion='squared_error')

In [62]:
gbmodelll.fit(x_train,y_train)



In [64]:
y_pred_train=gbmodelll.predict(x_train)
#trainning set performance
train_accuracy=accuracy_score(y_pred_train,y_train)
train_f1_score=f1_score(y_pred_train,y_train,average='weighted')
train_precision_score=precision_score(y_pred_train,y_train)
train_recall_score=recall_score(y_pred_train,y_train)
train_roc_auc_score=roc_auc_score(y_pred_train,y_train)
y_pred_test=model.predict(x_test)
#testing set performance
test_accuracy=accuracy_score(y_pred_test,y_test)
test_f1_score=f1_score(y_pred_test,y_test,average='weighted')
test_precision_score=precision_score(y_pred_test,y_test)
test_recall_score=recall_score(y_pred_test,y_test)
test_roc_auc_score=roc_auc_score(y_pred_test,y_test)
print('model performance for trainning test')
print(f'accuracy= {train_accuracy}')
print(f'f1_score= {train_f1_score}')
print(f'precision_score= {train_precision_score}')
print(f'recall_score= {train_recall_score}')
print(f'roc_auc_score= {train_roc_auc_score}')
print('---------------------------------------------------------------')
print('model performance for test test')
print(f'accuracy= {test_accuracy}')
print(f'f1_score= {test_f1_score}')
print(f'precision_score= {test_precision_score}')
print(f'recall_score= {test_recall_score}')
print(f'roc_auc_score= {test_roc_auc_score}')

model performance for trainning test
accuracy= 1.0
f1_score= 1.0
precision_score= 1.0
recall_score= 1.0
roc_auc_score= 1.0
---------------------------------------------------------------
model performance for test test
accuracy= 0.83640081799591
f1_score= 0.8751176576452021
precision_score= 0.225130890052356
recall_score= 0.7818181818181819
roc_auc_score= 0.8107357431301094
