In [68]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler,OneHotEncoder,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,confusion_matrix,precision_score,recall_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.naive_bayes import BernoulliNB


In [54]:
df=sns.load_dataset('titanic')

In [55]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [56]:
X=df.drop('survived',axis=1)

y=df['survived']

In [57]:
Xtrain,Xtest,ytrain,ytest=train_test_split(X,y,train_size=0.8,random_state=42)

In [58]:
cat_col=X.select_dtypes(include='object').columns
num_col=X.select_dtypes(include=['int64','float64']).columns

In [59]:
num_pipe=Pipeline(
    steps=[('impute',SimpleImputer(strategy='mean'))
           ,('scalar',StandardScaler())
           ]
)

cat_pipe=Pipeline(
    steps=[('impute',SimpleImputer(strategy='most_frequent')),
           ('encode',OneHotEncoder()),
           

    ]
)

In [60]:
preprocessing=ColumnTransformer(
    transformers=[
        ('num',num_pipe,num_col),
        ('cat',cat_pipe,cat_col)
    ]
)

In [61]:
pipe=Pipeline(
    steps=[('pre',preprocessing),
           ('model',BernoulliNB())]
)

In [62]:
pipe.fit(Xtrain,ytrain)

0,1,2
,steps,"[('pre', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,missing_values,
,strategy,'most_frequent'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'error'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,alpha,1.0
,force_alpha,True
,binarize,0.0
,fit_prior,True
,class_prior,


In [64]:
pred=pipe.predict(Xtest)

In [72]:
ytest.value_counts()

survived
0    105
1     74
Name: count, dtype: int64

In [66]:
confusion_matrix(ytest,pred)

array([[105,   0],
       [  0,  74]])

In [69]:
precision_score(ytest,pred)

1.0

In [70]:
recall_score(ytest,pred)

1.0

In [67]:
f1_score(ytest,pred)

1.0