In [62]:
import pandas as pd 
import numpy as np
import seaborn as sns 
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report,ConfusionMatrixDisplay, \
                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve 
from xgboost import XGBClassifier
from catboost import CatBoostClassifier




In [63]:
# Load the raw dataset from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='rename.csv')

In [64]:
# Remove the 'Unnamed: 0' column (presumably an index column saved into the CSV — confirm).
# Re-binding the result instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: a second execution no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [65]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [66]:
# Divide the columns into the dependent variable (y, the 'Sex' label)
# and the independent variables (x, every remaining column).
y = df[['Sex']]
x = df.drop(columns=['Sex'])

In [67]:
# Display the feature frame: every column of df except 'Sex'.
x

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
1,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
2,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
3,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
4,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200,8
...,...,...,...,...,...,...,...,...
4171,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4172,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4173,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4174,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [68]:
# Display the target: at this point a single-column DataFrame holding 'Sex'.
y

Unnamed: 0,Sex
0,M
1,F
2,M
3,I
4,I
...,...
4171,F
4172,M
4173,M
4174,F


In [69]:
#segregate numerical and categorical columns 
# Segregate the numerical feature columns.
# Select by positive match on numeric dtypes, and from the feature frame `x`
# rather than `df`: filtering on "dtype != object" over the whole frame would
# also admit any non-object, non-numeric dtype (category, bool, datetime,
# dedicated string dtypes) and could pick up the target column, which is not
# present in `x` when the ColumnTransformer later selects these names.
numerical_columns = x.select_dtypes(include='number').columns
print(f'this is our numerical columns,:{numerical_columns}')


this is our numerical columns,:Index(['Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight',
       'Viscera weight', 'Shell weight', 'Rings'],
      dtype='object')


In [70]:
from sklearn.preprocessing import LabelEncoder

# Encode the categorical 'Sex' target as integer class labels.
# NOTE: this rebinds `y` from a one-column DataFrame to a 1-D array of codes.
le = LabelEncoder()
le.fit(df['Sex'])
y = le.transform(df['Sex'])

In [71]:
# df itself is not modified by the label encoding; only `y` was reassigned.
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
1,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
2,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
3,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7
4,I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8


In [72]:
# Numeric preprocessing: fill missing values with the column median,
# then standardise each column.
num_pipleine = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Apply the numeric pipeline to the numerical columns identified earlier.
preprocessor = ColumnTransformer([
    ('num_pipeline', num_pipleine, numerical_columns),
])

In [73]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=40,
)

In [74]:
# Fit the preprocessor on the training split only, then apply the fitted
# transformation to the test split.
# NOTE: this cell overwrites x_train / x_test with the transformed frames, so
# re-run the train/test split cell before executing it a second time.
train_matrix = preprocessor.fit_transform(x_train)
test_matrix = preprocessor.transform(x_test)
feature_names = preprocessor.get_feature_names_out()

x_train = pd.DataFrame(train_matrix, columns=feature_names)
x_test = pd.DataFrame(test_matrix, columns=feature_names)


In [75]:
# Inspect the first rows of the preprocessed training features.
x_train.head()

Unnamed: 0,num_pipeline__Length,num_pipeline__Diameter,num_pipeline__Height,num_pipeline__Whole weight,num_pipeline__Shucked weight,num_pipeline__Viscera weight,num_pipeline__Shell weight,num_pipeline__Rings
0,0.426817,0.427829,0.156839,-0.065058,0.023754,-0.268466,0.157214,0.037307
1,-2.886386,-2.835071,-2.540617,-1.627362,-1.560733,-1.584279,-1.667211,-1.853697
2,-0.442899,-0.626339,-0.613863,-0.68613,-0.707032,-0.65547,-0.515319,0.352474
3,-0.070163,0.126638,0.028388,-0.117743,-0.48355,-0.236595,0.371852,0.982809
4,-0.401484,-0.325148,-0.356963,-0.474378,-0.365104,-0.591728,-0.415155,0.037307


In [76]:
# Inspect the first rows of the preprocessed test features.
x_test.head()

Unnamed: 0,num_pipeline__Length,num_pipeline__Diameter,num_pipeline__Height,num_pipeline__Whole weight,num_pipeline__Shucked weight,num_pipeline__Viscera weight,num_pipeline__Shell weight,num_pipeline__Rings
0,0.799553,0.578424,1.184441,0.905556,0.984727,1.152066,0.586491,0.667641
1,-0.360069,-0.525942,-0.613863,-0.503759,-0.485785,-0.673681,-0.386536,-0.593028
2,-0.608559,-0.776934,-0.870764,-0.923211,-0.769607,-0.924096,-1.00541,-0.908195
3,-0.401484,-0.325148,-0.356963,-0.503759,-0.36287,-0.864907,-0.59402,0.037307
4,0.592477,0.528226,0.79909,0.612751,0.9445,0.687661,0.192987,0.037307


In [1]:
def evaluate_models(true, predicted, average=None):
    """Compute accuracy, recall, precision, F1 and ROC AUC for hard class predictions.

    The scikit-learn defaults (``average='binary'`` and a binary-only
    ``roc_auc_score`` call) raise ``ValueError`` for targets with more than
    two classes, so the averaging mode is chosen from the labels that
    actually occur.

    Parameters
    ----------
    true : array-like of shape (n_samples,)
        Ground-truth class labels.
    predicted : array-like of shape (n_samples,)
        Predicted class labels (hard predictions, not probabilities).
    average : str or None, default=None
        Averaging strategy forwarded to recall, precision and F1. ``None``
        selects ``'binary'`` when at most two distinct labels occur (the
        previous behaviour) and ``'weighted'`` otherwise.

    Returns
    -------
    tuple
        ``(acc, recall, precision, f1, roc_auc)``.

    Raises
    ------
    ValueError
        Propagated from scikit-learn, e.g. when ROC AUC is undefined because
        a class has no positive samples in ``true``.

    Notes
    -----
    ROC AUC computed from hard labels is a coarse estimate; pass predicted
    probabilities to ``roc_auc_score`` directly when they are available.
    """
    true_labels = np.asarray(true).ravel()
    predicted_labels = np.asarray(predicted).ravel()
    # Every label seen on either side, sorted: defines the class axis.
    classes = np.unique(np.concatenate([true_labels, predicted_labels]))
    is_multiclass = classes.size > 2

    if average is None:
        average = 'weighted' if is_multiclass else 'binary'

    acc = accuracy_score(true, predicted)
    recall = recall_score(true, predicted, average=average)
    precision = precision_score(true, predicted, average=average)
    f1 = f1_score(true, predicted, average=average)

    if is_multiclass:
        # roc_auc_score needs an (n_samples, n_classes) score matrix whose rows
        # sum to 1 for multiclass targets; one-hot encode the hard predictions.
        one_hot_scores = (predicted_labels[:, None] == classes[None, :]).astype(float)
        # Multiclass ROC AUC is only averaged 'macro' or 'weighted' here.
        auc_average = average if average in ('macro', 'weighted') else 'macro'
        roc_auc = roc_auc_score(
            true_labels,
            one_hot_scores,
            multi_class='ovr',
            average=auc_average,
            labels=classes,
        )
    else:
        roc_auc = roc_auc_score(true, predicted)

    return acc, recall, precision, f1, roc_auc

In [78]:
 
# Seed every estimator that accepts one so a fresh Restart & Run All gives
# the same scores; the value matches the seed used for the train/test split.
MODEL_SEED = 40

# Candidate classifiers, keyed by the display name used in the report.
classification_models = {
    "Random Forest": RandomForestClassifier(random_state=MODEL_SEED),
    "Decision Tree": DecisionTreeClassifier(random_state=MODEL_SEED),
    "Gradient Boosting": GradientBoostingClassifier(random_state=MODEL_SEED),
    "Logistic Regression": LogisticRegression(),
    "K-Neighbors Classifier": KNeighborsClassifier(),
    "XGBClassifier": XGBClassifier(random_state=MODEL_SEED),
    "CatBoosting Classifier": CatBoostClassifier(verbose=False, random_seed=MODEL_SEED),
    "Support Vector Classifier": SVC(),
    "AdaBoost Classifier": AdaBoostClassifier(random_state=MODEL_SEED),
}

# Parallel lists consumed by the results table further down; they are reset
# here so re-running this cell does not accumulate duplicate entries.
model_list = []
accuracy_list = []

for model_name, model in classification_models.items():
    model.fit(x_train, y_train)

    # Mean accuracy on each split; a large train/test gap signals overfitting.
    train_accuracy = model.score(x_train, y_train)
    test_accuracy = model.score(x_test, y_test)

    print(f"Model: {model_name}")
    print(f"Training Accuracy: {train_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    accuracy_list.append(test_accuracy)
    model_list.append(model_name)
    print('=='*50)

Model: Random Forest
Training Accuracy: 1.0000
Test Accuracy: 0.5339
Model: Decision Tree
Training Accuracy: 1.0000
Test Accuracy: 0.5012
Model: Gradient Boosting
Training Accuracy: 0.7297
Test Accuracy: 0.5467
Model: Logistic Regression
Training Accuracy: 0.5597
Test Accuracy: 0.5595
Model: K-Neighbors Classifier
Training Accuracy: 0.6784
Test Accuracy: 0.5092
Model: XGBClassifier
Training Accuracy: 0.9781
Test Accuracy: 0.5291
Model: CatBoosting Classifier
Training Accuracy: 0.9189
Test Accuracy: 0.5339
Model: Support Vector Classifier
Training Accuracy: 0.5765
Test Accuracy: 0.5603
Model: AdaBoost Classifier
Training Accuracy: 0.5652
Test Accuracy: 0.5403


In [79]:
# Check which encoded class labels are present in each split.
for split_name, split_labels in (("y_train", y_train), ("y_test", y_test)):
    print(f"Unique values in {split_name}:", np.unique(split_labels))


Unique values in y_train: [0 1 2]
Unique values in y_test: [0 1 2]


In [80]:
# Names of the fitted models, in the order they were evaluated.
model_list

['Random Forest',
 'Decision Tree',
 'Gradient Boosting',
 'Logistic Regression',
 'K-Neighbors Classifier',
 'XGBClassifier',
 'CatBoosting Classifier',
 'Support Vector Classifier',
 'AdaBoost Classifier']

In [81]:
# Results: one row per model, ranked by test accuracy (best first).
score_rows = list(zip(model_list, accuracy_list))
leaderboard = pd.DataFrame(score_rows, columns=['Model Name', 'Accuracy Score'])
leaderboard.sort_values(by=["Accuracy Score"], ascending=False)

Unnamed: 0,Model Name,Accuracy Score
7,Support Vector Classifier,0.560255
3,Logistic Regression,0.559457
2,Gradient Boosting,0.546688
8,AdaBoost Classifier,0.540303
0,Random Forest,0.533919
6,CatBoosting Classifier,0.533919
5,XGBClassifier,0.52913
4,K-Neighbors Classifier,0.509178
1,Decision Tree,0.501197


In [83]:
# Fit the top-ranked model from the comparison (Support Vector Classifier)
# on its own and report its held-out accuracy.
# SVC and accuracy_score are already imported in the notebook's import cell,
# so they are not re-imported here.
svc = SVC()
svc.fit(x_train, y_train)

y_pred = svc.predict(x_test)

# accuracy_score's signature is (y_true, y_pred); pass the ground truth first.
accuracy = accuracy_score(y_test, y_pred)

print("Accuracy of the Svc classifier model: {:.2f}%".format(accuracy * 100))


Accuracy of the Svc classifier model: 56.03%
