# 🔴 Task 26-> Classification Algorithms using scikit-learn

## The main objective of this task is to implement and compare the performance of several popular classification algorithms using the scikit-learn library in Python.

#### Import the Libraries

In [100]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import recall_score, f1_score, roc_auc_score
from scipy.sparse import issparse

#### Load the Titanic DataSet an Process the Data to make it useful

In [101]:
titanic = pd.read_csv("titanic.csv")
features = titanic.drop(columns=['Survived'])
target = titanic['Survived']

numeric_features = features.select_dtypes(include=['int64', 'float64']).columns
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = features.select_dtypes(include=['object']).columns
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

### 1- Logistic Regression: Models the probability of a binary outcome using the logistic function.

#### ✨  Split the Data

In [102]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

#### ✨ Implement the Classficication models

In [103]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "k-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(probability=True),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Naive Bayes": make_pipeline(StandardScaler(with_mean=False), GaussianNB())
}

#### ✨ Evaluate Classification Models

In [104]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    if hasattr(model, "predict_proba"):
        roc_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    else:
        roc_auc = "N/A"
    return recall, f1, roc_auc

#### ✨ Train, Predict & Evaluate

In [105]:
for name, model in models.items():
    if name == "Naive Bayes":
        pipeline = Pipeline(steps=[
            ('preprocessor', preprocessor),
            ('to_dense', FunctionTransformer(lambda x: x.toarray(), accept_sparse=True)),
            ('model', model)
        ])
    else:
        pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
    
    pipeline.fit(X_train, y_train)

## Print the Performance Metrics

In [106]:
print(f"{name} Performance:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
print(f"ROC-AUC: {roc_auc:.2f}")
print()

Naive Bayes Performance:
Accuracy: 0.62
Precision: 0.62
Recall: 0.68
F1-score: 0.74
ROC-AUC: 0.89

