 # 🔴 Task 29-> Hyperparameter Tuning Techniques

## ✨Import the libraries

In [66]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import recall_score, f1_score, roc_auc_score
from scipy.sparse import issparse
from sklearn.impute import SimpleImputer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

## ✨Import Titanic Data and make it usefull

In [67]:
titanic = pd.read_csv("titanic.csv")
features = titanic.drop(columns=['Survived'])
target = titanic['Survived']

numeric_features = features.select_dtypes(include=['int64', 'float64']).columns
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

categorical_features = features.select_dtypes(include=['object']).columns
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

## ✨Define Models and train it

In [68]:
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "KNN": KNeighborsClassifier()
}

for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    print(f"{name} Accuracy: {scores.mean():.2f} (+/- {scores.std():.2f})")

Logistic Regression Accuracy: 0.70 (+/- 0.04)
Decision Tree Accuracy: 0.59 (+/- 0.11)
Random Forest Accuracy: 0.69 (+/- 0.08)
SVM Accuracy: 0.64 (+/- 0.02)
KNN Accuracy: 0.55 (+/- 0.10)


## ✨HyperParameter Tuning

In [69]:
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10]
}

rf = RandomForestClassifier()
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)

print("Best Parameters:", grid_search.best_params_)
print("Best Accuracy:", grid_search.best_score_)

Best Parameters: {'max_depth': 5, 'min_samples_split': 2, 'n_estimators': 200}
Best Accuracy: 0.7336747759282971


## ✨Model Evaluation and Accuracy

In [70]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=100, max_depth=3, min_samples_split=2)
rf.fit(X_train, y_train)
test_accuracy = rf.score(X_test, y_test)
print("Test Accuracy:", test_accuracy)

Test Accuracy: 0.6783216783216783
