In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder

# Load data
train = pd.read_csv("C:/Users/Asus/Downloads/train.csv")
test = pd.read_csv("C:/Users/Asus/Downloads/train.csv")
gender_submission = pd.read_csv("C:/Users/Asus/Downloads/gender_submission.csv")

# Combine train and test data
combined = pd.concat([train, test], sort=False)

# Remove NaN values
combined.dropna(inplace=True)

# Feature engineering
# Encoding categorical variables
encoder = LabelEncoder()
combined['Sex'] = encoder.fit_transform(combined['Sex'])
combined['Embarked'] = encoder.fit_transform(combined['Embarked'])

# Extract titles from names
combined['Title'] = combined['Name'].str.extract(' ([A-Za-z]+)\.', expand=False)
combined['Title'] = combined['Title'].replace(['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr',
                                               'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
combined['Title'] = combined['Title'].replace('Mlle', 'Miss')
combined['Title'] = combined['Title'].replace('Ms', 'Miss')
combined['Title'] = combined['Title'].replace('Mme', 'Mrs')
combined['Title'] = encoder.fit_transform(combined['Title'])

# Create new feature: Family size
combined['FamilySize'] = combined['SibSp'] + combined['Parch'] + 1

# Define features and target variable
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Title', 'FamilySize']
X = combined[features]
y = combined['Survived']

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define different combinations of hyperparameters
hyperparameters = [
    {'n_estimators': 50, 'learning_rate': 0.05, 'max_depth': 2, 'random_state': 42},
    {'n_estimators': 150, 'learning_rate': 0.2, 'max_depth': 4, 'random_state': 42},
    {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 5, 'random_state': 42},
    {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3, 'random_state': 42},
    {'n_estimators': 120, 'learning_rate': 0.1, 'max_depth': 6, 'random_state': 42},
    {'n_estimators': 80, 'learning_rate': 0.15, 'max_depth': 3, 'random_state': 42},
    {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 4, 'random_state': 42},
    {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 6, 'random_state': 42},
    {'n_estimators': 150, 'learning_rate': 0.2, 'max_depth': 5, 'random_state': 42},
    {'n_estimators': 120, 'learning_rate': 0.15, 'max_depth': 4, 'random_state': 42}
]


# Train Gradient Boosting Classifier with different hyperparameter combinations
for i, params in enumerate(hyperparameters, start=1):
    print(f"\nTraining Model {i} with Hyperparameters: {params}")
    
    # Initialize Gradient Boosting Classifier with specified hyperparameters
    gbt_clf = GradientBoostingClassifier(**params)

    # Train the model
    gbt_clf.fit(X_train, y_train)

    # Make predictions
    predictions = gbt_clf.predict(X_val)

    # Evaluate accuracy
    accuracy = accuracy_score(y_val, predictions)
    print("Accuracy:", accuracy)



Training Model 1 with Hyperparameters: {'n_estimators': 50, 'learning_rate': 0.05, 'max_depth': 2, 'random_state': 42}
Accuracy: 0.8648648648648649

Training Model 2 with Hyperparameters: {'n_estimators': 150, 'learning_rate': 0.2, 'max_depth': 4, 'random_state': 42}
Accuracy: 0.972972972972973

Training Model 3 with Hyperparameters: {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 5, 'random_state': 42}
Accuracy: 0.972972972972973

Training Model 4 with Hyperparameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3, 'random_state': 42}
Accuracy: 0.9324324324324325

Training Model 5 with Hyperparameters: {'n_estimators': 120, 'learning_rate': 0.1, 'max_depth': 6, 'random_state': 42}
Accuracy: 0.972972972972973

Training Model 6 with Hyperparameters: {'n_estimators': 80, 'learning_rate': 0.15, 'max_depth': 3, 'random_state': 42}
Accuracy: 0.9324324324324325

Training Model 7 with Hyperparameters: {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 4, 'random_

In [7]:
# Define the accuracies obtained for each model
accuracies = [
    0.8648648648648649,  # Accuracy of Model 1
    0.972972972972973,   # Accuracy of Model 2
    0.972972972972973,   # Accuracy of Model 3
    0.9324324324324325,  # Accuracy of Model 4
    0.972972972972973,   # Accuracy of Model 5
    0.9324324324324325,  # Accuracy of Model 6
    0.9459459459459459,  # Accuracy of Model 7
    0.972972972972973,   # Accuracy of Model 8
    0.972972972972973,   # Accuracy of Model 9
    0.972972972972973    # Accuracy of Model 10
]

# Find the index of the model with the highest accuracy
index_of_max_accuracy = accuracies.index(max(accuracies))

# Print the maximum accuracy and corresponding hyperparameters
print("Maximum Accuracy:", max(accuracies))
print("Hyperparameters of the model with maximum accuracy:")
print(hyperparameters[index_of_max_accuracy])


Maximum Accuracy: 0.972972972972973
Hyperparameters of the model with maximum accuracy:
{'n_estimators': 150, 'learning_rate': 0.2, 'max_depth': 4, 'random_state': 42}
