In [1]:
import numpy as np
import pandas as pd 
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
# Load the training and test datasets from their CSV files
TRAIN_CSV = 'titanic/train.csv'
TEST_CSV = 'titanic/test.csv'
train_df, test_df = pd.read_csv(TRAIN_CSV), pd.read_csv(TEST_CSV)

In [3]:
# Impute missing values in the 'Age' feature.
# The fill value is computed once from the training data and reused for the
# test data, so both frames go through exactly the same transformation and no
# statistic of the test set is used during preprocessing.
age_fill_value = train_df['Age'].mean()
train_df['Age'] = train_df['Age'].fillna(age_fill_value)
test_df['Age'] = test_df['Age'].fillna(age_fill_value)

In [4]:
# Fill missing embarkation ports with 'S'. The same fill is applied to both
# frames so the training and test data are treated identically before the
# column is one-hot encoded.
train_df['Embarked'] = train_df['Embarked'].fillna('S')
test_df['Embarked'] = test_df['Embarked'].fillna('S')

In [5]:
# Derive 'FamilySize' as the sum of the 'SibSp' and 'Parch' columns,
# for the training frame and the test frame alike
train_df = train_df.assign(FamilySize=lambda frame: frame['SibSp'] + frame['Parch'])
test_df = test_df.assign(FamilySize=lambda frame: frame['SibSp'] + frame['Parch'])

In [6]:
# One-hot encode the categorical variables
categorical_cols = ['Sex', 'Embarked']
train_df = pd.get_dummies(train_df, columns=categorical_cols)
test_df = pd.get_dummies(test_df, columns=categorical_cols)

# Encoding each frame on its own only creates indicator columns for the
# categories present in that frame. Align the test columns to the training
# columns (minus the target) so both frames expose the same features in the
# same order: indicators missing from the test data are added as all-zero,
# and columns that exist only in the test data are discarded.
feature_cols = [col for col in train_df.columns if col != 'Survived']
test_df = test_df.reindex(columns=feature_cols, fill_value=0)


In [7]:
# Remove the columns that are not fed to the model, from both frames
columns_to_drop = ['Name', 'Ticket', 'Cabin']
train_df, test_df = (
    frame.drop(columns=columns_to_drop) for frame in (train_df, test_df)
)

In [8]:
# The 'Survived' column of the training frame is the label to predict
y_train = train_df.loc[:, 'Survived']

In [9]:
# Create a GradientBoostingClassifier and fit it to the training data.
# A fixed random_state makes the fitted model (and therefore the predictions)
# reproducible across "Restart Kernel -> Run All".
# NOTE(review): every column except 'Survived' is used as a feature; if
# 'PassengerId' is still present at this point it is a row identifier rather
# than a predictor -- consider excluding it here and in the prediction cell.
X_train = train_df.drop(columns=['Survived'])
gbc = GradientBoostingClassifier(random_state=42)
gbc.fit(X_train, y_train)

In [10]:
# Handle values that are still missing before predicting.
#
# The classifier has already been fitted in the previous cell, so dropping
# training rows here cannot change the model; the statement is kept only so
# that later cells see the same training frame as before.
train_df = train_df.dropna()

# On the test side, impute instead of dropping: every row that is dropped is
# a passenger that ends up without a prediction in the output file. Remaining
# gaps in numeric columns are filled with the median of the same column in the
# training data.
numeric_cols = test_df.select_dtypes(include='number').columns.intersection(train_df.columns)
test_df = test_df.fillna(train_df[numeric_cols].median())

# Fallback: rows that still contain a missing value (in a column that could
# not be imputed above) are removed, because the fitted model cannot score
# them.
test_df = test_df.dropna()

In [11]:
# Score the prepared test frame with the fitted classifier
X_test = test_df
test_predictions = gbc.predict(X_test)

In [12]:
# Pair each test passenger's id with its predicted label
predictions_df = test_df[['PassengerId']].assign(Survived=test_predictions)

# Write the two-column result to disk without the index column
predictions_df.to_csv('titanic_predictions.csv', index=False)