In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory once and print the full path of every file found
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic-dataset/train.csv
/kaggle/input/titanic-dataset/test.csv
/kaggle/input/titanic-dataset/gender_submission.csv


In [2]:
# Import RandomForestClassifier, LabelEncoder and mean_absolute_error
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error

In [3]:
# Path of the training file (train.csv)
titanic_train_file_path = '../input/titanic-dataset/train.csv'
# Path of the file used as the validation set in this notebook (test.csv)
titanic_validation_file_path = '../input/titanic-dataset/test.csv'

# Load both .csv files into DataFrames, training file first
train_titanic_data, validation_titanic_data = map(
    pd.read_csv, (titanic_train_file_path, titanic_validation_file_path))

In [4]:
# The value the model learns to predict: the 'Survived' column of the training data
target_variable = train_titanic_data['Survived']

In [5]:
# Convert the 'Sex' column to numerical data

# Initialise the encoder
encoder = LabelEncoder()

# Learn the label -> integer mapping from the training data and encode it
train_titanic_data['Sex'] = encoder.fit_transform(train_titanic_data['Sex'])

# Encode the validation data with the mapping learned from the training data.
# Calling fit_transform here would re-fit the encoder on the validation column,
# so the same label could receive a different integer if the two files did not
# contain exactly the same set of values. transform() instead raises a
# ValueError when it meets a label that was not present during fitting.
validation_titanic_data['Sex'] = encoder.transform(validation_titanic_data['Sex'])

In [6]:
# Replace 'NaN' values in the 'Age' column by the mean age of the passenger's class


def _fill_age_with_class_mean(passengers, class_mean_age):
    """Return the 'Age' column of ``passengers`` with missing values filled in.

    Parameters
    ----------
    passengers : pandas.DataFrame
        Frame holding at least the 'Age' and 'Pclass' columns.
    class_mean_age : pandas.Series
        Mean age indexed by 'Pclass' value.

    Returns
    -------
    pandas.Series
        'Age' where every missing entry is replaced by the mean age of that
        row's 'Pclass'; entries that already have a value are left untouched.
    """
    # map() looks up each row's class mean; fillna() only uses it where Age is NaN.
    # This is the vectorised equivalent of a row-wise apply(axis=1).
    return passengers['Age'].fillna(passengers['Pclass'].map(class_mean_age))


# Compute the mean age for each passenger class for the training dataset
mean_age_train = train_titanic_data.groupby('Pclass')['Age'].mean()

# Compute the mean age for each passenger class for the validation dataset
mean_age_validation = validation_titanic_data.groupby('Pclass')['Age'].mean()

# Fill the gaps in each dataset with that dataset's own per-class means
train_titanic_data['Age'] = _fill_age_with_class_mean(train_titanic_data, mean_age_train)
validation_titanic_data['Age'] = _fill_age_with_class_mean(
    validation_titanic_data, mean_age_validation)

In [7]:
# Columns handed to the model as predictors
features = [
    'Pclass',
    'Sex',
    'Age',
    'SibSp',
    'Parch',
]

In [8]:
# Training inputs: every row of the training data, restricted to the feature columns
independent_variable = train_titanic_data.loc[:, features]

In [9]:
# Specify the model. random_state is fixed so that the forest is built the same
# way on every run; without it each run can produce different predictions.
titanic_model = RandomForestClassifier(n_estimators = 50, max_depth = 5, random_state = 42)

In [10]:
# Train the forest on the selected feature columns against the 'Survived' target
titanic_model.fit(X = independent_variable, y = target_variable)

In [11]:
# Predict survivability for the same rows the model was fitted on (in-sample)
# NOTE(review): `prediction` is not read by any later cell visible here
prediction = titanic_model.predict(X = independent_variable)

In [12]:
# Model inputs for the validation data: the same feature columns as used for training
test_independent_variable = validation_titanic_data.loc[:, features]

In [13]:
# Predict survivability for the validation data, then round the predictions
# to zero decimals and cast them to int
validation_prediction = (
    titanic_model
    .predict(test_independent_variable)
    .round(0)
    .astype(int)
)

In [14]:
# Pair each validation passenger id with its predicted 'Survived' value
submission_columns = {
    'PassengerId': validation_titanic_data['PassengerId'],
    'Survived': validation_prediction,
}
output = pd.DataFrame(submission_columns)

# Write the submission file without the DataFrame index and confirm on stdout
output.to_csv('submission.csv', index = False)
print("Your submission was successfully saved!")

Your submission was successfully saved!
