# A machine learning model to predict Titanic passenger survival from tabular data

In [None]:
#!pip install pandas numpy scikit-learn tensorflow


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed



In [None]:
# Path to the Titanic dataset's CSV file on Kaggle.
# The competition's training file is treated as the full dataset here; it is
# split into train and test subsets later in this notebook.
file_path = '/kaggle/input/titanic-machine-learning-from-disaster/train.csv'

In [None]:
# Load the CSV at `file_path` into a pandas DataFrame
titanic_data = pd.read_csv(file_path)

# Display the first few rows of the dataset (rendered as this cell's output,
# since it is the last expression in the cell)
titanic_data.head()

In [None]:
# Get a summary of the dataset (info() prints its report directly)
titanic_data.info()

# A notebook cell only renders the value of its *last* bare expression, so the
# intermediate results below are passed to display() explicitly; as bare
# expressions they would be computed and silently discarded.
# display() is made available by the Jupyter/IPython kernel without an import.

# Check for missing values (number of nulls per column)
display(titanic_data.isnull().sum())

# Display descriptive statistics
display(titanic_data.describe())

# Show the first 10 rows of the dataset
titanic_data.head(10)


In [None]:
# Get the number of survivors and non-survivors.
# This is not the last expression in the cell, so it must be sent to display()
# explicitly; as a bare expression its result would never be rendered.
# display() is made available by the Jupyter/IPython kernel without an import.
display(titanic_data['Survived'].value_counts())

# Visualise where values are missing: one heatmap column per DataFrame column,
# with null cells highlighted
sns.heatmap(titanic_data.isnull(), cbar=False)
plt.show()


In [None]:
# Fill missing values: mean for 'Age', most frequent value for 'Embarked'.
# The result is assigned back to the column rather than calling
# `titanic_data[col].fillna(..., inplace=True)`: that form is chained
# assignment on a column selection, which emits a FutureWarning in pandas 2.x
# and leaves `titanic_data` unchanged once Copy-on-Write is enabled.
titanic_data['Age'] = titanic_data['Age'].fillna(titanic_data['Age'].mean())
titanic_data['Embarked'] = titanic_data['Embarked'].fillna(titanic_data['Embarked'].mode()[0])

# Drop columns that are not used as features: 'Name', 'Ticket' and 'Cabin'
titanic_data = titanic_data.drop(columns=['Name', 'Ticket', 'Cabin'])

# Convert categorical data to numeric using LabelEncoder.
# The same encoder object is re-fitted for each column, so after this cell
# `label_encoder` only holds the classes of 'Embarked'.
label_encoder = LabelEncoder()
titanic_data['Sex'] = label_encoder.fit_transform(titanic_data['Sex'])
titanic_data['Embarked'] = label_encoder.fit_transform(titanic_data['Embarked'])

# Prepare features and target variable
# NOTE(review): every remaining column except 'Survived' becomes a feature;
# presumably that includes the dataset's passenger-id column, which looks like
# a row identifier rather than a predictor — confirm whether it should be dropped.
X = titanic_data.drop('Survived', axis=1)
y = titanic_data['Survived']

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Seed the Python, NumPy and backend random number generators so that weight
# initialisation and batch shuffling are repeatable across runs, consistent
# with the train/test split, which is already pinned with random_state=42.
set_random_seed(42)

# Create a Sequential neural network
model = Sequential()

# Add input layer; its input width is taken from the number of feature columns
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))

# Add hidden layers
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))

# Add output layer: a single sigmoid unit for the binary target
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): the held-out test split is also passed as validation data, so
# the per-epoch validation metrics and the later test score come from the same
# rows — confirm this is intended.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))


In [None]:
# Evaluate the model on the held-out split; with the compile settings used in
# this notebook, evaluate() returns the loss followed by the accuracy metric
loss, accuracy = model.evaluate(X_test, y_test)
# Report accuracy as a percentage with two decimal places
print(f"Test Accuracy: {accuracy * 100:.2f}%")


In [None]:
# Turn the model's predicted probabilities for X_test into hard class labels:
# the comparison yields True where the probability exceeds 0.5 (positive
# class) and False otherwise, and astype(int) maps True/False to 1/0.
predictions = (model.predict(X_test) > 0.5).astype(int)

In [None]:
# Print the predicted class labels held in `predictions`
print(predictions)

In [None]:
# Sum of all entries in `predictions`, shown as the cell's output
predictions.sum()