<a href="https://colab.research.google.com/github/s-md-ahmed/titanicprediction/blob/main/titanicdeeplearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install TensorFlow into the notebook runtime (the leading `!` is the
# IPython shell escape, so this line only works inside a notebook cell).
# NOTE(review): hosted Colab runtimes usually ship TensorFlow already --
# confirm whether this cell is still needed.
!pip install tensorflow



In [None]:
# Sanity check: report which TensorFlow build the runtime provides
# (the recorded output of this cell shows 2.17.0).
import tensorflow as tf
print(tf.__version__)


2.17.0


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the training dataset
df = pd.read_csv("train.csv")

# Drop columns that are not used as model features
dropped_columns = ["PassengerId", "Name", "Ticket", "Cabin"]
df.drop(dropped_columns, inplace=True, axis=1)

# Map 'Sex' to 0 and 1
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# One-Hot Encoding for 'Embarked' and drop first to avoid dummy variable trap.
# NOTE: rows whose 'Embarked' is missing end up with all dummy columns equal
# to 0, i.e. they are indistinguishable from the dropped baseline category.
df = pd.get_dummies(df, columns=['Embarked'], drop_first=True)

# Separate features and target variable
X = df.drop("Survived", axis=1)  # Features
y = df["Survived"]  # Target variable

# Split BEFORE imputing and scaling so that no statistic used for
# preprocessing is computed from the held-out rows (avoids data leakage).
# The row partition depends only on the number of rows and random_state,
# so it is the same partition as splitting the already-scaled matrix.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Fill missing 'Age' values with the mean age of the TRAINING split only.
# fillna() returns new frames, so the split results are never mutated in place.
mean_age = X_train_raw['Age'].mean()
X_train_raw = X_train_raw.fillna({'Age': mean_age})
X_test_raw = X_test_raw.fillna({'Age': mean_age})

# Scale the features using StandardScaler (good for deep learning models).
# The scaler is fitted on the training split and merely applied to the
# held-out split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Keep the full imputed feature frame and its scaled counterpart available
# under their original names for any later cell that expects them.
X = X.fillna({'Age': mean_age})
X_scaled = scaler.transform(X)

# Build a simple feedforward neural network.
# The input shape is declared with an explicit Input object as the first
# layer; passing `input_dim` to a Dense layer is discouraged by Keras 3 and
# triggers a warning when the model is constructed.
n_features = X_train.shape[1]

model = Sequential()
model.add(tf.keras.Input(shape=(n_features,)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))  # Dropout layer for regularization
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))  # Dropout layer for regularization
model.add(Dense(1, activation='sigmoid'))  # Output layer for binary classification

# Compile the model: binary cross-entropy matches the single sigmoid output
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.001),
              metrics=['accuracy'])

# Fit the network for 100 epochs in mini-batches of 32; the held-out split is
# passed as validation data so its loss/accuracy are logged after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_test, y_test),
)

# Loss and accuracy on the held-out split
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 labels
test_probabilities = model.predict(X_test)
y_test_pred_dl = (test_probabilities > 0.5).astype("int32")

# Per-class precision / recall / F1 on the held-out split
test_report = classification_report(y_test, y_test_pred_dl)
print("Test Set Classification Report (Deep Learning):\n", test_report)

# Loss and accuracy on the training split, for comparison with the
# held-out figures above
train_loss, train_accuracy = model.evaluate(X_train, y_train)
print(f"Train Accuracy: {train_accuracy:.4f}")

# Load the test dataset provided by Kaggle
test_df = pd.read_csv("test.csv")

# Drop unnecessary columns from the test dataset ('PassengerId' is kept
# because the submission file needs it)
dropped_columns_test = ["Name", "Ticket", "Cabin"]
test_df.drop(dropped_columns_test, inplace=True, axis=1)

# Map 'Sex' to 0 and 1
test_df['Sex'] = test_df['Sex'].map({'male': 0, 'female': 1})

# One-Hot Encoding for 'Embarked' in the test dataset.
# No drop_first here: which category gets dropped would depend on the
# categories present in THIS file. The reindex below keeps exactly the dummy
# columns the model was trained on instead.
test_df = pd.get_dummies(test_df, columns=['Embarked'])

# Separate features from the test dataset and align them with the training
# feature columns (same set, same order). Dummy columns absent from this
# file are added as all-zero; columns unknown to the model are discarded.
X_test_final = test_df.drop("PassengerId", axis=1).reindex(
    columns=X.columns, fill_value=0)

# Fill missing values with the per-feature means the scaler learned when it
# was fitted, so this file is imputed with the same statistics the model was
# trained and scaled with rather than with its own means.
train_feature_means = pd.Series(scaler.mean_, index=X.columns)
X_test_final = X_test_final.fillna(train_feature_means)

# Apply StandardScaler to features
X_test_final_scaled = scaler.transform(X_test_final)

# Make predictions on the final test set (sigmoid output thresholded at 0.5)
final_test_predictions_dl = (model.predict(X_test_final_scaled) > 0.5).astype("int32")

# Create a submission DataFrame for the deep learning model
submission_df_dl = pd.DataFrame({
    'PassengerId': test_df['PassengerId'].to_numpy(),
    'Survived': final_test_predictions_dl.ravel(),  # (n, 1) predictions -> 1-D
})

# Save the submission DataFrame to a CSV file named submission_dl.csv
submission_df_dl.to_csv('submission_dl.csv', index=False)

print("Submission file 'submission_dl.csv' created successfully.")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.4501 - loss: 0.7486 - val_accuracy: 0.6369 - val_loss: 0.6575
Epoch 2/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6285 - loss: 0.6723 - val_accuracy: 0.7542 - val_loss: 0.5954
Epoch 3/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7237 - loss: 0.5880 - val_accuracy: 0.7821 - val_loss: 0.5412
Epoch 4/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7108 - loss: 0.5923 - val_accuracy: 0.7989 - val_loss: 0.5066
Epoch 5/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7363 - loss: 0.5652 - val_accuracy: 0.7989 - val_loss: 0.4822
Epoch 6/100
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7449 - loss: 0.5587 - val_accuracy: 0.7989 - val_loss: 0.4624
Epoch 7/100
[1m23/23[0m [32m━━━━━━━━━━━━━━