In [1]:
# Assignment 13: Deploy model on Colab (End-to-End Pipeline)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from google.colab import files
import io

# 1. Upload dataset
uploaded = files.upload()  # Upload 'titanic_cleaned.csv'

# 2. Load dataset
data = pd.read_csv(io.BytesIO(list(uploaded.values())[0]))
print("Columns:", data.columns)

# 3. Features and target
X = data.drop("Survived", axis=1)
y = data["Survived"]

# 4. One-hot encode categorical features
X = pd.get_dummies(X)

# 5. Train/Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 6. Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 7. Build Neural Network
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# 8. Compile model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 9. Train model
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)

# 10. Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

# 11. Deployment: Function for new input prediction
def predict_survival(input_dict):
    """
    input_dict example:
    {
        'Pclass': 3,
        'Sex': 'male',
        'Age': 22,
        'SibSp': 1,
        'Parch': 0,
        'Fare': 7.25,
        'Embarked': 'S'
    }
    """
    import pandas as pd
    import numpy as np

    # Convert dict to DataFrame
    df = pd.DataFrame([input_dict])

    # One-hot encode categorical features to match training columns
    df = pd.get_dummies(df)
    missing_cols = set(X.columns) - set(df.columns)
    for col in missing_cols:
        df[col] = 0
    df = df[X.columns]  # Ensure same column order

    # Standardize
    df_scaled = scaler.transform(df)

    # Predict
    prediction = model.predict(df_scaled)
    return "Survived" if prediction[0][0] >= 0.5 else "Did not survive"

# Example usage
sample_input = {
    'Pclass': 3,
    'Sex': 'male',
    'Age': 22,
    'SibSp': 1,
    'Parch': 0,
    'Fare': 7.25,
    'Embarked': 'S'
}

print("Prediction for sample input:", predict_survival(sample_input))


Saving titanic_cleaned.csv to titanic_cleaned.csv
Columns: Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Embarked'],
      dtype='object')
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.5745 - loss: 0.7212 - val_accuracy: 0.6434 - val_loss: 0.6627
Epoch 2/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7301 - loss: 0.5168 - val_accuracy: 0.6853 - val_loss: 0.6287
Epoch 3/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8327 - loss: 0.4120 - val_accuracy: 0.6853 - val_loss: 0.6052
Epoch 4/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8831 - loss: 0.3168 - val_accuracy: 0.6923 - val_loss: 0.5812
Epoch 5/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9583 - loss: 0.2016 - val_accuracy: 0.7063 - val_loss: 0.5634
Epoch 6/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9818 - loss: 0.1232 - val_accuracy: 0.7133 - val_loss: 0.5532
Epoch 7/20
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━

  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[c

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step

  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[col] = 0
  df[c

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
Prediction for sample input: Did not survive
