In [53]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler,FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import numpy as np


In [54]:
data = pd.read_csv('train.csv')
y= data['Survived']
X = data [['Age','Fare','Parch','SibSp','Pclass','Embarked','Sex']]

X_train,X_valid, y_train, y_valid = train_test_split(X,y,test_size=0.3)

numerical = ['Age','Fare','Parch','SibSp']

categorical = ['Embarked','Sex']

log2_transformer = FunctionTransformer(func=lambda x: np.log2(x + 1), validate=False)


fare_pipeline = Pipeline([
    ("impute", SimpleImputer(strategy="median")),
    ("log2", FunctionTransformer(lambda x: np.log2(x + 1), validate=False)),
    ("scale", StandardScaler())
])

numerical_pipeline = Pipeline([
    ("impute",SimpleImputer(strategy='median')),
    ("num",StandardScaler())

])

preprocessor = ColumnTransformer([
    ("fare", fare_pipeline, ["Fare"]),
    ("num", numerical_pipeline,numerical),
    ("categorical", OneHotEncoder(handle_unknown='ignore'), categorical)
])





In [55]:
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

In [None]:
print(X_train.shape)
print(X_valid.shape)
print(X_train.shape[1],)

(623, 10)
(268, 10)
10


In [56]:
import tensorflow as tf
from tensorflow import keras

In [77]:
from  keras.callbacks import EarlyStopping

In [96]:
early_stop =EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

In [97]:
model= keras.Sequential( [
     keras.layers.Input(shape=(X_train.shape[1],)),
     keras.layers.Dense(32,activation='relu'),
     keras.layers.Dense(16,activation='relu'),
     keras.layers.Dense(1,activation='sigmoid') ]
)

In [98]:
model.compile(
    optimizer ="adam",
    loss= "binary_crossentropy",
    metrics =["accuracy"]
)

In [99]:
history = model.fit(
    X_train,y_train,
    epochs=100,
    validation_data =[X_valid,y_valid],
    callbacks =[early_stop]
)

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.7335 - loss: 0.6393 - val_accuracy: 0.7090 - val_loss: 0.6200
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7576 - loss: 0.5814 - val_accuracy: 0.7313 - val_loss: 0.5781
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7801 - loss: 0.5324 - val_accuracy: 0.7313 - val_loss: 0.5427
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7978 - loss: 0.4945 - val_accuracy: 0.7463 - val_loss: 0.5182
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8090 - loss: 0.4684 - val_accuracy: 0.7575 - val_loss: 0.5026
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8315 - loss: 0.4506 - val_accuracy: 0.7649 - val_loss: 0.4926
Epoch 7/100
[1m20/20[0m [32m━━

In [100]:
test_data = pd.read_csv('test.csv')
X_test = test_data[numerical+categorical]
X_test= preprocessor.transform(X_test)




In [101]:
# Make predictions (probabilities)
y_pred_prob = model.predict(X_test)

# Convert probabilities to 0 or 1
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

# Prepare submission DataFrame
submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": y_pred
})

# Save to CSV
submission.to_csv("submission.csv", index=False)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
