In [53]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, StandardScaler,FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import numpy as np


In [54]:
data = pd.read_csv('train.csv')
y= data['Survived']
X = data [['Age','Fare','Parch','SibSp','Pclass','Embarked','Sex']]

X_train,X_valid, y_train, y_valid = train_test_split(X,y,test_size=0.3)

numerical = ['Age','Fare','Parch','SibSp']

categorical = ['Embarked','Sex']

log2_transformer = FunctionTransformer(func=lambda x: np.log2(x + 1), validate=False)


fare_pipeline = Pipeline([
    ("impute", SimpleImputer(strategy="median")),
    ("log2", FunctionTransformer(lambda x: np.log2(x + 1), validate=False)),
    ("scale", StandardScaler())
])

numerical_pipeline = Pipeline([
    ("impute",SimpleImputer(strategy='median')),
    ("num",StandardScaler())

])

preprocessor = ColumnTransformer([
    ("fare", fare_pipeline, ["Fare"]),
    ("num", numerical_pipeline,numerical),
    ("categorical", OneHotEncoder(handle_unknown='ignore'), categorical)
])





In [55]:
X_train = preprocessor.fit_transform(X_train)
X_valid = preprocessor.transform(X_valid)

In [None]:
print(X_train.shape)
print(X_valid.shape)
print(X_train.shape[1],)

(623, 10)
(268, 10)
10


In [56]:
import tensorflow as tf
from tensorflow import keras

In [72]:
model= keras.Sequential( [
     keras.layers.Input(shape=(X_train.shape[1],)),
     keras.layers.Dense(32,activation='relu'),
     keras.layers.Dense(16,activation='relu'),
     keras.layers.Dense(1,activation='sigmoid') ]
)

In [73]:
model.compile(
    optimizer ="adam",
    loss= "binary_crossentropy",
    metrics =["accuracy"]
)

In [74]:
history = model.fit(
    X_train,y_train,
    epochs=50,
    validation_data =[X_valid,y_valid]
)

Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.6421 - loss: 0.6096 - val_accuracy: 0.6493 - val_loss: 0.5975
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6918 - loss: 0.5506 - val_accuracy: 0.7090 - val_loss: 0.5544
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7544 - loss: 0.5141 - val_accuracy: 0.7388 - val_loss: 0.5278
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8026 - loss: 0.4892 - val_accuracy: 0.7500 - val_loss: 0.5087
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8122 - loss: 0.4715 - val_accuracy: 0.7761 - val_loss: 0.4965
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8106 - loss: 0.4575 - val_accuracy: 0.7687 - val_loss: 0.4902
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━

In [75]:
test_data = pd.read_csv('test.csv')
X_test = test_data[numerical+categorical]
X_test= preprocessor.transform(X_test)




In [76]:
# Make predictions (probabilities)
y_pred_prob = model.predict(X_test)

# Convert probabilities to 0 or 1
y_pred = (y_pred_prob > 0.5).astype(int).flatten()

# Prepare submission DataFrame
submission = pd.DataFrame({
    "PassengerId": test_data["PassengerId"],
    "Survived": y_pred
})

# Save to CSV
submission.to_csv("submission.csv", index=False)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
