In [365]:
import pandas as pd
import numpy as np
import os

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder , StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


train_data = pd.read_csv("/kaggle/input/titanic/train.csv")
test_data = pd.read_csv("/kaggle/input/titanic/test.csv")


# Handle missing values (you can use different strategies based on your needs)
train_data['Age'].fillna(train_data['Age'].median(),)
test_data['Age'].fillna(test_data['Age'].median(),)
train_data['Fare'].fillna(test_data['Fare'].median(),)
test_data['Fare'].fillna(test_data['Fare'].median(),)
train_data['Embarked'].fillna(train_data['Embarked'].mode()[0],)
test_data['Embarked'].fillna(test_data['Embarked'].mode()[0],)

# Create binary 'HasCabin' feature
train_data['Cabin'] = train_data['Cabin'].apply(lambda x: 0 if pd.isnull(x) else 1)
test_data['Cabin'] = test_data['Cabin'].apply(lambda x: 0 if pd.isnull(x) else 1)

#Encoding column

labelencoder_sex = LabelEncoder()
train_data['Sex'] = labelencoder_sex.fit_transform(train_data['Sex'])
test_data['Sex'] = labelencoder_sex.transform(test_data['Sex'])

# OneHotEncode 'Embarked' column
onehotencoder_embarked = OneHotEncoder(sparse_output=False, drop='first')
embarked_train = onehotencoder_embarked.fit_transform(train_data[['Embarked']])
embarked_test = onehotencoder_embarked.transform(test_data[['Embarked']])

# Append OneHotEncoded 'Embarked' back to dataframes
embarked_train_df = pd.DataFrame(embarked_train, columns=onehotencoder_embarked.get_feature_names_out(['Embarked']))
embarked_test_df = pd.DataFrame(embarked_test, columns=onehotencoder_embarked.get_feature_names_out(['Embarked']))

train_data = train_data.join(embarked_train_df)
test_data = test_data.join(embarked_test_df)


# Scaling features
scaler = StandardScaler()
train_data[['Age', 'Fare']] = scaler.fit_transform(train_data[['Age', 'Fare']])
test_data[['Age', 'Fare']] = scaler.transform(test_data[['Age', 'Fare']])


# Applying manual weights
train_data['Pclass_weighted'] = train_data['Pclass'] * 0.5
test_data['Pclass_weighted'] = test_data['Pclass'] * 0.5

train_data['Sex_weighted'] = train_data['Sex'] * 0.5
test_data['Sex_weighted'] = test_data['Sex'] * 0.5

train_data['Age_weighted'] = train_data['Age'] * 1.2
test_data['Age_weighted'] = test_data['Age'] * 1.2

train_data['SibSp_weighted'] = train_data['SibSp'] * 0.8
test_data['SibSp_weighted'] = test_data['SibSp'] * 0.8

train_data['Parch_weighted'] = train_data['Parch'] * 1.2
test_data['Parch_weighted'] = test_data['Parch'] * 1.2

train_data['Fare_weighted'] = train_data['Fare'] * 1.5
test_data['Fare_weighted'] = test_data['Fare'] * 1.5

train_data['Cabin_weighted'] = train_data['Cabin'] * 1.7
test_data['Cabin_weighted'] = test_data['Cabin'] * 1.7

# Include OneHotEncoded 'Embarked' columns in weights
for col in onehotencoder_embarked.get_feature_names_out(['Embarked']):
    train_data[f'{col}_weighted'] = train_data[col] * 0
    test_data[f'{col}_weighted'] = test_data[col] * 0


# Preparing features
features = ['Pclass_weighted', 'Sex_weighted', 'Age_weighted', 'SibSp_weighted', 'Parch_weighted', 'Fare_weighted','Cabin_weighted'] + [f'{col}_weighted' for col in onehotencoder_embarked.get_feature_names_out(['Embarked'])]
features += [f'{col}_weighted' for col in onehotencoder_embarked.get_feature_names_out(['Embarked'])]
X = train_data[features].copy()
y = train_data['Survived'].copy()


# Splitting the testing data into, for accuracy testing
X_train, X_val ,y_train,y_val = train_test_split(X,y, test_size = 0.2, random_state = 0)


# Impute NaN values with mean
imputer = SimpleImputer(strategy='median')
X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)
X_test = imputer.transform(test_data[features])


# Convert to float32
X_train = X_train.astype(np.float32)
X_val = X_val.astype(np.float32)
X_test = X_test.astype(np.float32)
y_train = y_train.astype(np.float32)
y_val = y_val.astype(np.float32)

# initializing the model
classifier = Sequential()

# adding the input layer and first hidden layer 


classifier.add(Dense (16,  activation = 'relu',input_shape=(X_train.shape[1],) ))

classifier.add(Dense (4, activation = 'hard_silu'))

classifier.add(Dense (2, activation = 'leaky_relu'))

# adding the output layer
classifier.add(Dense(1, activation = 'sigmoid' ))
#classifier.add(Dropout(0.2))
# compiling the ANN 
classifier.compile(optimizer = 'adamw', loss = 'binary_crossentropy', metrics=['accuracy'])

# fitting the ANN to the training set
classifier.fit(X_train, y_train, batch_size= 25, epochs= 500)


/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv
Epoch 1/500


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.3910 - loss: 0.7019
Epoch 2/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5652 - loss: 0.6902 
Epoch 3/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5996 - loss: 0.6731 
Epoch 4/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6317 - loss: 0.6580 
Epoch 5/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6284 - loss: 0.6478 
Epoch 6/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6746 - loss: 0.6238
Epoch 7/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6830 - loss: 0.6208
Epoch 8/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7017 - loss: 0.6067
Epoch 9/500
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x78c769d41600>

In [366]:
pred = classifier.predict(X_train)
pred = (pred>0.5).astype(int)

accuracy = accuracy_score(y_train, pred)
print(f"Accuracy Score: {accuracy}")

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Accuracy Score: 0.8539325842696629


In [367]:
# Predicting 
y_pred = classifier.predict(X_test)
y_pred = (y_pred>0.5).astype(int)
y_pred_flat = y_pred.flatten()

output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': y_pred_flat})
output.to_csv('submission.csv', index=False)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
