In [None]:
import os

import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)


# Load the Titanic train/test splits from the Kaggle input directory.
train_data = pd.read_csv("/kaggle/input/titanic/train.csv")
test_data = pd.read_csv("/kaggle/input/titanic/test.csv")

# Preprocessing
# Impute missing values. `Series.fillna` returns a NEW Series, so its result
# must be assigned back -- calling it without assignment leaves the NaNs in
# place, and NaN inputs make the network's loss NaN.
# The fill values are computed on the training set only and reused for the
# test set, so both splits are imputed identically and nothing is learned
# from the test data.
age_fill = train_data['Age'].mean()
fare_fill = train_data['Fare'].median()

train_data['Age'] = train_data['Age'].fillna(age_fill)
test_data['Age'] = test_data['Age'].fillna(age_fill)

# 'Fare' is a model input as well; fill it too so a missing value in either
# split cannot reach the network (a no-op when the column is complete).
train_data['Fare'] = train_data['Fare'].fillna(fare_fill)
test_data['Fare'] = test_data['Fare'].fillna(fare_fill)

# Fail early if imputation did not remove every missing value.
assert not train_data[['Age', 'Fare']].isna().any().any(), "NaN left in train Age/Fare"
assert not test_data[['Age', 'Fare']].isna().any().any(), "NaN left in test Age/Fare"

# Encode the 'Sex' column as integer labels. The encoder is fitted on the
# training data and the same fitted mapping is applied to the test data.
labelencoder_sex = LabelEncoder()
train_data['Sex'] = labelencoder_sex.fit_transform(train_data['Sex'])
test_data['Sex'] = labelencoder_sex.transform(test_data['Sex'])

# Feature preprocessing:
#   * 'Pclass' is one-hot encoded (first level dropped to avoid a redundant column);
#   * the numeric columns are standardised to zero mean / unit variance so that
#     large-magnitude features such as 'Fare' do not dominate the gradient
#     updates of the neural network;
#   * every remaining column ('Sex', already label-encoded) is passed through.
# Output column order is: one-hot columns, scaled numeric columns, passthrough columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first'), ['Pclass']),
        ('num', StandardScaler(), ['Age', 'SibSp', 'Parch', 'Fare']),
    ],
    remainder='passthrough'
)

# Select the model inputs (X, independent variables) and the target
# (y, dependent variable 'Survived').
feature_cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
X_train = train_data[feature_cols].copy()
y_train = train_data['Survived'].copy()
X_test = test_data[feature_cols].copy()


# Fit the transformations on the training data only, then apply the same
# fitted transformations to the test data.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Number of input features, taken from the preprocessed training matrix.
input_d = X_train.shape[1]

# Feed-forward network: two hidden ReLU layers of 6 units each, followed by
# a single sigmoid unit for the binary output.
classifier = Sequential()
classifier.add(Dense(units=6, activation='relu', input_shape=(input_d,)))
classifier.add(Dense(units=6, activation='relu'))
classifier.add(Dense(units=1, activation='sigmoid'))

# Compile with the Adam optimizer and binary cross-entropy loss, tracking accuracy.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the full training set: 300 epochs with mini-batches of 10 rows.
classifier.fit(X_train, y_train, epochs=300, batch_size=10)


In [None]:
# Predict on the test set and turn the model outputs into 0/1 labels
# by thresholding at 0.5.
y_pred = (classifier.predict(X_test) > 0.5).astype(int)

# Collapse the predictions to one dimension so they can be used as a column.
y_pred_flat = y_pred.flatten()

# Write the submission file: one row per test passenger, without the index.
output = pd.DataFrame(
    {
        'PassengerId': test_data['PassengerId'],
        'Survived': y_pred_flat,
    }
)
output.to_csv('submission.csv', index=False)
