In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2, l1
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC

In [2]:
# Reading Data
data = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

# Printind Data Info
print(data.info())
print(test.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  418 non-null    int64  
 1   Pcl

In [3]:
# Setting Survived Column as Labels
train_labels = np.array(data['Survived']).astype('float32')

validation_labels = train_labels[:200]
train_labels = train_labels

In [4]:
# Selecting Features which have impacted the status of Survival
train_data = data[['Pclass', 'Sex', 'Fare', 'Age', 'Embarked']]
test_data = test[['Pclass', 'Sex', 'Fare', 'Age', 'Embarked']]

train_data['Age'].fillna(train_data['Age'].mean(), inplace = True)
test_data['Age'].fillna(test_data['Age'].mean(), inplace = True)

train_data['Fare'].fillna(train_data['Fare'].mean(), inplace = True)
test_data['Fare'].fillna(test_data['Fare'].mean(), inplace = True)

train_data['Embarked'].fillna('S', inplace = True)
test_data['Embarked'].fillna('S', inplace = True)

train_data['Sex'] = train_data['Sex'].replace(('male','female'), (1,0))
test_data['Sex'] = test_data['Sex'].replace(('male','female'), (1,0))

train_data['Embarked'] = train_data['Embarked'].replace(('S','C','Q'), (0,1,2))
test_data['Embarked'] = test_data['Embarked'].replace(('S','C','Q'), (0,1,2))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_data['Sex'] = train_data['Sex'].replace(('male','female'), (1,0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Sex'] = test_data['Sex'].replace(('male','female'), (1,0))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

In [5]:
train_data = np.array(train_data).astype('float32')
train_data.shape

validation_data = train_data[:200]
train_data = train_data

In [6]:
train_data.shape, train_labels.shape, validation_data.shape, validation_labels.shape

((891, 5), (891,), (200, 5), (200,))

In [7]:
def initialize_weights(shape, dtype=None):
    
    return np.random.normal(loc = 0.0, scale = 1e-2, size = shape)

def initialize_bias(shape, dtype=None):
    
    return np.random.normal(loc = 0.5, scale = 1e-2, size = shape)

def DeepLearningModel(input_shape):
    model = Sequential()
    
    model.add(Input(input_shape))
    
    model.add(Dense(8, activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l1(2e-4)))
    
    model.add(Dense(16, activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l1(2e-4)))
    
    model.add(Dense(32, activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l1(2e-4)))
    
    model.add(Dense(16, activation='relu', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l1(2e-4)))
    
    model.add(Dense(1, activation='sigmoid', kernel_initializer=initialize_weights, bias_initializer=initialize_bias, kernel_regularizer=l1(2e-4)))    
    
    return model

In [16]:
lr = 0.006
epochs = 30
batch_size = 32
optimizer = Adam(lr)

model = DeepLearningModel((5))
print(model.summary())
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_data, train_labels, epochs=epochs, batch_size=batch_size)

# model = GradientBoostingClassifier()
# model = model.fit(train_data, train_labels)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 8)                 48        
_________________________________________________________________
dense_11 (Dense)             (None, 16)                144       
_________________________________________________________________
dense_12 (Dense)             (None, 32)                544       
_________________________________________________________________
dense_13 (Dense)             (None, 16)                528       
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 17        
Total params: 1,281
Trainable params: 1,281
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Ep

In [17]:
model.evaluate(validation_data, validation_labels)



[0.49847412109375, 0.7350000143051147]

In [None]:
def plot_data(history):
    plt.plot(history['accuracy'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    # summarize history for loss
    plt.plot(history['loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
plot_data(history.history)

In [18]:
pred = model.predict(test_data)
pred = np.round(pred)
pred = list(map(int,np.reshape(pred, (pred.shape[0]))))

In [19]:
submission = pd.DataFrame({
    "PassengerId": test["PassengerId"],
    "Survived": pred
})

In [20]:
submission.to_csv('./output/titanic_survivors.csv', index = False)