In [1]:
# Importing the needed libraries

# Libraries for deep learning
import tensorflow as tf
import tensorflow.keras as keras
from keras.models import Sequential
from keras.layers import Dense

# Libraries for preprocessing
from sklearn.preprocessing import StandardScaler

#Libraries for data analysis
import pandas as pd
import seaborn as sns
import numpy as np


In [2]:
# Reading in the dataset
titanic=sns.load_dataset('titanic')

In [3]:
# viewing the first 5 rows of the dataset
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [4]:
# dropping some columns not needed for the analysis
titanic= titanic.drop(['embark_town','deck','who','adult_male','alive','alone'], axis=1)

In [5]:
# Replacing some catego
titanic.replace({'male':0, 'female':1,'no':0,'yes':1,False:0,True:1},inplace= True)


In [6]:
titanic.isna().sum()

survived      0
pclass        0
sex           0
age         177
sibsp         0
parch         0
fare          0
embarked      2
class         0
dtype: int64

In [7]:
titanic[(titanic['age'].isna()) & (titanic['sex']==1)]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class
19,1,3,1,,0,0,7.225,C,Third
28,1,3,1,,0,0,7.8792,Q,Third
31,1,1,1,,1,0,146.5208,C,First
32,1,3,1,,0,0,7.75,Q,Third
47,1,3,1,,0,0,7.75,Q,Third
82,1,3,1,,0,0,7.7875,Q,Third
109,1,3,1,,1,0,24.15,Q,Third
128,1,3,1,,1,1,22.3583,C,Third
140,0,3,1,,0,2,15.2458,C,Third
166,1,1,1,,0,1,55.0,S,First


In [8]:
titanic[(titanic['age'].isna()) & (titanic['sex']==0)]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class
5,0,3,0,,0,0,8.4583,Q,Third
17,1,2,0,,0,0,13.0000,S,Second
26,0,3,0,,0,0,7.2250,C,Third
29,0,3,0,,0,0,7.8958,S,Third
36,1,3,0,,0,0,7.2292,C,Third
...,...,...,...,...,...,...,...,...,...
839,1,1,0,,0,0,29.7000,C,First
846,0,3,0,,8,2,69.5500,S,Third
859,0,3,0,,0,0,7.2292,C,Third
868,0,3,0,,0,0,9.5000,S,Third


In [9]:
age_f= titanic[(titanic['sex']==1) & (titanic['class']=='Third')]['age'].mode()[0]
age_f

18.0

In [10]:
titanic[(titanic['age'].isna()) & (titanic['sex']==0)]['class'].value_counts()

Third     94
First     21
Second     9
Name: class, dtype: int64

In [11]:
age_m= titanic[(titanic['sex']==0) & (titanic['class']=='Third')]['age'].mode()[0]
age_m

22.0

In [12]:
#titanic.mask?

In [13]:
cond = (titanic['age'].isna()) & (titanic['sex']== 1)
titanic['age'].mask(cond,age_f,inplace=True)


In [14]:
cond_m = (titanic['age'].isna()) & (titanic['sex']== 0)
titanic['age'].mask(cond_m,age_m,inplace=True)


In [15]:
titanic['embarked'].value_counts()

S    644
C    168
Q     77
Name: embarked, dtype: int64

In [16]:
titanic[titanic['embarked'].isna()]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class
61,1,1,1,38.0,0,0,80.0,,First
829,1,1,1,62.0,0,0,80.0,,First


In [17]:
cond_emb = titanic['embarked'].isna()
titanic['embarked'].mask(cond_emb,'S',inplace=True)

In [18]:
titanic.isna().sum()

survived    0
pclass      0
sex         0
age         0
sibsp       0
parch       0
fare        0
embarked    0
class       0
dtype: int64

In [19]:
titanic= pd.get_dummies(data=titanic,columns=['embarked','class'],drop_first= True)

In [20]:
X = titanic.drop('survived',axis = 1)
y=titanic['survived']

In [21]:
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   pclass        891 non-null    int64  
 1   sex           891 non-null    int64  
 2   age           891 non-null    float64
 3   sibsp         891 non-null    int64  
 4   parch         891 non-null    int64  
 5   fare          891 non-null    float64
 6   embarked_Q    891 non-null    uint8  
 7   embarked_S    891 non-null    uint8  
 8   class_Second  891 non-null    uint8  
 9   class_Third   891 non-null    uint8  
dtypes: float64(2), int64(4), uint8(4)
memory usage: 45.4 KB


In [22]:
X.shape

(891, 10)

In [23]:
model = Sequential([
    Dense(3,activation='relu',input_shape=(10,)),
    Dense(1,activation='sigmoid')
])


In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 3)                 33        
                                                                 
 dense_1 (Dense)             (None, 1)                 4         
                                                                 
Total params: 37
Trainable params: 37
Non-trainable params: 0
_________________________________________________________________


In [25]:
model.compile(optimizer='adam',
             loss ='binary_crossentropy',
             metrics= ['accuracy'])

In [26]:
scaler = StandardScaler()
X= scaler.fit_transform(X)

In [27]:
model.fit(X,y,validation_split=0.3,batch_size =1, epochs= 10, verbose=2);

Epoch 1/10
623/623 - 7s - loss: 0.8937 - accuracy: 0.4494 - val_loss: 0.8244 - val_accuracy: 0.5112 - 7s/epoch - 11ms/step
Epoch 2/10
623/623 - 5s - loss: 0.6828 - accuracy: 0.6100 - val_loss: 0.6559 - val_accuracy: 0.6567 - 5s/epoch - 8ms/step
Epoch 3/10
623/623 - 5s - loss: 0.5786 - accuracy: 0.7432 - val_loss: 0.5617 - val_accuracy: 0.7575 - 5s/epoch - 9ms/step
Epoch 4/10
623/623 - 5s - loss: 0.5201 - accuracy: 0.7929 - val_loss: 0.5103 - val_accuracy: 0.7687 - 5s/epoch - 8ms/step
Epoch 5/10
623/623 - 5s - loss: 0.4916 - accuracy: 0.8026 - val_loss: 0.4845 - val_accuracy: 0.7836 - 5s/epoch - 7ms/step
Epoch 6/10
623/623 - 5s - loss: 0.4760 - accuracy: 0.8138 - val_loss: 0.4658 - val_accuracy: 0.8022 - 5s/epoch - 8ms/step
Epoch 7/10
623/623 - 4s - loss: 0.4664 - accuracy: 0.8090 - val_loss: 0.4518 - val_accuracy: 0.8060 - 4s/epoch - 7ms/step
Epoch 8/10
623/623 - 5s - loss: 0.4573 - accuracy: 0.8138 - val_loss: 0.4403 - val_accuracy: 0.8172 - 5s/epoch - 8ms/step
Epoch 9/10
623/623 - 5s

In [28]:
model.save('titanic_classification.h5')

In [29]:
y[:5]

0    0
1    1
2    1
3    1
4    0
Name: survived, dtype: int64

In [30]:
model.predict(X[:5])



array([[0.12890378],
       [0.9266609 ],
       [0.45091113],
       [0.90925956],
       [0.10633822]], dtype=float32)

In [31]:
model.predict(X[:5])



array([[0.12890378],
       [0.9266609 ],
       [0.45091113],
       [0.90925956],
       [0.10633822]], dtype=float32)

In [32]:
model1 = Sequential([
    Dense(3,activation='relu',input_shape=(10,)),
    Dense(2,activation='softmax')
])


In [33]:
model1.compile(optimizer = 'adam',
              loss= 'categorical_crossentropy',
              metrics= 'accuracy')


In [34]:
from tensorflow.keras.utils import to_categorical
y1 = to_categorical(y,2)
y1

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [1., 0.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [35]:
print(X.shape)
print(y1.shape)

(891, 10)
(891, 2)


In [36]:
model1.fit(X,y1,validation_split= 0.3,batch_size=2,epochs =10, verbose = 2)

Epoch 1/10
312/312 - 3s - loss: 0.8785 - accuracy: 0.4992 - val_loss: 0.8051 - val_accuracy: 0.5746 - 3s/epoch - 11ms/step
Epoch 2/10
312/312 - 3s - loss: 0.7338 - accuracy: 0.6003 - val_loss: 0.6939 - val_accuracy: 0.6231 - 3s/epoch - 9ms/step
Epoch 3/10
312/312 - 2s - loss: 0.6559 - accuracy: 0.6292 - val_loss: 0.6288 - val_accuracy: 0.6455 - 2s/epoch - 7ms/step
Epoch 4/10
312/312 - 3s - loss: 0.6091 - accuracy: 0.6629 - val_loss: 0.5837 - val_accuracy: 0.6828 - 3s/epoch - 9ms/step
Epoch 5/10
312/312 - 2s - loss: 0.5737 - accuracy: 0.7223 - val_loss: 0.5496 - val_accuracy: 0.7276 - 2s/epoch - 6ms/step
Epoch 6/10
312/312 - 1s - loss: 0.5434 - accuracy: 0.7721 - val_loss: 0.5204 - val_accuracy: 0.7612 - 1s/epoch - 4ms/step
Epoch 7/10
312/312 - 1s - loss: 0.5180 - accuracy: 0.7897 - val_loss: 0.4971 - val_accuracy: 0.7836 - 1s/epoch - 4ms/step
Epoch 8/10
312/312 - 2s - loss: 0.4958 - accuracy: 0.7994 - val_loss: 0.4817 - val_accuracy: 0.7724 - 2s/epoch - 8ms/step
Epoch 9/10
312/312 - 3s

<keras.callbacks.History at 0x20074006f10>

In [37]:
y1[:5]

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [38]:
model1.predict(X[:5])



array([[0.72357625, 0.27642378],
       [0.06943174, 0.9305682 ],
       [0.66162705, 0.33837295],
       [0.12226231, 0.87773764],
       [0.9054077 , 0.09459232]], dtype=float32)

In [39]:
model2 = Sequential([
    Dense(3,activation='relu',input_shape=(10,)),
    Dense(1,activation='sigmoid')
])


In [40]:
models = ['sgd','adam','rmsprop']

In [41]:
predictions = {}
all_history = {}
for i in models:
    model2.compile(optimizer = i, loss = 'binary_crossentropy',metrics='accuracy')
    print(f'Model using  {i} optimizer')
    history= model2.fit(X,y,validation_split= 0.3,epochs= 20, verbose = 2)
    all_history[i] = history
    predictions[i] = model2.predict(X[:5])

Model using  sgd optimizer
Epoch 1/20
20/20 - 1s - loss: 0.9386 - accuracy: 0.4061 - val_loss: 0.8783 - val_accuracy: 0.4104 - 1s/epoch - 68ms/step
Epoch 2/20
20/20 - 0s - loss: 0.8934 - accuracy: 0.4189 - val_loss: 0.8411 - val_accuracy: 0.4067 - 200ms/epoch - 10ms/step
Epoch 3/20
20/20 - 0s - loss: 0.8575 - accuracy: 0.4254 - val_loss: 0.8117 - val_accuracy: 0.4104 - 284ms/epoch - 14ms/step
Epoch 4/20
20/20 - 0s - loss: 0.8283 - accuracy: 0.4430 - val_loss: 0.7877 - val_accuracy: 0.4366 - 291ms/epoch - 15ms/step
Epoch 5/20
20/20 - 0s - loss: 0.8044 - accuracy: 0.4575 - val_loss: 0.7673 - val_accuracy: 0.4440 - 255ms/epoch - 13ms/step
Epoch 6/20
20/20 - 0s - loss: 0.7844 - accuracy: 0.4623 - val_loss: 0.7512 - val_accuracy: 0.4590 - 297ms/epoch - 15ms/step
Epoch 7/20
20/20 - 0s - loss: 0.7685 - accuracy: 0.4767 - val_loss: 0.7371 - val_accuracy: 0.4739 - 280ms/epoch - 14ms/step
Epoch 8/20
20/20 - 0s - loss: 0.7543 - accuracy: 0.4831 - val_loss: 0.7251 - val_accuracy: 0.4813 - 264ms/ep



In [42]:
sdg_hist = all_history['sgd']
max(sdg_hist.history['accuracy'])

0.6324237585067749

In [43]:
adam_hist = all_history['adam']
max(adam_hist.history['accuracy'])

0.6998394727706909

In [44]:
rms_hist = all_history['rmsprop']
max(rms_hist.history['accuracy'])

0.7624397873878479

In [45]:
sgd_pred = predictions['sgd']
sgd_pred

array([[0.4465636 ],
       [0.4465636 ],
       [0.41310266],
       [0.40555832],
       [0.4193108 ]], dtype=float32)

In [46]:
adam_pred = predictions['adam']
adam_pred

array([[0.31698015],
       [0.44374308],
       [0.27394226],
       [0.42818198],
       [0.18894742]], dtype=float32)

In [47]:
rms_pred = predictions['rmsprop']
rms_pred

array([[0.22102994],
       [0.73766696],
       [0.27749017],
       [0.69362193],
       [0.11107818]], dtype=float32)