# ANN - Single Layer Perceptron
It means the neural network only have two layers

- input layer
- output layer

In [285]:
# Importing Necessary Libraries
import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.activations import leaky_relu, sigmoid


# New Section

## 1. Loading Dataset

In [259]:
# Reading file
df = pd.read_csv('/content/Titanic-Dataset.csv')


## 2. Data Exploration & Statistics

In [260]:
# Checking first 5 rows of Dataset
df.head(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [261]:
# Checking the last 5 rows of dataset
df.tail(5)


Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [262]:
# Checking all the column data-types and non-null values.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [263]:
# Checking stasts
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


## 3. Data Preprocessing

In [264]:
# Checking nulls in each column
df.isnull().sum().sort_values(ascending=False)


Unnamed: 0,0
Cabin,687
Age,177
Embarked,2
PassengerId,0
Name,0
Pclass,0
Survived,0
Sex,0
Parch,0
SibSp,0


In [265]:
# Removing Cabin column because it has many null values and PassengerId is not needed
df = df.drop(['PassengerId',	'Cabin'], axis = 1)
df


Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C
2,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,S
4,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...
886,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,S
887,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,S
888,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,S
889,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C


### Simple Imputer

In [266]:
# Filling null values using Simple Imputer, strategy mean and most frequent
# Using mean for age as its Datatype is float
si = SimpleImputer(strategy = "mean")
df['Age'] = si.fit_transform(df[['Age']])


In [267]:
# Using most_frequent for emarked column as it's datatype is object.
si = SimpleImputer(strategy = "most_frequent")
df['Embarked'] = si.fit_transform(df[['Embarked']])[:, 0]


In [268]:
# Checking Null values again
df.isnull().sum()

# Nomore Null values.

Unnamed: 0,0
Survived,0
Pclass,0
Name,0
Sex,0
Age,0
SibSp,0
Parch,0
Ticket,0
Fare,0
Embarked,0


In [269]:
# Checking Duplicates
df.duplicated().sum()

# No duplicated Rows.

np.int64(0)

### Encoding

In [270]:
# Checking our dataset
df


Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,0,3,"Braund, Mr. Owen Harris",male,22.000000,1,0,A/5 21171,7.2500,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.000000,1,0,PC 17599,71.2833,C
2,1,3,"Heikkinen, Miss. Laina",female,26.000000,0,0,STON/O2. 3101282,7.9250,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.000000,1,0,113803,53.1000,S
4,0,3,"Allen, Mr. William Henry",male,35.000000,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...
886,0,2,"Montvila, Rev. Juozas",male,27.000000,0,0,211536,13.0000,S
887,1,1,"Graham, Miss. Margaret Edith",female,19.000000,0,0,112053,30.0000,S
888,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,29.699118,1,2,W./C. 6607,23.4500,S
889,1,1,"Behr, Mr. Karl Howell",male,26.000000,0,0,111369,30.0000,C


#### 1. Label Encoding

In [271]:
# Using labelencoder for sex  column
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
df


Unnamed: 0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked
0,0,3,"Braund, Mr. Owen Harris",1,22.000000,1,0,A/5 21171,7.2500,S
1,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",0,38.000000,1,0,PC 17599,71.2833,C
2,1,3,"Heikkinen, Miss. Laina",0,26.000000,0,0,STON/O2. 3101282,7.9250,S
3,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",0,35.000000,1,0,113803,53.1000,S
4,0,3,"Allen, Mr. William Henry",1,35.000000,0,0,373450,8.0500,S
...,...,...,...,...,...,...,...,...,...,...
886,0,2,"Montvila, Rev. Juozas",1,27.000000,0,0,211536,13.0000,S
887,1,1,"Graham, Miss. Margaret Edith",0,19.000000,0,0,112053,30.0000,S
888,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",0,29.699118,1,2,W./C. 6607,23.4500,S
889,1,1,"Behr, Mr. Karl Howell",1,26.000000,0,0,111369,30.0000,C


In [272]:
# Using encoding by pd.get_dummies For Name, Ticket and Embarked Column
df = pd.get_dummies(df, columns=['Name', 'Ticket', 'Embarked'])
df


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,"Name_Abbing, Mr. Anthony","Name_Abbott, Mr. Rossmore Edward","Name_Abbott, Mrs. Stanton (Rosa Hunt)",...,Ticket_W./C. 14263,Ticket_W./C. 6607,Ticket_W./C. 6608,Ticket_W./C. 6609,Ticket_W.E.P. 5734,Ticket_W/C 14208,Ticket_WE/P 5735,Embarked_C,Embarked_Q,Embarked_S
0,0,3,1,22.000000,1,0,7.2500,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,1,1,0,38.000000,1,0,71.2833,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,1,3,0,26.000000,0,0,7.9250,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,1,1,0,35.000000,1,0,53.1000,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,0,3,1,35.000000,0,0,8.0500,False,False,False,...,False,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,1,27.000000,0,0,13.0000,False,False,False,...,False,False,False,False,False,False,False,False,False,True
887,1,1,0,19.000000,0,0,30.0000,False,False,False,...,False,False,False,False,False,False,False,False,False,True
888,0,3,0,29.699118,1,2,23.4500,False,False,False,...,False,True,False,False,False,False,False,False,False,True
889,1,1,1,26.000000,0,0,30.0000,False,False,False,...,False,False,False,False,False,False,False,True,False,False


## 4. Feature & Target Selection

In [273]:
# Selecting all columns for feature except Survived column.
x = df. drop(['Survived'], axis=1)
x


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,"Name_Abbing, Mr. Anthony","Name_Abbott, Mr. Rossmore Edward","Name_Abbott, Mrs. Stanton (Rosa Hunt)","Name_Abelson, Mr. Samuel",...,Ticket_W./C. 14263,Ticket_W./C. 6607,Ticket_W./C. 6608,Ticket_W./C. 6609,Ticket_W.E.P. 5734,Ticket_W/C 14208,Ticket_WE/P 5735,Embarked_C,Embarked_Q,Embarked_S
0,3,1,22.000000,1,0,7.2500,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,1,0,38.000000,1,0,71.2833,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,3,0,26.000000,0,0,7.9250,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
3,1,0,35.000000,1,0,53.1000,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
4,3,1,35.000000,0,0,8.0500,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,2,1,27.000000,0,0,13.0000,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
887,1,0,19.000000,0,0,30.0000,False,False,False,False,...,False,False,False,False,False,False,False,False,False,True
888,3,0,29.699118,1,2,23.4500,False,False,False,False,...,False,True,False,False,False,False,False,False,False,True
889,1,1,26.000000,0,0,30.0000,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [274]:
# Taking Survived Column as Target
y = df['Survived']
y


Unnamed: 0,Survived
0,0
1,1
2,1
3,1
4,0
...,...
886,0
887,1
888,0
889,1


## 5. Standard Scaling

In [275]:
# Values of some Columns are greater than the other so the system might think they are more important.
# Need to scale them to give equal importance.
scaler = StandardScaler()
x = scaler.fit_transform(x)
x


array([[ 0.82737724,  0.73769513, -0.5924806 , ..., -0.48204268,
        -0.30756234,  0.61583843],
       [-1.56610693, -1.35557354,  0.63878901, ...,  2.0745051 ,
        -0.30756234, -1.62380254],
       [ 0.82737724, -1.35557354, -0.2846632 , ..., -0.48204268,
        -0.30756234,  0.61583843],
       ...,
       [ 0.82737724, -1.35557354,  0.        , ..., -0.48204268,
        -0.30756234,  0.61583843],
       [-1.56610693,  0.73769513, -0.2846632 , ...,  2.0745051 ,
        -0.30756234, -1.62380254],
       [ 0.82737724,  0.73769513,  0.17706291, ..., -0.48204268,
         3.25137334, -1.62380254]])

## 6. Splitting ( Train & Test )

In [276]:
# Splitting into training and testing datatset
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)


In [277]:
# Checking the size of training and testing data
print(X_train.shape)
print(X_test.shape)


(712, 1581)
(179, 1581)


## 7. Model Building

In [278]:
# Building a single layer ANN model.
# Sequential() - creating a model where layers are stacked one after another.
# Dense(10, activation=leaky_relu, input_shape=(X_train.shape[1],))-  the first layer.
# activation=leaky_relu: This tells each neuron how to react to the data.
# input_shape=(X_train.shape[1],): This tells the model that your data has features as all the columns in X_train.
# last one has 1 neuron (just one output). activation=sigmoid: This gives an output between 0 and 1.
model = Sequential([
    Dense( 10, activation='relu', input_shape=(X_train.shape[1],)),
    Dense( 1, activation = 'sigmoid' )
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## 8. Model Summary

In [279]:
# Checking summary of our Model.
model.summary()


## 9. Model Compilation

In [280]:
# Compiling our Model.
# loss='binary_crossentropy' - This tells the model how to measure its mistakes.
# optimizer='adam'. "Adam" is the method the model uses to improve itself step by step.
# It adjusts the internal numbers (weights) to get better at making predictions.
# metrics=['accuracy'] - how well the model is doing in terms of correct predictions.
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)


## 10. Model Training

In [281]:
# Training our single layered ANN model.
# epochs=10. The model will go through the training data 10 times.
# batch_size=32. The model learns from 32 rows at a time.
history = model.fit(
    X_train, y_train,
    epochs = 10,
    batch_size = 32,
    verbose = 1
)


Epoch 1/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4650 - loss: 0.8105
Epoch 2/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6589 - loss: 0.6021
Epoch 3/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7775 - loss: 0.4678
Epoch 4/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8853 - loss: 0.3483
Epoch 5/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9366 - loss: 0.2768
Epoch 6/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9628 - loss: 0.2276
Epoch 7/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9827 - loss: 0.1656
Epoch 8/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9748 - loss: 0.1560
Epoch 9/10
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

## 11. Model Prediction

In [282]:
# Prediction of Model
y_pred = model.predict(X_test)
y_pred


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


array([[0.48274213],
       [0.68619466],
       [0.46079767],
       [0.8840038 ],
       [0.8928452 ],
       [0.91691494],
       [0.6681259 ],
       [0.6370841 ],
       [0.72763896],
       [0.7654737 ],
       [0.5009792 ],
       [0.53604406],
       [0.27330357],
       [0.90238124],
       [0.4404514 ],
       [0.90841424],
       [0.24679978],
       [0.5352597 ],
       [0.26318866],
       [0.28183633],
       [0.24638075],
       [0.18762419],
       [0.66027313],
       [0.09598597],
       [0.36259395],
       [0.35081172],
       [0.5170427 ],
       [0.45010877],
       [0.24328211],
       [0.7514174 ],
       [0.47166947],
       [0.8015766 ],
       [0.7391518 ],
       [0.6559237 ],
       [0.09165455],
       [0.84691435],
       [0.55590606],
       [0.8104714 ],
       [0.40076777],
       [0.30986398],
       [0.03130935],
       [0.7741741 ],
       [0.39114532],
       [0.29451972],
       [0.5069871 ],
       [0.25421655],
       [0.63017017],
       [0.806

In [283]:
# If a prediction is more than 0.5, calling it a 1 (yes), otherwise calling it a 0 (no)."
predictions = np.where(y_pred > 0.5, 1, 0)
predictions


array([[0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [1],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
    

## 11. Model Evaluation

In [284]:
# Checking loss and accuracy
loss, accuracy = model.evaluate(X_train, y_train)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9922 - loss: 0.0812  
Loss: 0.08822926133871078
Accuracy: 0.9887640476226807
