# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import gradient_descent
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report

# Opening Dataset

In [2]:
train_data = pd.read_csv('./Data/train.csv', skipinitialspace = True, encoding = 'utf-8')
test_data = pd.read_csv('./Data/test.csv', skipinitialspace = True, encoding = 'utf-8')

In [3]:
train_data

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",female,29.0,1,0,228414,26.0000,,S,1
1,3,"Williams, Mr. Howard Hugh ""Harry""",male,,0,0,A/5 2466,8.0500,,S,0
2,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",male,39.0,0,0,250655,26.0000,,S,0
3,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",female,29.0,0,4,349909,21.0750,,S,0
4,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.0500,,S,0
...,...,...,...,...,...,...,...,...,...,...,...
663,2,"Ilett, Miss. Bertha",female,17.0,0,0,SO/C 14885,10.5000,,S,1
664,3,"Morrow, Mr. Thomas Rowan",male,,0,0,372622,7.7500,,Q,0
665,3,"Bing, Mr. Lee",male,32.0,0,0,1601,56.4958,,S,1
666,3,"Strandberg, Miss. Ida Sofia",female,22.0,0,0,7553,9.8375,,S,0


In [4]:
test_data

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,2,"Davies, Master. John Morgan Jr",male,8.0,1,1,C.A. 33112,36.7500,,S
1,1,"Leader, Dr. Alice (Farnham)",female,49.0,0,0,17465,25.9292,D17,S
2,3,"Kilgannon, Mr. Thomas J",male,,0,0,36865,7.7375,,Q
3,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Chr...",female,24.0,2,1,243847,27.0000,,S
4,1,"McGough, Mr. James Robert",male,36.0,0,0,PC 17473,26.2875,E25,S
...,...,...,...,...,...,...,...,...,...,...
218,3,"Lindqvist, Mr. Eino William",male,20.0,1,0,STON/O 2. 3101285,7.9250,,S
219,1,"Butt, Major. Archibald Willingham",male,45.0,0,0,113050,26.5500,B38,S
220,1,"Penasco y Castellana, Mrs. Victor de Satode (M...",female,17.0,1,0,PC 17758,108.9000,C65,C
221,3,"Holm, Mr. John Fredrik Alexander",male,43.0,0,0,C 7075,6.4500,,S


# Preprocessing Data

In [5]:
train_data.describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived
count,668.0,536.0,668.0,668.0,668.0,668.0
mean,2.296407,29.70056,0.528443,0.407186,32.064552,0.402695
std,0.831638,14.240257,1.080327,0.854695,45.320835,0.490808
min,1.0,0.67,0.0,0.0,0.0,0.0
25%,2.0,21.0,0.0,0.0,7.925,0.0
50%,3.0,29.0,0.0,0.0,14.75,0.0
75%,3.0,38.25,1.0,0.0,31.275,1.0
max,3.0,80.0,8.0,6.0,512.3292,1.0


In [6]:
test_data.describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
count,223.0,178.0,223.0,223.0,223.0
mean,2.345291,29.694775,0.506726,0.304933,32.622551
std,0.850047,15.398053,1.1697,0.634108,61.062047
min,1.0,0.42,0.0,0.0,0.0
25%,2.0,19.25,0.0,0.0,7.8792
50%,3.0,27.0,0.0,0.0,12.475
75%,3.0,37.75,1.0,0.0,30.0354
max,3.0,71.0,8.0,2.0,512.3292


In [7]:
average_age = train_data.groupby('Pclass')['Age'].transform('mean')

gender_map = {'female': 1, 'male': 2}
train_data['Age'].fillna(average_age, inplace = True)
train_data['Sex'] = train_data['Sex'].map(gender_map)

most_common_embarked = train_data.Embarked[train_data.Embarked != np.nan].mode()[0]
train_data['Embarked'].fillna(most_common_embarked, inplace = True)
embarked_map = {'C': 1, 'Q': 2, 'S': 3}
train_data['Embarked'] = train_data['Embarked'].map(embarked_map)

In [8]:
train_data

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
0,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",1,29.000000,1,0,228414,26.0000,,3,1
1,3,"Williams, Mr. Howard Hugh ""Harry""",2,25.839423,0,0,A/5 2466,8.0500,,3,0
2,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",2,39.000000,0,0,250655,26.0000,,3,0
3,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",1,29.000000,0,4,349909,21.0750,,3,0
4,3,"Sutehall, Mr. Henry Jr",2,25.000000,0,0,SOTON/OQ 392076,7.0500,,3,0
...,...,...,...,...,...,...,...,...,...,...,...
663,2,"Ilett, Miss. Bertha",1,17.000000,0,0,SO/C 14885,10.5000,,3,1
664,3,"Morrow, Mr. Thomas Rowan",2,25.839423,0,0,372622,7.7500,,2,0
665,3,"Bing, Mr. Lee",2,32.000000,0,0,1601,56.4958,,3,1
666,3,"Strandberg, Miss. Ida Sofia",1,22.000000,0,0,7553,9.8375,,3,0


In [9]:
train_data = train_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked', 'Fare', 'Survived']]
train_data.describe()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Embarked,Fare,Survived
count,668.0,668.0,668.0,668.0,668.0,668.0,668.0,668.0
mean,2.296407,1.639222,29.381738,0.528443,0.407186,2.526946,32.064552,0.402695
std,0.831638,0.480586,12.90993,1.080327,0.854695,0.805263,45.320835,0.490808
min,1.0,1.0,0.67,0.0,0.0,1.0,0.0,0.0
25%,2.0,1.0,23.0,0.0,0.0,2.0,7.925,0.0
50%,3.0,2.0,27.0,0.0,0.0,3.0,14.75,0.0
75%,3.0,2.0,37.0,1.0,0.0,3.0,31.275,1.0
max,3.0,2.0,80.0,8.0,6.0,3.0,512.3292,1.0


In [10]:
average_age = test_data.groupby('Pclass')['Age'].transform('mean')

gender_map = {'female': 1, 'male': 2}
test_data['Age'].fillna(average_age, inplace = True)
test_data['Sex'] = test_data['Sex'].map(gender_map)

most_common_embarked = test_data.Embarked.mode()[0]
test_data['Embarked'].fillna(most_common_embarked, inplace = True)
embarked_map = {'C': 1, 'Q': 2, 'S': 3}
test_data['Embarked'] = test_data['Embarked'].map(embarked_map)

In [11]:
test_data = test_data[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare','Embarked']]
test_data.describe()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
count,223.0,223.0,223.0,223.0,223.0,223.0,223.0
mean,2.345291,1.672646,28.996404,0.506726,0.304933,32.622551,2.565022
std,0.850047,0.470303,14.13723,1.1697,0.634108,61.062047,0.749796
min,1.0,1.0,0.42,0.0,0.0,0.0,1.0
25%,2.0,1.0,22.0,0.0,0.0,7.8792,2.0
50%,3.0,2.0,24.0,0.0,0.0,12.475,3.0
75%,3.0,2.0,36.0,1.0,0.0,30.0354,3.0
max,3.0,2.0,71.0,8.0,2.0,512.3292,3.0


# Splitting the Data into X and Y

In [12]:
x = np.array(train_data.iloc[:, :-1])
y = np.array(train_data.iloc[:,-1])

In [13]:
x

array([[ 2.        ,  1.        , 29.        , ...,  0.        ,
         3.        , 26.        ],
       [ 3.        ,  2.        , 25.83942308, ...,  0.        ,
         3.        ,  8.05      ],
       [ 2.        ,  2.        , 39.        , ...,  0.        ,
         3.        , 26.        ],
       ...,
       [ 3.        ,  2.        , 32.        , ...,  0.        ,
         3.        , 56.4958    ],
       [ 3.        ,  1.        , 22.        , ...,  0.        ,
         3.        ,  9.8375    ],
       [ 3.        ,  1.        , 25.83942308, ...,  0.        ,
         2.        , 15.5       ]])

In [14]:
y

array([1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,

# Scaling the Data

In [15]:
sc = StandardScaler()

In [16]:
x = sc.fit_transform(x)
x

array([[-0.35668092, -1.33108386, -0.02959151, ..., -0.47676762,
         0.58789299, -0.13391404],
       [ 0.84666683,  0.75126747, -0.27459247, ..., -0.47676762,
         0.58789299, -0.5302759 ],
       [-0.35668092,  0.75126747,  0.74558647, ..., -0.47676762,
         0.58789299, -0.13391404],
       ...,
       [ 0.84666683,  0.75126747,  0.20296189, ..., -0.47676762,
         0.58789299,  0.53947717],
       [ 0.84666683, -1.33108386, -0.57221609, ..., -0.47676762,
         0.58789299, -0.49080533],
       [ 0.84666683, -1.33108386, -0.27459247, ..., -0.47676762,
        -0.65486814, -0.36576917]])

# Splitting the Training Data into Train and Test

In [17]:
x_train, x_test, y_train, y_test = train_test_split(x, y) 

In [18]:
x_train.shape

(501, 7)

In [19]:
x_test.shape

(167, 7)

In [20]:
y_train.shape

(501,)

In [21]:
y_test.shape

(167,)

# Training the Algorithm

In [22]:
inbuild_algo = linear_model.LogisticRegression()
created_algo = gradient_descent.LogisticRegression(iterations = 10000, tolerance = 0.000001)

In [23]:
inbuild_algo.fit(x_train, y_train)

In [24]:
created_algo.fit(x_train, y_train)

# Testing and Comparing the Algorithm

In [25]:
print(inbuild_algo.score(x_train, y_train))

0.7864271457085829


In [26]:
print(created_algo.score(x_train, y_train))

0.7864271457085829


In [27]:
print(inbuild_algo.score(x_test, y_test))

0.8143712574850299


In [28]:
print(created_algo.score(x_test, y_test))

0.8143712574850299


In [29]:
y_train_pred = created_algo.predict(x_train)
print('Report for Created Algo:')
print(confusion_matrix(y_train, y_train_pred))
print(classification_report(y_train, y_train_pred))

y_train_pred = inbuild_algo.predict(x_train)
print('Report for Inbuild Algo:')
print(confusion_matrix(y_train, y_train_pred))
print(classification_report(y_train, y_train_pred))

Report for Created Algo:
[[252  48]
 [ 59 142]]
              precision    recall  f1-score   support

           0       0.81      0.84      0.82       300
           1       0.75      0.71      0.73       201

    accuracy                           0.79       501
   macro avg       0.78      0.77      0.78       501
weighted avg       0.79      0.79      0.79       501

Report for Inbuild Algo:
[[252  48]
 [ 59 142]]
              precision    recall  f1-score   support

           0       0.81      0.84      0.82       300
           1       0.75      0.71      0.73       201

    accuracy                           0.79       501
   macro avg       0.78      0.77      0.78       501
weighted avg       0.79      0.79      0.79       501



In [30]:
y_test_pred = created_algo.predict(x_test)
print('Report for Created Algo:')
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))

y_test_pred = created_algo.predict(x_test)
print('Report for Inbuild Algo:')
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))

Report for Created Algo:
[[88 11]
 [20 48]]
              precision    recall  f1-score   support

           0       0.81      0.89      0.85        99
           1       0.81      0.71      0.76        68

    accuracy                           0.81       167
   macro avg       0.81      0.80      0.80       167
weighted avg       0.81      0.81      0.81       167

Report for Inbuild Algo:
[[88 11]
 [20 48]]
              precision    recall  f1-score   support

           0       0.81      0.89      0.85        99
           1       0.81      0.71      0.76        68

    accuracy                           0.81       167
   macro avg       0.81      0.80      0.80       167
weighted avg       0.81      0.81      0.81       167



In [31]:
print(created_algo.predict_proba(x_train))

[[0.85073321 0.14926679]
 [0.54720747 0.45279253]
 [0.54463122 0.45536878]
 ...
 [0.19041988 0.80958012]
 [0.4353016  0.5646984 ]
 [0.88198089 0.11801911]]


In [32]:
print(inbuild_algo.predict_proba(x_train))

[[0.84881034 0.15118966]
 [0.55119339 0.44880661]
 [0.54834975 0.45165025]
 ...
 [0.19692937 0.80307063]
 [0.4392213  0.5607787 ]
 [0.8793072  0.1206928 ]]


In [33]:
print(created_algo.predict_proba(x_test))

[[0.64113494 0.35886506]
 [0.87716685 0.12283315]
 [0.05109952 0.94890048]
 [0.04241881 0.95758119]
 [0.60300722 0.39699278]
 [0.86181729 0.13818271]
 [0.3846273  0.6153727 ]
 [0.89947316 0.10052684]
 [0.87183218 0.12816782]
 [0.36268673 0.63731327]
 [0.87179801 0.12820199]
 [0.74155144 0.25844856]
 [0.81493815 0.18506185]
 [0.10678572 0.89321428]
 [0.86711423 0.13288577]
 [0.38436425 0.61563575]
 [0.93892664 0.06107336]
 [0.9246633  0.0753367 ]
 [0.89000759 0.10999241]
 [0.90430313 0.09569687]
 [0.91867807 0.08132193]
 [0.93352351 0.06647649]
 [0.96641858 0.03358142]
 [0.39185737 0.60814263]
 [0.89030516 0.10969484]
 [0.02834613 0.97165387]
 [0.64191455 0.35808545]
 [0.27714347 0.72285653]
 [0.89031167 0.10968833]
 [0.89044391 0.10955609]
 [0.8563787  0.1436213 ]
 [0.54214442 0.45785558]
 [0.6682125  0.3317875 ]
 [0.75427995 0.24572005]
 [0.96913229 0.03086771]
 [0.66374678 0.33625322]
 [0.89869892 0.10130108]
 [0.9731419  0.0268581 ]
 [0.6328079  0.3671921 ]
 [0.43306935 0.56693065]


In [34]:
print(inbuild_algo.predict_proba(x_test))

[[0.64470485 0.35529515]
 [0.87458602 0.12541398]
 [0.05433931 0.94566069]
 [0.04622809 0.95377191]
 [0.59621943 0.40378057]
 [0.85959407 0.14040593]
 [0.38316551 0.61683449]
 [0.89650388 0.10349612]
 [0.86837655 0.13162345]
 [0.3671535  0.6328465 ]
 [0.86833792 0.13166208]
 [0.73693001 0.26306999]
 [0.81203302 0.18796698]
 [0.11183449 0.88816551]
 [0.86476144 0.13523856]
 [0.386603   0.613397  ]
 [0.93580171 0.06419829]
 [0.92150238 0.07849762]
 [0.88713684 0.11286316]
 [0.90011885 0.09988115]
 [0.91522679 0.08477321]
 [0.93036554 0.06963446]
 [0.96326825 0.03673175]
 [0.39676996 0.60323004]
 [0.88747324 0.11252676]
 [0.03113957 0.96886043]
 [0.63876448 0.36123552]
 [0.28359565 0.71640435]
 [0.88748059 0.11251941]
 [0.88628803 0.11371197]
 [0.85430114 0.14569886]
 [0.5483509  0.4516491 ]
 [0.66079019 0.33920981]
 [0.75330219 0.24669781]
 [0.96640537 0.03359463]
 [0.6576231  0.3423769 ]
 [0.88771831 0.11228169]
 [0.97079554 0.02920446]
 [0.63336798 0.36663202]
 [0.44130194 0.55869806]
