In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
# Read the dataset; the path is relative to the notebook's working directory.
df=pd.read_csv("titanic.csv")
# Prints the frame's text repr (pandas elides the middle rows of a long frame).
print(f"Given data is: \n {df}")

Given data is: 
      Survived  (Intercept)  Sexmale   Age  SibSp  Parch     Fare
0           0            1        1  22.0      1      0   7.2500
1           1            1        0  38.0      1      0  71.2833
2           1            1        0  26.0      0      0   7.9250
3           1            1        0  35.0      1      0  53.1000
4           0            1        1  35.0      0      0   8.0500
..        ...          ...      ...   ...    ...    ...      ...
707         0            1        0  39.0      0      5  29.1250
708         0            1        1  27.0      0      0  13.0000
709         1            1        0  19.0      0      0  30.0000
710         1            1        1  26.0      0      0  30.0000
711         0            1        1  32.0      0      0   7.7500

[712 rows x 7 columns]


In [3]:
# Columns that are min-max scaled further down. `mx` / `mn` are per-column
# holders for the maximum / minimum and start out as 0 placeholders.
indexes = ['Age', 'SibSp', 'Parch', 'Fare']
mx = dict.fromkeys(indexes, 0)
mn = dict.fromkeys(indexes, 0)

In [4]:
# Record the largest and smallest value of every column listed in `indexes`.
for key in indexes:
    column = df[key]
    mx[key], mn[key] = np.max(column), np.min(column)
print(f"Maximum element in unfit columns are: \n {mx}")
print(f"Minimum element in unfit columns are: \n {mn}")

Maximum element in unfit columns are: 
 {'Age': 80.0, 'SibSp': 5, 'Parch': 6, 'Fare': 512.3292}
Minimum element in unfit columns are: 
 {'Age': 0.42, 'SibSp': 0, 'Parch': 0, 'Fare': 0.0}


In [5]:
# Min-max scale each listed column in place: (x - min) / (max - min).
# The frame is overwritten, so re-running this cell rescales already-scaled data.
for key in indexes:
    low = mn[key]
    span = mx[key] - low
    df[key] = (df[key] - low) / span
print(f"Normalized data is: \n {df}\n")

Normalized data is: 
      Survived  (Intercept)  Sexmale       Age  SibSp     Parch      Fare
0           0            1        1  0.271174    0.2  0.000000  0.014151
1           1            1        0  0.472229    0.2  0.000000  0.139136
2           1            1        0  0.321438    0.0  0.000000  0.015469
3           1            1        0  0.434531    0.2  0.000000  0.103644
4           0            1        1  0.434531    0.0  0.000000  0.015713
..        ...          ...      ...       ...    ...       ...       ...
707         0            1        0  0.484795    0.0  0.833333  0.056848
708         0            1        1  0.334004    0.0  0.000000  0.025374
709         1            1        0  0.233476    0.0  0.000000  0.058556
710         1            1        1  0.321438    0.0  0.000000  0.058556
711         0            1        1  0.396833    0.0  0.000000  0.015127

[712 rows x 7 columns]



In [6]:
# Keep a handle on the label column separately from the feature frame.
target = df['Survived']

In [7]:
# Remove the label column from `df` in place so only feature columns remain.
df.drop(columns='Survived', inplace=True)

In [8]:
def stdnorm(xi, beta):
    """Evaluate the standard normal density at the linear predictor ``xi @ beta``.

    Computes ``exp(-z**2 / 2) / sqrt(2 * pi)`` with ``z = xi @ beta``.

    Parameters
    ----------
    xi : feature vector (anything supporting ``@`` with ``beta``).
    beta : coefficient vector.

    Returns
    -------
    The density value; it is largest at ``z == 0`` where it equals
    ``1 / sqrt(2 * pi)`` (about 0.399).
    """
    z = xi @ beta
    exponent = (-z**2) / 2
    normaliser = math.sqrt(2 * math.pi)
    return math.e**exponent / normaliser

In [9]:
def grad(xi, yi, beta):
    """Per-observation gradient term used by the coefficient update.

    With ``z = xi @ beta`` and ``k = stdnorm(xi, beta)`` this returns
    ``z * xi * (-yi + (1 - yi) * k / (1 - k))``.

    Parameters
    ----------
    xi : feature vector of one observation.
    yi : label of that observation (the visible caller passes 0 or 1).
    beta : current coefficient vector.

    Returns
    -------
    A vector shaped like ``xi``.
    """
    density = stdnorm(xi, beta)
    odds = density / (1 - density)
    weight = -yi + (1 - yi) * odds
    return (xi @ beta) * xi * weight

In [10]:
# Iteratively update beta: each pass sums grad() over every row and then
# steps beta by `t` times that sum.
p = df.shape[1]                       # one coefficient per column left in df
beta = np.ones(p).astype('float64')   # start from the all-ones vector
t = 0.001                             # step size
n_iter = 200                          # fixed number of passes; no convergence test
# Convert once instead of rebuilding a row Series on every inner step.
features = df.to_numpy(dtype='float64')
labels = target.to_numpy()
for j in range(n_iter):
    grad_sum = np.zeros(p)
    # Accumulate row by row, in row order, so the floating-point sum is
    # built exactly as a plain per-row loop would build it.
    for xi, yi in zip(features, labels):
        grad_sum += grad(xi, yi, beta)
    beta += t*grad_sum
    print(f"After iteration {j+1} :")
    print(f"Updated beta is : \n {beta}")

After iteration 1 :
Updated beta is : 
 [0.46282008 0.77688869 0.80524578 0.94022723 0.94817818 0.9375493 ]
After iteration 2 :
Updated beta is : 
 [0.17971937 0.67958636 0.70222033 0.90425495 0.91586224 0.89760594]
After iteration 3 :
Updated beta is : 
 [0.03263167 0.65402947 0.64943404 0.88178676 0.89434399 0.8698688 ]
After iteration 4 :
Updated beta is : 
 [-0.05343059  0.65771031  0.6194127   0.86601777  0.87826104  0.84816921]
After iteration 5 :
Updated beta is : 
 [-0.10908972  0.67367652  0.60074781  0.85383932  0.86516664  0.82977793]
After iteration 6 :
Updated beta is : 
 [-0.14755101  0.6952456   0.5884968   0.84377414  0.85387765  0.81339964]
After iteration 7 :
Updated beta is : 
 [-0.17542534  0.71939435  0.58018503  0.8350328   0.84375089  0.79833475]
After iteration 8 :
Updated beta is : 
 [-0.19644243  0.74456109  0.57441776  0.82715427  0.83440577  0.78416863]
After iteration 9 :
Updated beta is : 
 [-0.21288284  0.76985256  0.57034279  0.81985401  0.82560513  0.77

After iteration 73 :
Updated beta is : 
 [-0.33885364  1.17753241  0.61043855  0.54780623  0.47910107  0.19203975]
After iteration 74 :
Updated beta is : 
 [-0.33803451  1.17751463  0.61142593  0.545129    0.47525053  0.18523022]
After iteration 75 :
Updated beta is : 
 [-0.33720488  1.17747029  0.61240029  0.54248407  0.47142881  0.17846454]
After iteration 76 :
Updated beta is : 
 [-0.33636562  1.17740118  0.61336154  0.53987101  0.46763552  0.17174225]
After iteration 77 :
Updated beta is : 
 [-0.33551755  1.17730901  0.6143096   0.53728942  0.46387029  0.1650629 ]
After iteration 78 :
Updated beta is : 
 [-0.33466144  1.17719536  0.61524441  0.53473889  0.46013275  0.15842605]
After iteration 79 :
Updated beta is : 
 [-0.33379803  1.17706171  0.61616593  0.53221905  0.45642254  0.15183128]
After iteration 80 :
Updated beta is : 
 [-0.33292798  1.17690947  0.61707413  0.5297295   0.45273931  0.14527818]
After iteration 81 :
Updated beta is : 
 [-0.33205194  1.17673994  0.617969    0

After iteration 144 :
Updated beta is : 
 [-0.27718316  1.15616674  0.65120583  0.41993869  0.261645   -0.20084151]
After iteration 145 :
Updated beta is : 
 [-0.27639468  1.15583739  0.65144641  0.41883126  0.25922069 -0.20526624]
After iteration 146 :
Updated beta is : 
 [-0.2756095   1.15550945  0.65168048  0.41773842  0.25681037 -0.20966483]
After iteration 147 :
Updated beta is : 
 [-0.27482762  1.15518294  0.65190813  0.41666005  0.25441393 -0.21403742]
After iteration 148 :
Updated beta is : 
 [-0.27404904  1.15485785  0.65212945  0.41559598  0.25203125 -0.21838419]
After iteration 149 :
Updated beta is : 
 [-0.27327377  1.15453419  0.65234453  0.41454606  0.2496622  -0.22270529]
After iteration 150 :
Updated beta is : 
 [-0.2725018   1.15421198  0.65255348  0.41351016  0.24730669 -0.22700087]
After iteration 151 :
Updated beta is : 
 [-0.27173314  1.15389121  0.65275638  0.41248812  0.24496458 -0.2312711 ]
After iteration 152 :
Updated beta is : 
 [-0.27096777  1.1535719   0.65

In [11]:
# Report beta as left by the fitting cell above, i.e. the value after its
# last update step (that cell runs a fixed number of steps with no
# convergence check).
print(f"MLE of beta is: \n {beta}\n")

MLE of beta is: 
 [-0.23799673  1.13993185  0.65691815  0.37675694  0.14453475 -0.4124524 ]



In [12]:
# Feature matrix as a plain NumPy array, one row per row of `df`.
X = df.to_numpy()

In [13]:
# Fraction of rows whose thresholded stdnorm() score matches the label.
# `X` is built from the same `df` that the fitting loop iterates over, so this
# is measured on the fitted data even though the message says "test data".
# NOTE(review): stdnorm() is exp(-z**2/2)/sqrt(2*pi), which never exceeds
# ~0.399, so `prob > 0.5` is always False and this reduces to the share of
# rows with target == 0. Confirm the intended score function / threshold.
correct = 0
for i in range(len(X)):
    prob = stdnorm(X[i], beta)   # score row i (the index was previously fixed at 1)
    correct += (prob > 0.5) == target[i]
print(f"Accuracy on test data is: {correct/len(X)} \n")

Accuracy on test data is: 0.5955056179775281 



In [14]:
# Two hand-written feature vectors, stored as float64 so they can later be
# overwritten with fractional values.
Jack = np.array([1, 1, 20, 0, 0, 7.5], dtype='float64')
Rose = np.array([1, 0, 19, 1, 1, 512], dtype='float64')
print(f"Given data of Jack: \n{Jack}\n")
print(f"Given data of Rose: \n{Rose}\n")

Given data of Jack: 
[ 1.   1.  20.   0.   0.   7.5]

Given data of Rose: 
[  1.   0.  19.   1.   1. 512.]



In [15]:
# Apply the same min-max scaling to the hand-written vectors.
# Position 2 is where the first entry of `indexes` sits in Jack / Rose
# (positions 0 and 1 are left untouched); the remaining entries follow in order.
for i, key in enumerate(indexes, start=2):
    low = mn[key]
    span = mx[key] - low
    Jack[i] = (Jack[i] - low) / span
    Rose[i] = (Rose[i] - low) / span
print(f"Normalized data of Jack: \n{Jack}\n")
print(f"Normalized data of Rose: \n{Rose}\n")

Normalized data of Jack: 
[1.         1.         0.24604172 0.         0.         0.01463903]

Normalized data of Rose: 
[1.         0.         0.23347575 0.2        0.16666667 0.99935744]



In [16]:
# Score both vectors with stdnorm() and the current beta.
# NOTE(review): stdnorm() returns exp(-z**2/2)/sqrt(2*pi), which is at most
# ~0.399 — confirm that this density is the intended survival probability.
print(f"Probability of survival of Jack is: {stdnorm(Jack, beta)}\n")
print(f"Probability of survival of Rose is: {stdnorm(Rose, beta)}\n")

Probability of survival of Jack is: 0.2280661248269023

Probability of survival of Rose is: 0.36865661591460586

