In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic-survivore/train.csv
/kaggle/input/titanic-survivore/test.csv


In [2]:
def add_intercept(x):
    x["intercept"]=1
    return x

In [3]:
def load_data(path, label=["Survived"], in_attri = ["Pclass", "Sex", "Age", "Fare"], add_intercept=False):
    def addIntercept(x):
        global add_intercept
        return add_intercept(x)
    input = pd.read_csv(path, delimiter = ',', usecols = in_attri)
    output = None
    if label is not None:
        output = pd.read_csv(path, delimiter = ',', usecols = label)
    if input.ndim == 1:
        input = np.expand_dims(input, -1)
    if add_intercept:
        input = addIntercept(input)
    return input, output
    

In [4]:
input, output = load_data("/kaggle/input/titanic-survivore/train.csv", add_intercept=True)
input.loc[input["Sex"]=="male", "Sex"] = 1
input.loc[input["Sex"]=="female", "Sex"] = 0


In [5]:
def nanHandel(x):
    if not isinstance(x, pd.DataFrame):
        raise ValueError("The argument x must be a Pandas DataFrame.")
    for attribute in x.columns:
        nan = x[attribute].isna()
        for i in range(len(nan)):
            if nan[i]:
                try:
                    x.loc[i, attribute] = (x.loc[i-1, attribute]+x.loc[i+1, attribute])/2
                except Exception:
                    x.loc[i, attribute] = 0

In [6]:
nanHandel(input)
nanHandel(output)


In [7]:
def sigmoid(x):
    return np.exp(x) / (1 + np.exp(x))
def d_sigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))

In [8]:
class newton_Linear_model():
    def __init__(self, max_itr = 100, learning_rate = 0.1, eps = 1e-5, theta = None):
        self.max_itr = max_itr
        self.learning_rate = learning_rate
        self.eps = eps
        self.theta = theta
    def train(self, x, y):
        col_count = len(x.columns)
        x_np = x.to_numpy().astype(float)
        y_np = y.to_numpy().astype(float).reshape(x_np.shape[0])
        self.theta = np.zeros(col_count)
        m,n = x_np.shape
        for i in range(self.max_itr):
            y_prid = sigmoid(x_np@self.theta)
            gradient = (1/m)*x_np.T@(y_prid-y_np)
            diag = y_prid*(1-y_prid)
            H = (1/m) * (x_np.T * diag) @ x_np 
            tPre = self.theta.copy()
            self.theta -= np.linalg.inv(H)@gradient
            if np.linalg.norm(self.theta - tPre, ord = 1) < self.eps:
                print(f"Converged after {i} iteration")
                break
    def predict(self,x,path):
        x = x.to_numpy().astype(float)
        m,n = x.shape
        yPrid = sigmoid(x@self.theta)
        data = np.zeros((m,2),dtype = x.dtype)
        data[:,0]=np.arange(0,m,1)
        data[:,1]= yPrid
        np.savetxt(path, data, delimiter = ',', fmt='%.3f', header = "PassangerID,Survivore", comments='')
        

In [9]:
model_instance = newton_Linear_model()


In [10]:
model_instance.train(input,output)

In [11]:
px, py= load_data("/kaggle/input/titanic-survivore/test.csv", label = None, add_intercept = True)

In [12]:
py

In [13]:
px.loc[px["Sex"]=="male", "Sex"] = 1
px.loc[px["Sex"]=="female", "Sex"] = 0
nanHandel(px)
print(px)

     Pclass Sex   Age      Fare  intercept
0         3   1  34.5    7.8292          1
1         3   0  47.0    7.0000          1
2         2   1  62.0    9.6875          1
3         3   1  27.0    8.6625          1
4         3   0  22.0   12.2875          1
..      ...  ..   ...       ...        ...
413       3   1  33.5    8.0500          1
414       1   0  39.0  108.9000          1
415       3   1  38.5    7.2500          1
416       3   1   NaN    8.0500          1
417       3   1   0.0   22.3583          1

[418 rows x 5 columns]


In [14]:
model_instance.predict(px,"submit.csv")