In [83]:
import numpy as np
import pandas as pd
from sklearn import datasets, metrics, model_selection

In [84]:
def sigmoid(z):
    return 1.0/(1.0+np.exp(-z))


def sigmoid_derivative(f):
    return f * (1 - f)


def tanh(z):
    return np.tanh(z)


def tanh_derivative(f):
    return 1 - np.square(f)


def relu(z):
    return np.maximum(z, 0)


def relu_derivative(f):
    return int(f > 0)

ACTIVATIONS = {'sigmoid': sigmoid,
               'tanh': tanh,
               'relu': relu}
DERIVATIVES = {'sigmoid': sigmoid_derivative,
               'tanh': tanh_derivative,
               'relu': relu_derivative}

def mean_squared_error(y_pred, y_true):
    return 0.5 * (y_pred - y_true) ** 2


def mse_derivative(y_pred, y_true):
    return y_pred - y_true

In [85]:
class MultilayerPerceptron:

    def __init__(self, layers, activation_functions, learning_rate = 0.01, min_weight_dist = 1e-8, max_iter=1e4):
        self.layers_count = len(layers)
        self.activation_functions = activation_functions
        self.layers = layers
        self.min_weight_dist = min_weight_dist
        self.max_iter = max_iter
        self.weights = [2 * np.random.random((x + 1, y)) - 1 for x, y in zip(layers[:-1], layers[1:])]
        self.learning_rate = learning_rate

    def feedforward(self, x):
        activations = [np.hstack((np.ones((x.shape[0], 1)), x))] # добавляем столбец единиц для свободного коэффициента
        for i in range(self.layers_count - 2):
            z = np.dot(activations[i], self.weights[i])
            activation = ACTIVATIONS[self.activation_functions[i]](z)
            activations.append(np.hstack((np.ones((activation.shape[0], 1)), activation))) # # добавляем столбец единиц для свободного коэффициента
        activations.append(np.dot(activations[-1], self.weights[-1]))
        return activations

    def backpropagation(self, activations, y):
        error = activations[-1] - y
        weight_changes = [np.average(activations[-2][:, :, np.newaxis] * error[:, np.newaxis, :], axis=0)]
        for i in range(2, self.layers_count):
            error =  DERIVATIVES[self.activation_functions[-i]](activations[-i][:, 1:]) \
                * np.dot(error, self.weights[-i + 1].T[:, 1:])
            delta = activations[-i - 1][:, :, np.newaxis] * error[:, np.newaxis, :]
            weight_changes.append(np.average(delta, axis=0))
        weight_changes.reverse()
        return weight_changes

    def update_weights(self, weight_changes):
        for i in range(len(weight_changes) - 1):
            self.weights[i] += - self.learning_rate * weight_changes[i]
        self.weights[-1] += - self.learning_rate * weight_changes[-1]

    def stochastic_gradient_step(self, x, y):
        activations = self.feedforward(x)
        weight_changes = self.backpropagation(activations, y)
        self.update_weights(weight_changes)
        return weight_changes

    def fit(self, x, y):
        weight_dist = np.array((np.inf, np.inf))
        iter_num = 0
        batch_size = 5
        while weight_dist.any() > self.min_weight_dist and iter_num < self.max_iter:
            random_ind = np.random.randint(x.shape[0])
            w_dist = self.stochastic_gradient_step(x[random_ind:random_ind+batch_size], y[random_ind:random_ind+batch_size])
            weight_dist = []
            for dist in w_dist:
                weight_dist.append(np.linalg.norm(dist))
            weight_dist = np.array(weight_dist)
            iter_num += 1


    def predict(self, x):
        activations = self.feedforward(x)
        return activations[-1]

mp = MultilayerPerceptron([3, 3, 1], ['sigmoid', 'sigmoid', 'sigmoid'])

In [86]:
titanic_df = pd.read_csv("../data/titanic.csv")
titanic_df = titanic_df.fillna(titanic_df.mean())


In [87]:
titanic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1313 entries, 0 to 1312
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerID  1313 non-null   int64  
 1   Name         1313 non-null   object 
 2   PClass       1313 non-null   object 
 3   Age          1313 non-null   float64
 4   Sex          1313 non-null   object 
 5   Survived     1313 non-null   int64  
 6   SexCode      1313 non-null   int64  
dtypes: float64(1), int64(3), object(3)
memory usage: 71.9+ KB


In [88]:
class_mapping = {'1st': 1,
                 '2nd': 2,
                 '3rd': 3,
                 '*': 4 }

titanic_df['PClass'] = titanic_df['PClass'].map(class_mapping)
titanic_df['PClass'] = titanic_df['PClass'].astype('int64', copy=False)
titanic_df = titanic_df.drop(columns='Sex')

In [89]:
X = titanic_df.drop(columns=['Survived', 'Name', 'PassengerID'])
y = titanic_df['Survived']

In [90]:
X = np.array(X)
y = np.array(y).reshape(y.shape[0], 1)

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, train_size=0.75)

In [91]:
titanic_mp = MultilayerPerceptron([3, 5, 3, 1], ['sigmoid', 'sigmoid', 'sigmoid', 'sigmoid'], 0.1)

In [92]:
titanic_mp.fit(X_train, y_train)
k = titanic_mp.predict(X_test)

In [93]:
classes = []
for pred in k:
    if pred >= 0.5:
        classes.append(1)
    else:
        classes.append(0)

In [94]:
print(metrics.confusion_matrix(y_test, classes))
print(metrics.classification_report(y_test, classes))

[[211   4]
 [ 71  43]]
              precision    recall  f1-score   support

           0       0.75      0.98      0.85       215
           1       0.91      0.38      0.53       114

    accuracy                           0.77       329
   macro avg       0.83      0.68      0.69       329
weighted avg       0.81      0.77      0.74       329



In [95]:
iris = datasets.load_iris()
X_iris = iris.data
y_iris = iris.target
y_iris = y_iris.reshape(y_iris.shape[0], 1)


X_iris_train, X_iris_test, y_iris_train, y_iris_test = model_selection.train_test_split(X_iris, y_iris, train_size=0.8)

In [96]:
iris_mp = MultilayerPerceptron([4, 10, 10, 1], ['sigmoid', 'sigmoid', 'sigmoid', 'sigmoid'], 0.1)
iris_mp.fit(X_iris_train, y_iris_train)

In [97]:
iris_pred = iris_mp.predict(X_iris_test)
iris_classes = []
for pred in iris_pred:
    if pred < 0.5:
        iris_classes.append(0)
    elif pred <= 1.5:
        iris_classes.append(1)
    elif pred <= 2.5:
        iris_classes.append(2)

In [98]:
print(metrics.confusion_matrix(y_iris_test, iris_classes))
print(metrics.classification_report(y_iris_test, iris_classes))

[[ 7  0  0]
 [ 0  7  3]
 [ 0  0 13]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         7
           1       1.00      0.70      0.82        10
           2       0.81      1.00      0.90        13

    accuracy                           0.90        30
   macro avg       0.94      0.90      0.91        30
weighted avg       0.92      0.90      0.90        30

