## Ćwiczenie 3 - Algorytm propagacji wstecznej
Maciej Dutkowski 260396

#### Import i porzygotowanie danych

In [619]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

In [620]:
from sklearn.model_selection import train_test_split

In [621]:
heart_disease = fetch_ucirepo(id=45) 
  
x_train = heart_disease.data.features 
y_train = heart_disease.data.targets

In [622]:
categorical_features_labels = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal', 'ca']
numerical_features_labels = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']

In [623]:
indices_to_keep = x_train.dropna().index
x_train = x_train.loc[indices_to_keep].reset_index()
y_train = y_train.loc[indices_to_keep].reset_index()

In [624]:
y_train = y_train['num'].apply(lambda x: 0 if x == 0 else 1)

In [625]:
x_train = pd.get_dummies(x_train, columns = categorical_features_labels)
x_train = x_train.reset_index(drop=True)
boolean_columns = x_train.select_dtypes(include=bool).columns
x_train[boolean_columns] = x_train[boolean_columns].astype(int)

#Normalizacja
scaler = MinMaxScaler()
x_train[numerical_features_labels] = scaler.fit_transform(x_train[numerical_features_labels])

x_train = x_train.drop("index", axis=1)

In [626]:
display(x_train,y_train)

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,sex_0,sex_1,cp_1,cp_2,cp_3,...,slope_1,slope_2,slope_3,thal_3.0,thal_6.0,thal_7.0,ca_0.0,ca_1.0,ca_2.0,ca_3.0
0,0.708333,0.481132,0.244292,0.603053,0.370968,0,1,1,0,0,...,0,0,1,0,1,0,1,0,0,0
1,0.791667,0.622642,0.365297,0.282443,0.241935,0,1,0,0,0,...,0,1,0,1,0,0,0,0,0,1
2,0.791667,0.245283,0.235160,0.442748,0.419355,0,1,0,0,0,...,0,1,0,0,0,1,0,0,1,0
3,0.166667,0.339623,0.283105,0.885496,0.564516,0,1,0,0,1,...,0,0,1,1,0,0,1,0,0,0
4,0.250000,0.339623,0.178082,0.770992,0.225806,1,0,0,1,0,...,1,0,0,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,0.583333,0.433962,0.262557,0.396947,0.032258,1,0,0,0,0,...,0,1,0,0,0,1,1,0,0,0
293,0.333333,0.150943,0.315068,0.465649,0.193548,0,1,1,0,0,...,0,1,0,0,0,1,1,0,0,0
294,0.812500,0.471698,0.152968,0.534351,0.548387,0,1,0,0,0,...,0,1,0,0,0,1,0,0,1,0
295,0.583333,0.339623,0.011416,0.335878,0.193548,0,1,0,0,0,...,0,1,0,0,0,1,0,1,0,0


0      0
1      1
2      1
3      0
4      0
      ..
292    1
293    1
294    1
295    1
296    1
Name: num, Length: 297, dtype: int64

In [627]:
X_train, X_test, Y_train, Y_test = train_test_split(x_train, y_train, test_size = 0.5, random_state = 42)

### Model wielowarswowej sieci

In [628]:
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [629]:
def hidden_activation(z: np.ndarray):
    return np.maximum(0, z)

def hidden_activation_deriv(z: np.ndarray):
    return np.where(z > 0, 1, 0)

def output_activation(z: np.ndarray):
    return 1 / (1 + np.exp(-z))

def output_activation_deriv(z: np.ndarray):
    sig = hidden_activation(z)
    return sig * (1 - sig)

In [630]:
def lce_cost_deriv(y: float, y_pred: float):
    return np.array([-(y/y_pred)])

In [631]:
def calculate_loss(y, z, a, w, L):   
    loss = []
    loss.append((lce_cost_deriv(y, a[L-1]) * output_activation_deriv(z[L-1]))[0])
    for i in range(L-2, -1, -1):
        loss_i = (w[i+1].T @ loss[0]) * hidden_activation_deriv(z[i])
        loss.insert(0, loss_i)
    return loss


In [632]:
def update_weights(w, b, a, x, loss, alpha_rate: float, L):    
    for i in range(L-1, -1, -1):
        if(i > 0):
            w[i] = w[i] - alpha_rate * np.outer(loss[i], a[i-1])
        else:
            w[i] = w[i] - alpha_rate * np.outer(loss[i], x)
        b[i] = b[i] - alpha_rate * loss[i]


In [633]:
def one_sample_learn(x, y, w, b, L, alpha_rate):    
    z = []
    a = []
    for i in range(0, L):
        if(i == 0):
            z.append((w[i] @ x) + b[i])
        else:
            z.append((w[i] @ a[i-1]) + b[i])

        if(i < L-1):
            a.append(hidden_activation(z[i]))
        else:
            a.append(output_activation(z[i]))

    loss = calculate_loss(y=y, z=z, a=a, w=w, L=L)
    update_weights(w=w, b=b, a=a, x=x, loss=loss, alpha_rate=alpha_rate, L=L)

In [634]:

def generate_random_network_2(L, M):
    w = []
    b = []

    # He Initialization for the first hidden layer weights
    w.append(np.random.randn(M, 28) * np.sqrt(2. / 28))
    b.append(np.zeros(M))  # Biases initialized to zeros

    # He Initialization for subsequent hidden layers
    for i in range(L-2):
        w.append(np.random.randn(M, M) * np.sqrt(2. / M))
        b.append(np.zeros(M))  # Biases initialized to zeros

    # Xavier/Glorot Initialization for the output layer weights
    w.append(np.random.randn(1, M) * np.sqrt(2. / (M + 1)))
    b.append(np.zeros(1))  # Output layer bias initialized to zero

    return w, b

In [635]:
def generate_random_network(L, M):
    w = []
    b = []
    w.append(np.random.rand(M, 28))
    b.append(np.random.rand(M))
    for i in range(L-2):
        w.append(np.random.rand(M, M))
        b.append(np.random.rand(M))
    w.append(np.random.rand(1, M))
    b.append(np.random.rand(1))
    return w,b

In [636]:
def learn_network(x_train, y_train, L, M, alpha_rate, epochs):    
    w,b = generate_random_network_2(L,M)
    print(b)
    for _ in range(epochs):
        for i, x in x_train.iterrows():
            x = x.to_numpy()
            one_sample_learn(x=x, y=y_train[i], w=w, b=b, L=L, alpha_rate=alpha_rate)
    print(b)
    return w,b

In [637]:
def model(x_test, w, b, L):
    y_pred_test = []
    for _, x in x_test.iterrows():
        x = x.to_numpy()
        z = []
        a = []
        for i in range(0, L):
            if(i == 0):
                z.append((w[i] @ x) + b[i])
            else:
                z.append((w[i] @ a[i-1]) + b[i])

            if(i < L-1):
                a.append(hidden_activation(z[i]))
            else:
                a.append(output_activation(z[i]))
        y_pred_test.append(a[L-1][0])
    return y_pred_test

In [650]:
w,b = learn_network(x_train=X_train, y_train=Y_train, L=3, M=2, alpha_rate=0.01, epochs=50)

[array([0., 0.]), array([0., 0.]), array([0.])]
[array([0., 0.]), array([0., 0.]), array([0.])]


In [651]:
y_pred_test = model(x_test=X_test, w=w, b=b, L=2)
print(y_pred_test)

threshold = 0.5
y_pred_test_binary = np.where(np.array(y_pred_test) < threshold, 0, 1)
print(y_pred_test_binary)

accuracy = accuracy_score(Y_test, y_pred_test_binary)
precision = precision_score(Y_test, y_pred_test_binary)
recall = recall_score(Y_test, y_pred_test_binary)
f1 = f1_score(Y_test, y_pred_test_binary)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

[0.5, 0.5, 0.31082154449405164, 0.5, 0.5, 0.5, 0.4960651930654472, 0.5, 0.5, 0.5135319331345618, 0.5, 0.5, 0.47335875043836034, 0.48186631765625004, 0.4977369195718031, 0.48209699698265984, 0.5, 0.5, 0.5, 0.3500315388796514, 0.5, 0.5, 0.41656663396660587, 0.5057925518135705, 0.4071594837624859, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.3980891054772237, 0.5, 0.5, 0.27568873600850385, 0.5, 0.5, 0.5, 0.5, 0.5, 0.45266875292316583, 0.5, 0.5, 0.5, 0.46826052583563493, 0.534076691449525, 0.5, 0.4204675818568842, 0.5, 0.5, 0.5, 0.5, 0.5, 0.4683374391622676, 0.5, 0.4067937755978749, 0.48249601496808264, 0.3770145652657229, 0.5, 0.5, 0.4829641660727673, 0.5, 0.5, 0.5, 0.5027654345314234, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5053795619392336, 0.5, 0.32456465547386976, 0.5, 0.3788419872903152, 0.5, 0.5, 0.5028339471450409, 0.5, 0.40344464436802, 0.5, 0.5, 0.5, 0.5, 0.5, 0.4174760781427186, 0.5042705695488509, 0.5, 0.5, 0.46526011817447177, 0.5, 0.4743285262260287, 0.354763555