In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

In [71]:
iris = load_iris()

data, target = iris["data"], iris["target"].reshape(1, -1)
print(f"data의 type : {type(data)},  data의 shape : {data.shape}")
print(f"target의 type : {type(target)},  target의 shape : {target.shape}")

data의 type : <class 'numpy.ndarray'>,  data의 shape : (150, 4)
target의 type : <class 'numpy.ndarray'>,  target의 shape : (1, 150)


In [72]:
np.unique(target, return_counts=True)

(array([0, 1, 2]), array([50, 50, 50], dtype=int64))

In [73]:
print(np.max(data), np.min(data))

# Normalize data to keep our gradients manageable
data = data / 7.9
print(np.max(data), np.min(data))

7.9 0.1
1.0 0.012658227848101266


In [74]:
np.unique(target, return_counts=True)

(array([0, 1, 2]), array([50, 50, 50], dtype=int64))

In [78]:
# We'll shuffle the training set for a good measure
np.random.seed(2023)
shuffle_index = np.random.permutation(len(data))
# print(shuffle_index)
train_idx, test_idx = shuffle_index[:120], shuffle_index[120:]
print(test_idx)

[143  71  65 100  47  72  88 127 111  15  53 134 112 114  49  29  63 110
  28  58 131  39  77  22 101  52 116   3  25  87]


In [64]:
# To build a zero-classifier, map target value of 0 into 1 and map all other target values into 0
target_new = np.zeros(target.shape)
target_new[np.where(target == 0)] = 1
target = target_new

In [81]:
# We'll split data into train and test set. We also transpose data to keep each example as a column.
x_train, x_test = data[train_idx].T, data[test_idx].T
y_train, y_test = target[:, train_idx], target[:, test_idx]

print(f"x_train의 shape : {x_train.shape},   y_train의 shape : {y_train.shape}")
print(f"x_test의  shape : {x_test.shape},   y_test의 shape :  {y_test.shape}")

x_train의 shape : (4, 120),   y_train의 shape : (1, 120)
x_test의  shape : (4, 30),   y_test의 shape :  (1, 30)


In [69]:
np.unique(y_train, return_counts=True), np.unique(y_test, return_counts=True)

((array([0., 1.]), array([70, 50], dtype=int64)),
 (array([0.]), array([30], dtype=int64)))

In [27]:
def sigmoid(z):
    s = 1.0 / (1.0 + np.exp(-z))
    return s

In [28]:
def compute_loss(y, y_hat):
    m = y.shape[1]
    loss = -(1/m) * np.sum(np.multiply(y, np.log(y_hat)) + np.multiply((1-y), np.log(1-y_hat)))
    return loss

In [29]:
m = 60000
learning_rate = 1

X, Y = x_train, y_train
n_x, m = X.shape[0], X.shape[1]

W = np.random.randn(n_x, 1) * 0.01
b = np.zeros((1, 1))

Z = np.matmul(W.T, X) + b
A = sigmoid(Z)

cost = compute_loss(Y, A)

dW = (1/m) * np.matmul(X, (Y-A).T)
db = (1/m) * np.sum(Y-A)

W -= learning_rate * dW
b -= learning_rate * db

print(f"W의 shape : {W.shape}\nX의 shpae : {X.shape}\nb의 shape : {b.shape}\nZ의 shape : {Z.shape}\nA의 shape : {A.shape}\ncost의 value : {cost}")
print(f"dW의 shape : {dW.shape}\ndb의 shape : {db.shape}, Y의 shape {Y.shape}")

W의 shape : (4, 1)
X의 shpae : (4, 120)
b의 shape : (1, 1)
Z의 shape : (1, 120)
A의 shape : (1, 120)
cost의 value : 0.6913363486331121
dW의 shape : (4, 1)
db의 shape : (), Y의 shape (1, 120)


In [30]:
learning_rate = 1

X, Y = x_train, y_train
n_x, m = X.shape[0], X.shape[1]

W = np.random.randn(n_x, 1) * 0.01
b = np.zeros((1, 1))

for i in range(2001):
    Z = np.matmul(W.T, X) + b
    A = sigmoid(Z)
    
    cost = compute_loss(Y, A)
    
    dW = (1/m) * np.matmul(X, (A-Y).T)
    db = (1/m) * np.sum(A - Y)

    W -= learning_rate * dW
    b -= learning_rate * db
    
    if i % 100 == 0:
        print(f"Epoch {i:4d}의 cost : {cost}")
print()       
print(f"Final cost : {cost}")

Epoch    0의 cost : 0.6924733135364344
Epoch  100의 cost : 0.23817671404614882
Epoch  200의 cost : 0.14249143934333022
Epoch  300의 cost : 0.10221388345302652
Epoch  400의 cost : 0.08008600544527969
Epoch  500의 cost : 0.06607778253209391
Epoch  600의 cost : 0.05639576318346241
Epoch  700의 cost : 0.04929222555773965
Epoch  800의 cost : 0.04385042952917087
Epoch  900의 cost : 0.03954311624149281
Epoch 1000의 cost : 0.036045396089887674
Epoch 1100의 cost : 0.03314605860534258
Epoch 1200의 cost : 0.030701782019168813
Epoch 1300의 cost : 0.02861183862290324
Epoch 1400의 cost : 0.026803333988883727
Epoch 1500의 cost : 0.02522219258391912
Epoch 1600의 cost : 0.02382743765017366
Epoch 1700의 cost : 0.02258744063553549
Epoch 1800의 cost : 0.021477391373601936
Epoch 1900의 cost : 0.020477548933156977
Epoch 2000의 cost : 0.01957200557408419

Final cost : 0.01957200557408419


In [31]:
# To assess how well training performs, we'll see some accuracy measure
from sklearn.metrics import classification_report, confusion_matrix

Z = np.matmul(W.T, x_test) + b
A = sigmoid(Z)

predictions = (A > 0.5)[0, :]
labels = (y_test == 1)[0, :]

print(confusion_matrix(predictions, labels)) 

[[30]]


In [34]:
A > 0.5, y_test == 1

(array([[False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False,
         False, False, False]]),
 array([[False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False,
         False, False, False, False, False, False, False, False, False,
         False, False, False]]))

In [35]:
target

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.]])

In [38]:
np.unique(y_train, return_counts=True)

(array([0., 1.]), array([70, 50], dtype=int64))