In [None]:
from sklearn.datasets import load_wine
# Load scikit-learn's wine dataset and split it into features and labels.
wine = load_wine()
X, y = wine.data, wine.target
# NOTE(review): the original note here read "cfar10 seafar10" - presumably a
# reminder to also try the CIFAR-10 dataset; confirm with the author.


In [None]:
#MODEL 1
# Architektura trzywarstwowa:
# - Warstwa wejściowa - 13 cech
# - Warstwa ukryta: 64 neurony, ReLU
# - Warstwa wyjściowa: 3 klasy, softmax
# Uczenie:
# - Gradient descent, cross-entropy loss, 1000 epok, learning rate 0.01
# - Wagi aktualizowane backpropagation
# Dane:
# - Standaryzowane (StandardScaler), etykiety kodowane jako one-hot
# Ewaluacja poprzez accuracy

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

# Load the wine dataset: feature matrix and integer class labels.
wine = load_wine()
X = wine.data
y = wine.target

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features. The scaler is fitted on the training split only,
# so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the labels against the full label set. Encoding each split
# on its own (as pd.get_dummies does) would produce fewer columns whenever a
# class happens to be absent from that split, silently misaligning the
# columns between train and test.
num_classes = np.unique(y).size
y_train = np.eye(num_classes)[y_train]
y_test = np.eye(num_classes)[y_test]

# Network and optimisation hyper-parameters.
input_size = X_train.shape[1]
hidden_size = 64
output_size = y_train.shape[1]
learning_rate = 0.01
epochs = 1000

# Small random initial weights (seeded for repeatability); biases start at zero.
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size) * 0.01
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01
b2 = np.zeros((1, output_size))

#Funkcje aktywacji i ich pochodne
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and zero."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Slope of ReLU: 1 where ``x`` is strictly positive, 0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

# Softmax used to normalise the output-layer scores.
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row's maximum is subtracted before exponentiating; this leaves the
    result unchanged but keeps ``np.exp`` from overflowing on large scores.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    weights = np.exp(x - row_max)
    normaliser = np.sum(weights, axis=1, keepdims=True)
    return weights / normaliser

# Cost function - categorical cross-entropy.
def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of ``y_true``.

    Args:
        y_true: one-hot targets, one row per sample.
        y_pred: predicted class probabilities, same shape as ``y_true``.

    Returns:
        The summed ``-y_true * log(y_pred)`` divided by ``y_true.shape[0]``.
    """
    m = y_true.shape[0]
    # Floor the probabilities before taking the log: an exact 0 would give
    # log(0) = -inf, and 0 * -inf = NaN would poison the whole sum.
    safe_pred = np.maximum(y_pred, 1e-12)
    loss = -np.sum(y_true * np.log(safe_pred)) / m
    return loss

# Derivative of the cost function.
def cross_entropy_loss_derivative(y_true, y_pred):
    """Return ``(y_pred - y_true)`` scaled by ``1 / y_true.shape[0]``.

    The caller uses this as the gradient with respect to the pre-softmax
    scores of the output layer.
    """
    batch_size = y_true.shape[0]
    residual = y_pred - y_true
    return residual / batch_size

# Forward propagation.
def forward_prop(X):
    """Run the two-layer network on ``X``.

    Reads the module-level parameters ``W1``, ``b1``, ``W2`` and ``b2``.

    Returns:
        ``(Z1, A1, Z2, A2)``: hidden pre-activation, hidden ReLU activation,
        output pre-activation and softmax output, in that order.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    output_pre = np.dot(hidden_act, W2) + b2
    output_prob = softmax(output_pre)
    return hidden_pre, hidden_act, output_pre, output_prob

# Backward propagation - compute the gradients.
def backward_prop(X, y, Z1, A1, Z2, A2):
    """Compute parameter gradients for one batch.

    Args:
        X: input batch that was fed to ``forward_prop``.
        y: one-hot targets for the batch.
        Z1: hidden-layer pre-activations.
        A1: hidden-layer activations.
        Z2: output pre-activations; not used here, kept so the signature
            mirrors the tuple returned by ``forward_prop``.
        A2: softmax outputs.

    Returns:
        ``(dW1, db1, dW2, db2)``, shaped like the module-level ``W1``, ``b1``,
        ``W2`` and ``b2``.
    """
    # Output layer: the loss derivative already includes the 1/batch factor.
    dZ2 = cross_entropy_loss_derivative(y, A2)
    dW2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    # Hidden layer: push the error back through W2, then gate it with the
    # ReLU slope evaluated at the pre-activation.
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)
    return dW1, db1, dW2, db2

# Train the model with full-batch gradient descent.
for epoch in range(epochs):
    # Reorder the samples each epoch; features and labels are permuted together.
    X_train, y_train = shuffle(X_train, y_train)

    Z1, A1, Z2, A2 = forward_prop(X_train)
    loss = cross_entropy_loss(y_train, A2)
    dW1, db1, dW2, db2 = backward_prop(X_train, y_train, Z1, A1, Z2, A2)

    # Update in place so forward_prop, which reads these module-level arrays,
    # sees the new values on the next epoch.
    for param, grad in ((W1, dW1), (b1, db1), (W2, dW2), (b2, db2)):
        param -= learning_rate * grad

    # Report the loss every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Evaluate on the held-out split.
Z1, A1, Z2, A2 = forward_prop(X_test)
predictions = A2.argmax(axis=1)
true_labels = np.argmax(y_test, axis=1)
accuracy = (predictions == true_labels).mean()
print(f'Accuracy: {accuracy}')

# Shapes of the gradients from the last training step, as a sanity check.
print(f"Shape of dW1: {dW1.shape}")
print(f"Shape of dW2: {dW2.shape}")
print(f"Shape of db1: {db1.shape}")
print(f"Shape of db2: {db2.shape}")

Epoch 0, Loss: 1.0990333421960812
Epoch 100, Loss: 1.0858826418522525
Epoch 200, Loss: 1.062233137278481
Epoch 300, Loss: 0.9913663178403466
Epoch 400, Loss: 0.8042442226049584
Epoch 500, Loss: 0.5363205693288589
Epoch 600, Loss: 0.33637801015270813
Epoch 700, Loss: 0.22207649424792386
Epoch 800, Loss: 0.1598383279270626
Epoch 900, Loss: 0.12361077441774491
Accuracy: 1.0
Shape of dW1: (13, 64)
Shape of dW2: (64, 3)
Shape of db1: (1, 64)
Shape of db2: (1, 3)


# Porównajmy to z gotowym modelem z biblioteki.

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, GlobalMaxPooling1D, Dropout, Flatten
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
# dropout masks and batch order repeat between runs.
tf.keras.utils.set_random_seed(42)

data = load_wine()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features using statistics from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Append a channel axis: each sample becomes (n_features, 1), so Conv1D
# slides its kernel along the feature axis.
X_train = np.expand_dims(X_train, axis=2)
X_test = np.expand_dims(X_test, axis=2)

model = Sequential()
# Declare the input with an explicit Input object; passing `input_shape` to
# the first layer triggers a deprecation warning in current Keras.
model.add(tf.keras.Input(shape=(X_train.shape[1], 1)))
model.add(Conv1D(filters=64, kernel_size=2, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Labels stay as integers, hence the sparse variant of the loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE: the test split doubles as validation data, so val_accuracy is the
# held-out score rather than an independent validation estimate.
# Keeping the History object in a variable stops its repr from being echoed
# as the cell output.
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 76ms/step - accuracy: 0.3948 - loss: 1.1423 - val_accuracy: 0.2500 - val_loss: 1.1130
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.2795 - loss: 1.1448 - val_accuracy: 0.3889 - val_loss: 1.0544
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.4081 - loss: 1.1020 - val_accuracy: 0.4444 - val_loss: 1.0215
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.4077 - loss: 1.0884 - val_accuracy: 0.4722 - val_loss: 0.9956
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.4063 - loss: 1.0747 - val_accuracy: 0.5278 - val_loss: 0.9773
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.4998 - loss: 1.0220 - val_accuracy: 0.5556 - val_loss: 0.9644
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7923eaec2b50>

In [None]:
#MODEL 2
# Architektura trzywarstwowa z pamięcią rekurencyjną:
# - Warstwa wejściowa: 13 cech
# - Warstwa ukryta: 64 neurony, sigmoid, rekurencyjne połączenie (RNN)
# - Warstwa wyjściowa: 3 klasy, softmax
# Uczenie:
# - Gradient descent, cross-entropy loss, 1000 epok, learning rate 0.01
# - Wagi aktualizowane backpropagation przez czas (BPTT)
# Dane:
# - Standaryzowane (StandardScaler), etykiety kodowane jako one-hot
# Ewaluacja poprzez accuracy

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from sklearn.datasets import load_wine


# Load the wine dataset: feature matrix and integer class labels.
wine = load_wine()
X = wine.data
y = wine.target

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features using statistics from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the labels against the full label set. Encoding each split
# on its own (as pd.get_dummies does) would produce fewer columns whenever a
# class happens to be absent from that split.
num_classes = np.unique(y).size
y_train = np.eye(num_classes)[y_train]
y_test = np.eye(num_classes)[y_test]

# Network and optimisation hyper-parameters.
input_size = X_train.shape[1]
hidden_size = 64
output_size = y_train.shape[1]
learning_rate = 0.01
epochs = 1000

# Small random initial weights (seeded for repeatability); biases start at zero.
np.random.seed(42)
Wxh = np.random.randn(input_size, hidden_size) * 0.01   # input  -> hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden (recurrent)
Why = np.random.randn(hidden_size, output_size) * 0.01  # hidden -> output
bh = np.zeros((1, hidden_size))
by = np.zeros((1, output_size))

#Funkcje aktywacji i ich pochodne
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This equals the sigmoid's slope only when ``x`` is already a sigmoid
    output (an activation), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

def softmax(x):
    """Softmax along axis 1 of a 2-D array.

    The per-row maximum is subtracted first so that ``np.exp`` cannot
    overflow; the normalised result is unaffected by the shift.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    row_totals = np.sum(exp_scores, axis=1, keepdims=True)
    return exp_scores / row_totals

def cross_entropy_loss(y_true, y_pred):
    """Mean categorical cross-entropy over the rows of ``y_true``.

    Args:
        y_true: one-hot targets, one row per sample.
        y_pred: predicted class probabilities, same shape as ``y_true``.

    Returns:
        The summed ``-y_true * log(y_pred)`` divided by ``y_true.shape[0]``.
    """
    m = y_true.shape[0]
    # Floor the probabilities before taking the log: an exact 0 would give
    # log(0) = -inf, and 0 * -inf = NaN would poison the whole sum.
    safe_pred = np.maximum(y_pred, 1e-12)
    loss = -np.sum(y_true * np.log(safe_pred)) / m
    return loss

def cross_entropy_loss_derivative(y_true, y_pred):
    """Return ``(y_pred - y_true)`` divided by ``y_true.shape[0]``.

    The divisor is the length of the first axis of ``y_true``, so for a
    single 1-D label vector it is the number of entries in that vector.
    """
    divisor = y_true.shape[0]
    difference = y_pred - y_true
    return difference / divisor

# Forward propagation.
def forward_prop(X):
    """Run the recurrent network over the rows of ``X``, treated as time steps.

    Reads the module-level ``Wxh``, ``Whh``, ``Why``, ``bh``, ``by``,
    ``hidden_size`` and ``output_size``. The hidden state before the first
    row is all zeros.

    Returns:
        ``(h, y)``: the hidden state and the softmax output for every row.
    """
    n_steps = X.shape[0]
    h = np.zeros((n_steps, hidden_size))
    y = np.zeros((n_steps, output_size))
    h_prev = np.zeros(hidden_size)  # state carried in from the previous row
    for t, x_t in enumerate(X):
        h[t] = sigmoid(np.dot(x_t, Wxh) + np.dot(h_prev, Whh) + bh)
        y[t] = softmax(np.dot(h[t], Why) + by)
        h_prev = h[t]
    return h, y

# Backward propagation.
def backward_prop(X, y_true, h, y_pred):
    """Backpropagation through time over the rows of ``X``.

    Args:
        X: inputs, one row per time step.
        y_true: one-hot targets, one row per time step.
        h: hidden states returned by ``forward_prop``.
        y_pred: softmax outputs returned by ``forward_prop``.

    Returns:
        ``(dWxh, dWhh, dWhy, dbh, dby)``, shaped like the module-level
        ``Wxh``, ``Whh``, ``Why``, ``bh`` and ``by``.
    """
    T = X.shape[0]
    n_hidden = Whh.shape[0]
    dWxh = np.zeros_like(Wxh)
    dWhh = np.zeros_like(Whh)
    dWhy = np.zeros_like(Why)
    dbh = np.zeros_like(bh)
    dby = np.zeros_like(by)
    dh_next = np.zeros(n_hidden)    # gradient flowing back from step t + 1
    h_initial = np.zeros(n_hidden)  # state before the first step

    for t in reversed(range(T)):
        # NOTE(review): y_true[t] is a single 1-D label row, so the loss
        # derivative divides by the number of classes, not by T. The step is
        # therefore T / n_classes times larger than the gradient of the mean
        # loss. Left unchanged because the learning rate was chosen against
        # this scale - confirm the intended normalisation.
        dy = cross_entropy_loss_derivative(y_true[t], y_pred[t])
        dWhy += np.outer(h[t], dy)
        dby += dy

        # Error reaching h[t]: from this step's output plus from step t + 1.
        dh = np.dot(dy, Why.T) + dh_next
        dh_raw = dh * sigmoid_derivative(h[t])

        # The state feeding step 0 is the zero initial state. Indexing
        # h[t - 1] at t == 0 would wrap around to the final hidden state.
        h_prev = h[t - 1] if t > 0 else h_initial

        dWxh += np.outer(X[t], dh_raw)
        dWhh += np.outer(h_prev, dh_raw)
        # dh_raw is a 1-D vector with one entry per hidden unit. Summing it
        # over axis 0 would collapse it to a single scalar that is then
        # broadcast into every bias, so add it element-wise instead.
        dbh += dh_raw
        dh_next = np.dot(dh_raw, Whh.T)

    return dWxh, dWhh, dWhy, dbh, dby



# Train the network.
for epoch in range(epochs):
    # Reorder the rows each epoch; features and labels are permuted together.
    X_train, y_train = shuffle(X_train, y_train)

    h, y_pred = forward_prop(X_train)
    loss = cross_entropy_loss(y_train, y_pred)
    dWxh, dWhh, dWhy, dbh, dby = backward_prop(X_train, y_train, h, y_pred)

    # Update in place so forward_prop, which reads these module-level arrays,
    # sees the new values on the next epoch.
    for param, grad in ((Wxh, dWxh), (Whh, dWhh), (Why, dWhy), (bh, dbh), (by, dby)):
        param -= learning_rate * grad

    # Report the loss every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Evaluate model 2 on the held-out split.
h_test, y_pred_test = forward_prop(X_test)
predictions = y_pred_test.argmax(axis=1)
true_labels = np.argmax(y_test, axis=1)
accuracy = (predictions == true_labels).mean()
print(f'Accuracy: {accuracy}')

# Shapes of the gradients from the last training step, as a sanity check.
print(f"Shape of dWxh: {dWxh.shape}")
print(f"Shape of dWhh: {dWhh.shape}")
print(f"Shape of dWhy: {dWhy.shape}")
print(f"Shape of dbh: {dbh.shape}")
print(f"Shape of dby: {dby.shape}")


Epoch 0, Loss: 1.0978546456696723
Epoch 100, Loss: 0.07254076990086356
Epoch 200, Loss: 0.03038129298114878
Epoch 300, Loss: 0.018968754743870435
Epoch 400, Loss: 0.013830673502336714
Epoch 500, Loss: 0.010754300534812399
Epoch 600, Loss: 0.008345419254853166
Epoch 700, Loss: 0.00977247841329793
Epoch 800, Loss: 0.006083849475685241
Epoch 900, Loss: 0.0052779105390970684
Accuracy: 1.0
Shape of dWxh: (13, 64)
Shape of dWhh: (64, 64)
Shape of dWhy: (64, 3)
Shape of dbh: (1, 64)
Shape of dby: (1, 3)


In [None]:
# MODEL 3
# Architektura trzywarstwowa z rekurencyjnym RNN i Dropout:
# - Warstwa wejściowa: 13 cech, tensor o wymiarach (1, 13)
# - Warstwa ukryta 1: 64 neurony, ReLU, rekurencyjne połączenie (SimpleRNN)
# - Warstwa ukryta 2: 64 neurony, ReLU
# - Dropout: 50%
# - Warstwa wyjściowa: 3 klasy, softmax
# Uczenie:
# - Adam, sparse categorical crossentropy
# - 20 epok, batch size 16
# Dane:
# - Standaryzowane (StandardScaler)
# Ewaluacja poprzez accuracy

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Dropout
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation,
# dropout masks and batch order repeat between runs.
tf.keras.utils.set_random_seed(42)

data = load_wine()
X, y = data.data, data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features using statistics from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Insert a time axis of length 1: each sample becomes (1, n_features), so
# the SimpleRNN layer processes a single time step per sample.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

model = Sequential()
# Declare the input with an explicit Input object; passing `input_shape` to
# the first layer triggers a deprecation warning in current Keras.
model.add(tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(SimpleRNN(units=64, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Labels stay as integers, hence the sparse variant of the loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# NOTE: the test split doubles as validation data, so val_accuracy is the
# held-out score rather than an independent validation estimate.
# Keeping the History object in a variable stops its repr from being echoed
# as the cell output.
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_data=(X_test, y_test))


Epoch 1/20


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 52ms/step - accuracy: 0.4422 - loss: 1.0707 - val_accuracy: 0.7500 - val_loss: 0.8628
Epoch 2/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6676 - loss: 0.8358 - val_accuracy: 0.9722 - val_loss: 0.6645
Epoch 3/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8359 - loss: 0.6136 - val_accuracy: 0.9722 - val_loss: 0.5070
Epoch 4/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8649 - loss: 0.5058 - val_accuracy: 0.9722 - val_loss: 0.3772
Epoch 5/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9105 - loss: 0.4031 - val_accuracy: 1.0000 - val_loss: 0.2777
Epoch 6/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9105 - loss: 0.3121 - val_accuracy: 1.0000 - val_loss: 0.2036
Epoch 7/20
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x7923e63605d0>

In [None]:
# MODEL 4
# Architektura sieci w pełni połączonej (MLP) z trzema warstwami ukrytymi:
# - Warstwa wejściowa: 13 cech
# - Warstwa ukryta 1: 128 neuronów, sigmoid
# - Warstwa ukryta 2: 64 neurony, sigmoid
# - Warstwa ukryta 3: 32 neurony, sigmoid
# - Warstwa wyjściowa: 3 klasy, sigmoid
# Uczenie:
# - MSE jako funkcja kosztu
# - Gradient descent, learning rate 0.01
# - 1000 epok
# Dane:
# - Standaryzowane (StandardScaler), etykiety kodowane jako one-hot
# Ewaluacja poprzez accuracy


import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from sklearn.datasets import load_wine

# Load the wine dataset and one-hot encode the labels before splitting, so
# both splits share the same label columns.
data = load_wine()
X = data.data
encoder = LabelBinarizer()
y = encoder.fit_transform(data.target.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features using statistics from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Layer widths and optimisation hyper-parameters.
input_size = X_train.shape[1]
hidden_size1, hidden_size2, hidden_size3 = 128, 64, 32
output_size = y_train.shape[1]
learning_rate = 0.01
epochs = 1000

# Unscaled standard-normal weights, drawn in layer order after seeding;
# biases start at zero.
np.random.seed(42)
layer_sizes = [input_size, hidden_size1, hidden_size2, hidden_size3, output_size]
W1, W2, W3, W4 = (
    np.random.randn(n_in, n_out)
    for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:])
)
b1, b2, b3, b4 = (np.zeros((1, n_out)) for n_out in layer_sizes[1:])

def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    exp_neg = np.exp(-x)
    return 1 / (1 + exp_neg)

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    Matches the sigmoid's slope only when ``x`` is a sigmoid output
    (activation) rather than the pre-activation value.
    """
    one_minus_x = 1 - x
    return x * one_minus_x

def mse_loss(y_true, y_pred):
    """Mean of the squared element-wise differences between the two arrays."""
    error = y_true - y_pred
    return (error * error).mean()

def _forward_pass(inputs):
    """Run the four sigmoid layers on ``inputs``.

    Reads the module-level ``W1..W4`` and ``b1..b4``. Shared by training and
    evaluation so the two code paths cannot drift apart.

    Returns:
        ``(A1, A2, A3, output)``: the activation of each of the four layers.
    """
    A1 = sigmoid(np.dot(inputs, W1) + b1)
    A2 = sigmoid(np.dot(A1, W2) + b2)
    A3 = sigmoid(np.dot(A2, W3) + b3)
    output = sigmoid(np.dot(A3, W4) + b4)
    return A1, A2, A3, output


# Full-batch training loop.
for epoch in range(epochs):
    A1, A2, A3, y_pred = _forward_pass(X_train)

    # MSE is computed for reporting only; see the note on dZ4 below.
    loss = mse_loss(y_train, y_pred)

    # NOTE(review): `y_pred - y_train` is the output-layer gradient of a
    # sigmoid cross-entropy summed over samples, not of the MSE reported
    # above (the MSE gradient would carry an extra y_pred * (1 - y_pred)
    # factor and a 2 / N scale). Left unchanged because altering it changes
    # the training dynamics - confirm which loss is intended.
    dZ4 = y_pred - y_train
    dW4 = np.dot(A3.T, dZ4)
    db4 = np.sum(dZ4, axis=0, keepdims=True)

    # Propagate the error back layer by layer. sigmoid_derivative takes the
    # layer's activation, not its pre-activation.
    dZ3 = np.dot(dZ4, W4.T) * sigmoid_derivative(A3)
    dW3 = np.dot(A2.T, dZ3)
    db3 = np.sum(dZ3, axis=0, keepdims=True)

    dZ2 = np.dot(dZ3, W3.T) * sigmoid_derivative(A2)
    dW2 = np.dot(A1.T, dZ2)
    db2 = np.sum(dZ2, axis=0, keepdims=True)

    dZ1 = np.dot(dZ2, W2.T) * sigmoid_derivative(A1)
    dW1 = np.dot(X_train.T, dZ1)
    db1 = np.sum(dZ1, axis=0, keepdims=True)

    # All gradients are computed before any parameter changes. The updates
    # are in place so _forward_pass sees the new values next epoch.
    W4 -= learning_rate * dW4
    b4 -= learning_rate * db4
    W3 -= learning_rate * dW3
    b3 -= learning_rate * db3
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1

    # Report the loss every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Evaluate on the held-out split using the same forward pass as training.
*_, y_pred_test = _forward_pass(X_test)

accuracy = np.mean(np.argmax(y_pred_test, axis=1) == np.argmax(y_test, axis=1))
print(f'Accuracy: {accuracy * 100:.2f}%')


Epoch 0, Loss: 0.34420283056183315
Epoch 100, Loss: 0.00013130261646672188
Epoch 200, Loss: 1.8673934510593826e-05
Epoch 300, Loss: 6.581888411918572e-06
Epoch 400, Loss: 3.12576916735045e-06
Epoch 500, Loss: 1.7702072893331888e-06
Epoch 600, Loss: 1.1237032933968243e-06
Epoch 700, Loss: 7.716132659666734e-07
Epoch 800, Loss: 5.61167450586117e-07
Epoch 900, Loss: 4.263300052691548e-07
Accuracy: 100.00%
