In [12]:
import numpy as np # algebra liniowa
import matplotlib.pyplot as plt # używane do rysowania wykresów
from sklearn.datasets import load_digits # importowanie zbioru danych
from sklearn.model_selection import train_test_split # podział danych na część treningową i testową
from sklearn.preprocessing import MinMaxScaler # normalizacja danych
from sklearn.preprocessing import OneHotEncoder # kodowanie one-hot
import optuna # do hiperparametryzacji
import warnings # ignorowanie ostrzeżeń
warnings.filterwarnings("ignore")


In [13]:
image_size = (64, 64)

def load_train_data(input_dir, newSize=(64,64)):
    '''
    '''

    
    import numpy as np
    import pandas as pd
    import os
    from skimage.io import imread
    import cv2 as cv
    from pathlib import Path
    import random
    from shutil import copyfile, rmtree
    import json


    import seaborn as sns
    import matplotlib.pyplot as plt

    import matplotlib
    
    image_dir = Path(input_dir)
    categories_name = []
    for file in os.listdir(image_dir):
        d = os.path.join(image_dir, file)
        if os.path.isdir(d):
            categories_name.append(file)

    folders = [directory for directory in image_dir.iterdir() if directory.is_dir()]

    train_img = []
    categories_count=[]
    labels=[]
    for i, direc in enumerate(folders):
        count = 0
        for obj in direc.iterdir():
            if os.path.isfile(obj) and os.path.basename(os.path.normpath(obj)) != 'desktop.ini':
                labels.append(os.path.basename(os.path.normpath(direc)))
                count += 1
                img = imread(obj)#zwraca ndarry postaci xSize x ySize x colorDepth
                img = cv.resize(img, newSize, interpolation=cv.INTER_AREA)# zwraca ndarray
                img = img / 255#normalizacja
                train_img.append(img)
        categories_count.append(count)
    X={}
    X["values"] = np.array(train_img)
    X["categories_name"] = categories_name
    X["categories_count"] = categories_count
    X["labels"]=labels
    return X

In [14]:
class MyNeuralNetwork:
    # possible error cause - input size is not the same as image size 
    def __init__(self, input_size: int, hidden_size: int, output_size: int, loss_func='mse'):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.loss_func = loss_func

        # initialize weights and biases:
        self.input_hidden_layers_weights = np.random.randn(self.input_size, self.hidden_size)
        self.hidden_layer_bias = np.zeros((1, self.hidden_size))
        self.hidden_output_layers_weights = np.random.randn(self.hidden_size, self.output_size)
        self.output_layer_bias = np.zeros((1, self.output_size))

        # watch losses:
        self.train_loss = []
        self.test_loss = []

    def __str__(self):
        return f'input layer size: {self.input_size}, hidden layer size: {self.hidden_size}, output size: {self.output_size}. Loss function: {self.loss_func}'
    
    # data is passed through the network - so hidden layer and output layer activations and weigted sums are calculated
    def forward_propagation(self, x):
        # x - input data (features) - pixels in our case
        self.hidden_layer_weighted_sum = np.dot(x, self.input_hidden_layers_weights) + self.hidden_layer_bias
        self.hidden_layer_activations = self.sigmoid(self.hidden_layer_weighted_sum)
        self.output_layer_weighted_sum = np.dot(self.hidden_layer_activations, self.hidden_output_layers_weights) + self.output_layer_bias
        if self.loss_func == 'categorical_crossentropy':
            self.output_layer_activations = self.softmax(self.output_layer_weighted_sum)
        else:
            self.output_layer_activations = self.sigmoid(self.output_layer_weighted_sum)
        return self.output_layer_activations
    
    # learning is done in this method
    def backward_propagation(self, x, y, learning_rate):
        m = x.shape[0]

        # calculategradients
        if self.loss_func == 'mse':
            self.output_layer_gradient = self.output_layer_activations - y
        elif self.loss_func == 'log_loss':
            self.output_layer_gradient = -(y/self.output_layer_activations - (1-y)/(1-self.output_layer_activations))
        elif self.loss_func == 'categorical_crossentropy':
            self.output_layer_gradient = self.output_layer_activations - y
        else:
            raise ValueError('Not valid loss function! That ain\'t work')

        # calculate new weights              
        self.new_output_layer_weights = (1/m) * np.dot(self.hidden_layer_activations.T, self.output_layer_gradient)
        self.new_output_layer_bias =(1/m) * np.sum(self.output_layer_gradient, axis=0, keepdims=True)
        self.hidden_layer_gradient = np.dot(self.output_layer_gradient, self.hidden_output_layers_weights.T) * self.sigmoid_derivative(self.hidden_layer_activations)
        self.new_hidden_layer_weights = (1/m) * np.dot(x.T, self.hidden_layer_gradient)
        self.new_hidden_layer_bias = (1/m) * np.sum(self.hidden_layer_gradient, axis=0, keepdims=True)

        # update weights and biases, the algorithm don't take new weights as granted - so it's multiplied by learning_rate so it's 0.01(or less) * new_weights
        self.hidden_output_layers_weights -= learning_rate * self.new_output_layer_weights
        self.output_layer_bias -= learning_rate * self.new_output_layer_bias
        self.input_hidden_layers_weights -= learning_rate * self.new_hidden_layer_weights
        self.hidden_layer_bias -= learning_rate * self.new_hidden_layer_bias

    def sigmoid(self, x):
        return 1/(1 + np.exp(-x))
    
    def sigmoid_derivative(self, x):
        return x * (1-x)
    
    def softmax(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps/np.sum(exps, axis=1, keepdims=True)


In [15]:
class Trainer:
    def __init__(self, model, loss_func='mse'):
        self.model = model
        self.loss_func = loss_func
        self.train_loss = []
        self.test_loss = []

    def calculate_loss(self, y_true, y_pred):
        if self.loss_func == 'mse':
            return np.mean((y_pred - y_true)**2)
        elif self.loss_func == 'log_loss':
            return -np.mean(y_true*np.log(y_pred) + (1-y_true)*np.log(1-y_pred))
        elif self.loss_func == 'categorical_crossentropy':
            return -np.mean(y_true*np.log(y_pred))
        else:
            raise ValueError('Nieprawidłowa funkcja straty')

    def train(self, x_train, y_train, X_test, y_test, epochs, learning_rate):
        for _ in range(epochs):
            self.model.forward_propagation(x_train)
            self.model.backward_propagation(x_train, y_train, learning_rate)
            train_loss = self.calculate_loss(y_train, self.model.a2)
            self.train_loss.append(train_loss)
            
            self.model.forward_propagation(X_test)
            test_loss = self.calculate_loss(y_test, self.model.a2)
            self.test_loss.append(test_loss)

In [16]:
# load train data
train_data = load_train_data('./train_test_sw/train_sw')

In [17]:
# enhance data function
def enhance_train_data(data_to_augment, image_size=(64, 64)):
   augmented_images = []
   augmented_labels = []

   for image,label in zip(data_to_augment['values'], data_to_augment['labels']):
        augmented_images.append(image)
        augmented_labels.append(label)
        for _ in range(10):
           augmented = augment(image, image_size)
           augmented_images.append(augmented.numpy())
           augmented_labels.append(label)
   
   return augmented_images, augmented_labels


def augment(image, image_size=(64,64)):
    import tensorflow as tf
    delta = 0.09 # maximum relative change in brigtness
    
    image = tf.cast(image, tf.float64)
    image = tf.image.random_crop(image, size=[image_size[0], image_size[1], 3])
    image = tf.image.random_brightness(image, delta)
    image = tf.image.random_flip_up_down(image)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_contrast(image, 0.8, 1.2)

    return image

In [18]:
# enhance data
augmented_images, augmented_labels = enhance_train_data(train_data, image_size)

In [38]:
scaler = MinMaxScaler()
X = scaler.fit_transform(np.array(np.array(augmented_images), dtype=np.ndarray)) # <- possible error cause!!!

print(len(augmented_images))
print(len(augmented_labels))
# we don't extract any characteristis, only input is pixels
# so i think the cause lays in the input size
# either of pictures or something else

# Prooces data and change it from list to array:
y = np.array(augmented_labels)

# Apply one hot encoding to categories names (output data)
koder = OneHotEncoder()
y_jednokodowane = koder.fit_transform(y.reshape(-1, 1))

# Podziel zbiór danych na zestawy treningowy i testowy
X_train, X_test, y_train, y_test = train_test_split(X, y_jednokodowane, test_size=0.2, random_state=42)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 3 dimensions. The detected shape was (11297, 64, 64) + inhomogeneous part.

In [28]:
# Utwórz instancję klasy NeuralNetwork
rozmiar_wejscia = X.shape[0] ### <- possible error cause!!! maybe we take the wrong shape!
print(X)
print(rozmiar_wejscia)
print(X.shape[0])
rozmiar_warstwy_ukrytej = 64
rozmiar_wyjscia = len(np.unique(y))
funkcja_straty = 'categorical_crossentropy'
epoki = 1000
wspolczynnik_uczenia = 0.1

nn = MyNeuralNetwork(rozmiar_wejscia, rozmiar_warstwy_ukrytej, rozmiar_wyjscia, funkcja_straty)

# Wyświetl architekturę sieci neuronowej
print(nn)

[[0.         0.         0.12       0.        ]
 [0.04       0.04166667 0.12       0.        ]
 [0.16       0.16666667 0.24       0.        ]
 [0.16       0.25       0.36       0.        ]
 [0.12       0.08333333 0.12       0.        ]
 [0.12       0.04166667 0.         0.        ]
 [0.16       0.04166667 0.04       0.        ]
 [0.2        0.125      0.08       0.        ]
 [0.24       0.20833333 0.12       0.        ]
 [0.32       0.25       0.2        0.        ]
 [0.36       0.29166667 0.24       0.        ]
 [0.36       0.33333333 0.32       0.        ]
 [0.48       0.41666667 0.32       0.        ]
 [0.48       0.45833333 0.36       0.        ]
 [0.76       0.95833333 1.         0.        ]
 [0.56       0.45833333 0.4        0.        ]
 [0.84       0.75       0.52       0.        ]
 [1.         0.95833333 0.72       0.        ]
 [0.52       0.45833333 0.44       0.        ]
 [0.52       0.54166667 0.52       0.        ]
 [0.6        0.54166667 0.52       0.        ]
 [0.64       

In [29]:
trener = Trainer(nn, funkcja_straty)
trener.train(X_train, y_train, X_test, y_test, epoki, wspolczynnik_uczenia)

# Przekonwertuj y_test z kodowania one-hot na etykiety
etykiety_y_test = np.argmax(y_test, axis=1)

# Oceń wydajność sieci neuronowej
prognozy = np.argmax(nn.forward_propagation(X_test), axis=1)
dokladnosc = np.mean(prognozy == etykiety_y_test)
print(f"Dokładność: {dokladnosc:.2%}")

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 3 dimensions. The detected shape was (9037, 64, 64) + inhomogeneous part.