In [None]:
import copy

import numpy as np
import torch as T
import torch.nn as nn
from sklearn.datasets import fetch_covtype
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Neural network definition
class NeuralNet(nn.Module):
    """Sequential network that carries its own loss function and optimizer.

    Args:
        layers: iterable of ``nn.Module`` objects, chained in order inside an
            ``nn.Sequential``. The modules are registered as given (not copied).
            ``None`` (the default) builds an empty network.
        optim: optimizer used by ``train_model``; may be assigned after
            construction (it usually needs ``self.parameters()``).
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        device: device the module is moved to and on which inputs are processed.
    """

    def __init__(self, layers=None, optim=None, loss_fn=None, device='cpu'):
        super().__init__()

        # A None default avoids sharing one mutable list object between calls.
        if layers is None:
            layers = []

        # Unpack the layer list into an nn.Sequential
        self.layers: nn.Sequential = nn.Sequential(*layers)

        # Loss function of the model
        self.loss_fn = loss_fn

        # Optimizer of the loss function
        self.optim = optim

        # Device the model is trained on
        self.device = device
        self.to(device)

    def forward(self, x):
        """Pass the input through all layers and return the result."""
        return self.layers(x)

    def train_model(self, x, y, epochs=1000, early_stopping=True, early_stopping_epochs=3,  verbose=False):
        """Run up to ``epochs`` optimizer steps on the single batch ``(x, y)``.

        Args:
            x: input tensor for the batch.
            y: target tensor for the batch.
            epochs: maximum number of optimization steps on this batch.
            early_stopping: when True, stop once the loss has risen for
                ``early_stopping_epochs`` consecutive steps.
            early_stopping_epochs: number of consecutive loss increases that
                triggers the stop. Values below 1 never trigger.
            verbose: print the loss of every step.

        Returns:
            The loss of the last executed step as a float, or ``nan`` when
            ``epochs`` is 0 and no step was executed.
        """
        self.train()

        # The batch does not change between steps, so move it to the device once.
        x = x.to(self.device)
        y = y.to(self.device)

        consecutive_increases = 0
        # Start from +inf so the very first step can never count as an increase.
        previous_loss = float('inf')
        current_loss = float('nan')

        for epoch in range(epochs):
            # Forward pass and loss
            y_pred = self(x)
            loss = self.loss_fn(y_pred, y)
            current_loss = loss.item()

            # Backward pass and parameter update
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()

            if verbose:
                print(f'[TRAIN] Epoch {epoch} \\ Loss: {current_loss}')

            # Check whether the loss kept rising long enough to stop early
            if early_stopping:
                if current_loss > previous_loss:
                    consecutive_increases += 1
                    if consecutive_increases == early_stopping_epochs:
                        print(f'Early stopping [E: {epoch}] ...')
                        break
                else:
                    # Any non-increasing step resets the streak
                    consecutive_increases = 0

            # Remember the loss for the comparison in the next step
            previous_loss = current_loss

        return current_loss

    def test_model(self, test_loader, verbose=True):
        """Evaluate the model on every batch of ``test_loader`` and print a report.

        Accuracy, weighted recall and weighted precision are computed once over
        all collected predictions, so a smaller final batch is not over-weighted.
        The reported loss is the per-sample average.

        Args:
            test_loader: loader yielding ``(x, y)`` batches and exposing ``dataset``.
            verbose: print progress and the loss of every batch.

        Returns:
            Accuracy over all evaluated samples, or ``nan`` if the loader
            yielded no samples.
        """
        self.eval()

        # A 'sum'-reduced loss is already a batch total; any other scalar loss is
        # treated as a batch mean and weighted by the batch size.
        loss_is_summed = getattr(self.loss_fn, 'reduction', 'mean') == 'sum'

        total_loss = 0.0
        samples_seen = 0
        true_batches = []
        predicted_batches = []

        with T.no_grad():
            for x, y in test_loader:
                if verbose:
                    print(f'Testing batch [{samples_seen}/{len(test_loader.dataset)}]')

                x = x.to(self.device)
                y = y.to(self.device)

                # Forward pass; the predicted class is the arg-max over dim 1
                y_pred = self(x)
                batch_loss = self.loss_fn(y_pred, y).item()

                total_loss += batch_loss if loss_is_summed else batch_loss * len(y)
                samples_seen += len(y)

                # Keep labels and predictions for the dataset-level metrics
                true_batches.append(y.cpu().numpy())
                predicted_batches.append(T.argmax(y_pred, dim=1).cpu().numpy())

                if verbose:
                    print(
                        f'[TEST] current loss: {batch_loss}, total: {total_loss}')

        if samples_seen == 0:
            print('No samples to evaluate.')
            return float('nan')

        y_true = np.concatenate(true_batches)
        y_hat = np.concatenate(predicted_batches)

        accuracy = accuracy_score(y_true, y_hat)
        recall = recall_score(y_true, y_hat, average='weighted', zero_division=0)
        precision = precision_score(y_true, y_hat, average='weighted', zero_division=0)

        print(f'Average loss: {total_loss / samples_seen}')
        print(f'Accuracy: {accuracy}')
        print(f'Recall: {recall}')
        print(f'Precision: {precision}')

        # Returned so the caller can pick the most accurate model
        return accuracy

    def predict(self, x):
        """Return the network output for ``x`` in eval mode, without tracking gradients."""
        self.eval()

        with T.no_grad():
            # Inputs must live on the same device as the parameters
            return self(x.to(self.device))


In [None]:
# Load the dataset (features X, class labels y)
X, y = fetch_covtype(return_X_y=True)

# Cross-entropy expects class indices 0..N-1, while the labels arrive as 1..N
y = y - 1

# Fixed seed so that the splits (and therefore the results) are reproducible
SPLIT_SEED = 42

# Train / test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

# Carve the validation set out of the training set (25% of the training set)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, shuffle=True, random_state=SPLIT_SEED)

# Standardize using statistics of the training split only, so that no
# information from the validation/test splits leaks into preprocessing.
# X itself is left unscaled.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Optional: shrink the training set on slower machines
# X_train = X_train[:15000]
# y_train = y_train[:15000]

# Convert to torch tensors: float32 features, T.long class indices as targets
X_train = T.tensor(X_train, dtype=T.float32)
X_test = T.tensor(X_test, dtype=T.float32)
X_val = T.tensor(X_val, dtype=T.float32)
y_train = T.tensor(y_train, dtype=T.long)
y_test = T.tensor(y_test, dtype=T.long)
y_val = T.tensor(y_val, dtype=T.long)

# Datasets that the DataLoaders are built from later on
train = TensorDataset(X_train, y_train)
test = TensorDataset(X_test, y_test)
val = TensorDataset(X_val, y_val)

In [None]:
# print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

In [None]:
# Search strategies considered: grid search, random search, bayesian, pbt, gbo, automl
# --> an explicit list of configurations is used

# Hyperparameter configurations. The lists are read column-wise: configuration i
# takes element i of every list (this is not a cartesian product).
#                   v      v       v
hp_grid = {
    'lr':          [1e-4,  1e-3,         1e-4],
    'weight_decay':[0,     0,               0],
    'batch_size':  [1024,   512,           512],
    'num_epochs':  [100,    200,           300],
    'es_epochs':   [2+1,   3+1,           3+1],
}

# Number of configurations, derived from the lists so it cannot drift out of
# sync when a column is added or removed.
_config_lengths = {len(values) for values in hp_grid.values()}
if len(_config_lengths) != 1:
    raise ValueError(
        f'hp_grid lists must all have the same length, got lengths {sorted(_config_lengths)}')
num_varied_hyperparams = _config_lengths.pop()

# Use the first CUDA device when available, otherwise the CPU
device = T.device("cuda:0") if T.cuda.is_available() else T.device("cpu")

# Helper that builds a model for one hyperparameter configuration
# returns: the constructed model with its optimizer attached (no training happens here)
def instantiate_model(hyperparams, layers, loss_fn) -> NeuralNet:
    """Create a ``NeuralNet`` on the global ``device`` and attach an Adam optimizer.

    Args:
        hyperparams: mapping that provides the keys ``'lr'`` and ``'weight_decay'``.
        layers: layer list handed to ``NeuralNet`` as is.
        loss_fn: loss function stored on the model.

    Returns:
        The new model; its ``optim`` attribute is an AMSGrad-enabled Adam
        optimizer over the model's parameters.
    """
    # Author's note: Adam converges/learns quickly but generalized worse on the
    # validation set; SGD/Nesterov converges more slowly but generalized better.
    learning_rate = hyperparams['lr']
    l2_penalty = hyperparams['weight_decay']

    # The optimizer needs the parameters, so the network is created first
    # and the optimizer is attached afterwards.
    net = NeuralNet(layers=layers, optim=None, loss_fn=loss_fn, device=device)

    # Alternative that was tried:
    # T.optim.SGD(net.parameters(), lr=learning_rate,
    #             weight_decay=l2_penalty, momentum=0.75, nesterov=True)
    net.optim = T.optim.Adam(
        net.parameters(),
        lr=learning_rate,
        weight_decay=l2_penalty,
        amsgrad=True,
    )

    return net

# Helper that trains a model batch by batch with the given settings
def train_model_hyperparams(train_loader, model, epochs, use_es=True, es_epochs=3) -> "NeuralNet":
    """Call ``model.train_model`` on every batch of ``train_loader``.

    Each batch is handed to ``model.train_model`` together with ``epochs``, so the
    model performs up to ``epochs`` optimization steps on a batch before the
    next batch is fetched.

    Args:
        train_loader: loader yielding ``(x, y)`` batches and exposing ``dataset``.
        model: object with a ``train_model(x, y, epochs, early_stopping=...,
            early_stopping_epochs=..., verbose=...)`` method.
        epochs: forwarded to ``model.train_model`` for every batch.
        use_es: forwarded as ``early_stopping``.
        es_epochs: forwarded as ``early_stopping_epochs``.

    Returns:
        The same ``model`` object, after training.
    """
    # Real number of samples. The previous len(loader) * batch_size overstated it
    # whenever the last batch was partial and failed when batch_size was None.
    total_samples = len(train_loader.dataset)
    samples_seen = 0

    for batch_index, (x, y) in enumerate(train_loader):
        # Report only every 10th batch to keep the console output short
        if batch_index % 10 == 0:
            print(f'--- batch: [{samples_seen}/{total_samples}] ---')

        model.train_model(x, y,
                          epochs,
                          early_stopping=use_es,
                          early_stopping_epochs=es_epochs, verbose=False)
        samples_seen += len(x)

    return model

In [None]:
# Candidate architectures and the loss function (multi-class classification).
# The layer objects below are templates only: every run trains a deep copy,
# so no weights are shared between hyperparameter configurations.
architectures = [
    [
        nn.Linear(54, 512),
        nn.GELU(),
        nn.Dropout(0.15),

        nn.Linear(512, 108),
        nn.ReLU(),
        nn.Dropout(0.2),

        nn.Linear(108, 27),
        nn.ReLU(),
        nn.Linear(27, 7),
        nn.LogSoftmax(dim=1),
    ],
    [
        nn.Linear(54, 128),
        nn.ReLU(),
        nn.Dropout(0.2),

        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Dropout(0.1),

        nn.Linear(128, 7),
        nn.LogSoftmax(dim=1),
    ],
]

# NOTE(review): per the PyTorch docs CrossEntropyLoss expects raw logits and applies
# log-softmax itself, so the trailing LogSoftmax layers above are redundant
# (pairing them with nn.NLLLoss, or dropping them, would be the usual form).
loss_fn = nn.CrossEntropyLoss(reduction='mean')
# loss_fn = nn.MultiLabelSoftMarginLoss()

# entries: (model, accuracy, hyperparams)
models = []

for layers in architectures:
    for i in range(num_varied_hyperparams):
        # Pick configuration i from every hyperparameter list
        hyperparams = {k: v[i] for k, v in hp_grid.items()}
        batch_size = hyperparams['batch_size']
        num_epochs = hyperparams['num_epochs']
        es_epochs = hyperparams['es_epochs']

        print(f'Loaded hyperparameters: {hyperparams}')

        # Batch loaders for the three splits
        train_loader = DataLoader(train, batch_size=batch_size)
        test_loader = DataLoader(test, batch_size=batch_size)
        val_loader = DataLoader(val, batch_size=batch_size)

        # 1. Instantiate the model for this configuration. The layers are deep-copied
        #    because nn.Sequential registers the very objects it is given: without
        #    the copy every configuration would keep training the same weights and
        #    all entries in `models` would alias one set of parameters.
        model = instantiate_model(hyperparams, copy.deepcopy(layers), loss_fn)

        # 2. Evaluate the model before training
        print("--- Untrained model ---")
        model.test_model(test_loader, verbose=False)

        # 3. Train the model (no early stopping)
        print(f'Training model...')
        model = train_model_hyperparams(train_loader, model, num_epochs,
                                        use_es=False, es_epochs=0)

        # 4. Evaluate the model after training
        print("--- Trained model ---")
        model.test_model(test_loader, verbose=False)

        # 5. Continue training on the validation set (with early stopping)
        model = train_model_hyperparams(val_loader, model, num_epochs,
                                        use_es=True, es_epochs=es_epochs)

        # 6. Score the model; this accuracy is used to select the best model.
        # NOTE(review): the score is measured on val_loader, i.e. on the data the
        # model was just trained on in step 5 - confirm this is intended.
        print(f'--- Validated model for {hyperparams} ---')
        acc_score = model.test_model(val_loader, verbose=False)
        print(f'Accuracy: {acc_score}')

        models.append((model, acc_score, hyperparams))

Loaded hyperparameters: {'lr': 0.0001, 'weight_decay': 0, 'batch_size': 1024, 'num_epochs': 100, 'es_epochs': 3}
--- Untrained model ---
Average loss: 0.0376447033184754
Accuracy: 0.4862245089517008
Recall: 0.4862245089517008
Precision: 0.23666960525967573
Accuracy: 0.4862245089517008
Training model...
--- batch: [0/349184] ---
--- batch: [51200/349184] ---
--- batch: [102400/349184] ---
--- batch: [153600/349184] ---
--- batch: [204800/349184] ---
--- batch: [256000/349184] ---
--- batch: [307200/349184] ---
--- Trained model ---
Average loss: 0.0006550930828140999
Accuracy: 0.7259187685074766
Recall: 0.7259187685074766
Precision: 0.6965493440826748
Accuracy: 0.7259187685074766
--- batch: [0/116736] ---
Early stopping [E: 40] ...
Early stopping [E: 32] ...
Early stopping [E: 65] ...
Early stopping [E: 34] ...
Early stopping [E: 15] ...
Early stopping [E: 73] ...
Early stopping [E: 58] ...
Early stopping [E: 2] ...
Early stopping [E: 2] ...
Early stopping [E: 25] ...
Early stopping [E:

In [None]:
# Print one summary per evaluated entry: (model, accuracy, hyperparams)
for model in models:
    entry_accuracy = model[1]
    entry_hyperparams = model[2]
    print('--------------------------------')
    print(f'Hyperparameters: \t{entry_hyperparams}')
    print(f'Accuracy: \t\t\t{entry_accuracy}')

# Select the entry with the highest accuracy
best_entry = max(models, key=lambda entry: entry[1])
best_model, accuracy, hyperparams = best_entry

# Describe the winning model
print(f'Best accuracy: {accuracy}')
print(f'Hyperparameters: {hyperparams}')
print('Layers: ')
for layer in best_model.layers:
    print(f'--- {layer}')

print(f'Optimizer: {best_model.optim}')
print(f'Loss function: {best_model.loss_fn}')

# Persist the whole model object (not only its state_dict)
T.save(best_model, './best_model.pth')

--------------------------------
Hyperparameters: 	{'lr': 0.0001, 'weight_decay': 0, 'batch_size': 1024, 'num_epochs': 100, 'es_epochs': 3}
Accuracy: 			0.7409996403184961
--------------------------------
Hyperparameters: 	{'lr': 0.001, 'weight_decay': 0, 'batch_size': 512, 'num_epochs': 200, 'es_epochs': 4}
Accuracy: 			0.6112974061745785
--------------------------------
Hyperparameters: 	{'lr': 0.0001, 'weight_decay': 0, 'batch_size': 512, 'num_epochs': 300, 'es_epochs': 4}
Accuracy: 			0.6449811151946491
--------------------------------
Hyperparameters: 	{'lr': 0.0001, 'weight_decay': 0, 'batch_size': 1024, 'num_epochs': 100, 'es_epochs': 3}
Accuracy: 			0.6968380234294047
--------------------------------
Hyperparameters: 	{'lr': 0.001, 'weight_decay': 0, 'batch_size': 512, 'num_epochs': 200, 'es_epochs': 4}
Accuracy: 			0.6276358084003247
--------------------------------
Hyperparameters: 	{'lr': 0.0001, 'weight_decay': 0, 'batch_size': 512, 'num_epochs': 300, 'es_epochs': 4}
Accura