In [13]:
# hyperparameters tuning

# import pytorch
import torch 
import torch.nn as nn 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score
from sklearn.datasets import fetch_covtype

In [14]:
# nn architecture

class NeuralNetwork(nn.Module):
    def __init__(self, hidden_neurons):
        super().__init__()
        # 54 dimensions and 7 classes
        self.layer1 = nn.Linear(54, hidden_neurons)
        self.activation1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_neurons, 128)
        self.activation2 = nn.ReLU()
        self.layer3 = nn.Linear(128, 64)
        self.activation3 = nn.ReLU()
        self.final_layer = nn.Linear(64, 7)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layer1(x)
        x = self.activation1(x)
        x = self.layer2(x)
        x = self.activation2(x)
        x = self.layer3(x)
        x = self.activation3(x)
        x = self.final_layer(x)
        x = self.softmax(x)

        return x

In [20]:
device =  torch.device("cpu") # koristimo gpu samo ako je dostupan, inace koristimo cpu
loss_function = nn.CrossEntropyLoss() # loss function

In [16]:
# precision and recall 
import numpy as np

# method that counts precision for model using sklearn library


def calculate_recall_precission(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    y_true = []
    y_pred = []

    with torch.no_grad():
        for input, label in test_loader:
            input = input.to(device) 
            label = label.to(device) # labels are one hot encoded
            outputs = model(input) # output has 7 classes 
           # _,predicted_class = torch.max(outputs.cpu(),1) # we take the class with highest probability
            label_class = torch.argmax(label.cpu())
            predicted_class = torch.argmax(outputs.cpu())
            y_true.append(label_class.numpy())
            y_pred.append(predicted_class.numpy())
            

    precision = precision_score(y_true, y_pred, average='macro', zero_division=1.0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=1.0)
    print(precision)
    print(recall)
    return precision, recall


In [17]:
# method that trains model
def train_model(model, train_loader, optimizer, epochs):
    
    model.train()
    epochs = 7

    for epoch in range(epochs):
        if epoch % 5 == 0:
            print("Epoch: ", epoch)
    
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            #print(labels.shape)
            #label_class = torch.argmax(labels)
            label_class = torch.argmax(labels, dim=1)
            loss = loss_function(outputs, label_class)
            loss.backward()
            optimizer.step()
    

In [21]:
# parameter grid
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import TensorDataset, DataLoader

params_grid = {
    'lr': [0.001, 0.01],
    'hidden_neurons': [128, 256],
    'batch_size': [32, 64],
}

parameter_grid = ParameterGrid(params_grid)

for params in parameter_grid:

    data = fetch_covtype()
    X = data['data'][:50000]
    y = data['target'][:50000]
    # split data into train and validation set

    x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    encoder = OneHotEncoder()

    y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1))
    y_val_encoded = encoder.fit_transform(y_val.reshape(-1, 1))

    # create tensors

    X_train_tensor = torch.tensor(x_train).float()
    X_val_tensor = torch.tensor(x_val).float()
    y_train_tensor = torch.tensor(y_train_encoded.toarray())
    y_val_tensor = torch.tensor(y_val_encoded.toarray())

    # define training and test dataset 

    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    # define dataloader

    train_loader = DataLoader(train_dataset, batch_size=params['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=params['batch_size'], shuffle=True)

    # create model with params
    model = NeuralNetwork(params['hidden_neurons'])


    # add learning rate to optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=params['lr'])

    # train model

    model.train()
    epochs = 10

    for epoch in range(epochs):
        if epoch % 5 == 0:
            print("Epoch: ", epoch)
        
        for input, label in train_loader:
            input = input.to(device)
            label = label.to(device)
            output = model(input)

            loss = loss_function(output, torch.argmax(label, dim=1))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # calculate precision and recall
    print(calculate_recall_precission(model, val_loader))
    print(params)

    


Epoch:  0
Epoch:  5
0.9383842994066637
0.14285714285714285
(0.9383842994066637, 0.14285714285714285)
{'batch_size': 32, 'hidden_neurons': 128, 'lr': 0.001}
Epoch:  0


KeyboardInterrupt: 