In [856]:
# train neural network model for covertype dataset
# import pytorch
import torch 
import torch.nn as nn 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score

In [857]:
class CovertypeClassifier(nn.Module):

    # define nn architecture
    def __init__(self):
        super().__init__()
        # 54 dimensions and 7 classes
        self.layer1 = nn.Linear(54, 64)
        self.activation1 = nn.ReLU()
        self.layer2 = nn.Linear(64, 128)
        self.activation2 = nn.ReLU()
        self.layer3 = nn.Linear(128, 64)
        self.activation3 = nn.ReLU()
        self.final_layer = nn.Linear(64, 7)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layer1(x)
        x = self.activation1(x)
        x = self.layer2(x)
        x = self.activation2(x)
        x = self.layer3(x)
        x = self.activation3(x)
        x = self.final_layer(x)
        # x = self.softmax(x)

        return x

In [858]:
net = CovertypeClassifier()
net

CovertypeClassifier(
  (layer1): Linear(in_features=54, out_features=64, bias=True)
  (activation1): ReLU()
  (layer2): Linear(in_features=64, out_features=128, bias=True)
  (activation2): ReLU()
  (layer3): Linear(in_features=128, out_features=64, bias=True)
  (activation3): ReLU()
  (final_layer): Linear(in_features=64, out_features=7, bias=True)
  (softmax): Softmax(dim=1)
)

In [859]:
# fetch data 

from sklearn.datasets import fetch_covtype

# load dataset
covertype = fetch_covtype()

# separate features and labels
X = covertype.data
y = covertype.target

In [860]:
# split dataset into train and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, shuffle=True)


In [861]:
# convert data to pytorch tensors 
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

X_train_tensor = torch.tensor(X_train).float()
X_test_tensor = torch.tensor(X_test).float()
y_train_tensor = torch.tensor(y_train).long() 
y_test_tensor = torch.tensor(y_test).long()

y_train_tensor.shape


torch.Size([464809])

In [862]:
# define training and test dataset 

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# define dataloader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [863]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") # koristimo gpu samo ako je dostupan, inace koristimo cpu
print(device)
net = CovertypeClassifier().to(device) # we set defice for our model

cpu


In [864]:
import numpy as np

# method that counts precision for model using sklearn library


def calculate_recall_precission(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, dim=1)
            y_true.extend(labels.numpy())
            y_pred.extend(predicted.numpy())
            print(predicted.numpy())
            # print(predicted.numpy())
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    precission = precision_score(y_true, y_pred, average='macro', zero_division=0)

    print(recall)
    print(precission)


In [865]:
# optimizer class

optimizer = torch.optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-5)

# loss function for multiclass classification

loss_function = nn.CrossEntropyLoss()


In [866]:
# calculate_precision(net, test_loader)
calculate_recall_precission(net, test_loader)

[5 4 4 4 4 4 4 4 4 5 4 5 4 4 4 4 4 5 4 5 4 4 4 4 4 5 4 5 4 5 4 5]
[4 4 4 4 4 4 5 4 4 4 5 4 4 5 4 4 4 4 4 5 4 4 4 5 4 4 4 4 4 5 4 4]
[4 5 4 5 4 4 4 4 4 4 5 4 4 4 4 4 4 4 4 4 5 5 4 4 4 4 4 4 4 4 5 5]
[4 4 4 4 5 4 5 5 5 4 5 4 5 4 4 4 5 4 4 4 4 4 5 4 4 4 4 5 5 4 4 4]
[4 4 4 5 5 4 5 4 4 4 4 4 4 5 4 4 5 4 4 5 4 5 4 4 4 5 4 4 4 4 5 4]
[4 5 5 5 4 4 4 5 4 4 5 5 4 4 4 5 4 4 5 5 4 4 4 4 4 5 4 4 4 4 4 4]
[4 4 4 4 4 5 4 4 5 4 4 4 4 5 4 4 4 5 4 5 4 4 4 4 4 4 5 4 4 4 4 4]
[4 4 5 4 4 5 4 4 4 4 4 4 4 4 4 5 5 5 5 4 5 5 4 5 4 4 4 4 4 5 5 4]
[4 4 4 4 4 4 4 4 4 4 4 5 5 4 4 4 4 4 4 5 4 4 5 4 4 4 4 4 5 4 4 5]
[4 5 5 5 5 4 4 4 4 4 4 4 4 4 5 5 4 4 4 5 5 5 4 4 5 5 4 4 4 5 4 4]
[4 4 4 5 4 4 4 4 5 4 4 4 4 5 4 5 4 4 4 4 4 5 4 4 4 4 4 5 5 4 4 4]
[5 4 4 4 4 4 5 5 5 5 4 4 4 4 4 4 4 5 4 4 4 5 4 4 4 5 4 4 4 4 5 4]
[4 4 4 4 4 4 4 4 4 5 4 4 4 4 4 4 5 4 5 4 4 4 4 4 4 5 4 5 4 4 4 4]
[4 4 5 4 4 4 4 4 5 4 4 4 5 4 5 4 5 4 4 5 4 4 4 4 4 5 5 4 4 4 5 4]
[4 5 4 4 5 4 4 4 4 5 4 4 4 4 4 4 5 5 4 4 5 4 4 4 4 5 4 5 4 4 4 4]
[4 4 4 4 4

In [867]:
# train network 

net.train()
epochs = 20

for epoch in range(epochs):
    if epoch % 5 == 0:
        print("Epoch: ", epoch)
    
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = net(inputs)
        print(labels.shape)

        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

Epoch:  0


IndexError: Target 7 is out of bounds.

In [None]:
# split data to train and test set and split test set to test and validation set

covertype = fetch_covtype()

X = covertype.data
y = covertype.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, shuffle=True)

# now we split test set for validation set

X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size= 0.5, shuffle=True)

