In [258]:
# train neural network model for covertype dataset
# import pytorch
import torch 
import torch.nn as nn 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score

In [259]:
class CovertypeClassifier(nn.Module):

    # define nn architecture
    def __init__(self):
        super().__init__()
        # 54 dimensions and 7 classes
        self.layer1 = nn.Linear(54, 64)
        self.activation1 = nn.ReLU()
        self.layer2 = nn.Linear(64, 128)
        self.activation2 = nn.ReLU()
        self.layer3 = nn.Linear(128, 64)
        self.activation3 = nn.ReLU()
        self.final_layer = nn.Linear(64, 7)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.layer1(x)
        x = self.activation1(x)
        x = self.layer2(x)
        x = self.activation2(x)
        x = self.layer3(x)
        x = self.activation3(x)
        x = self.final_layer(x)
        x = self.softmax(x)

        return x

In [260]:
net = CovertypeClassifier()
net

CovertypeClassifier(
  (layer1): Linear(in_features=54, out_features=64, bias=True)
  (activation1): ReLU()
  (layer2): Linear(in_features=64, out_features=128, bias=True)
  (activation2): ReLU()
  (layer3): Linear(in_features=128, out_features=64, bias=True)
  (activation3): ReLU()
  (final_layer): Linear(in_features=64, out_features=7, bias=True)
  (softmax): Softmax(dim=1)
)

In [261]:
# fetch data 

from sklearn.datasets import fetch_covtype

# load dataset
covertype = fetch_covtype()

# separate features and labels
X = covertype.data
y = covertype.target

In [262]:
# split dataset into train and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, shuffle=True)


In [263]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder()

y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1))
y_test_encoded = encoder.fit_transform(y_test.reshape(-1, 1))


In [264]:
# convert data to pytorch tensors 
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

X_train_tensor = torch.tensor(X_train).float()
X_test_tensor = torch.tensor(X_test).float()
y_train_tensor = torch.tensor(y_train_encoded.toarray())
y_test_tensor = torch.tensor(y_test_encoded.toarray())

y_train_tensor.shape

# print(y_train_tensor)


torch.Size([464809, 7])

In [265]:
y_test

array([1, 3, 1, ..., 1, 3, 3])

In [266]:
# define training and test dataset 

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# define dataloader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [267]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") # koristimo gpu samo ako je dostupan, inace koristimo cpu
print(device)
net = CovertypeClassifier().to(device) # we set defice for our model

cuda:0


In [274]:
import numpy as np

# method that counts precision for model using sklearn library


def calculate_recall_precission(model, test_loader):
    model.eval()  # Set the model to evaluation mode
    y_true = []
    y_pred = []

    with torch.no_grad():
        for input, label in test_loader:
            input = input.to(device) 
            label = label.to(device) # labels are one hot encoded
            outputs = model(input) # output has 7 classes 
           # _,predicted_class = torch.max(outputs.cpu(),1) # we take the class with highest probability
            label_class = torch.argmax(label.cpu())
            predicted_class = torch.argmax(outputs.cpu())
            y_true.append(label_class.numpy())
            y_pred.append(predicted_class.numpy())
            

    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    print(precision)
    print(recall)
    

In [269]:
# optimizer class
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

# loss function for multiclass classification
loss_function = nn.CrossEntropyLoss()


In [276]:
# calculate metrics before training
calculate_recall_precission(net, test_loader)

0.004247954688483322
0.14285714285714285


  _warn_prf(average, modifier, msg_start, len(result))


In [277]:
# train network 

net.train()
epochs = 7

for epoch in range(epochs):
    if epoch % 5 == 0:
        print("Epoch: ", epoch)
    
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = net(inputs)
        #print(labels.shape)

        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

Epoch:  0
Epoch:  5


In [278]:
calculate_recall_precission(net, test_loader)

0.004641283826305853
0.14285714285714285


  _warn_prf(average, modifier, msg_start, len(result))


In [223]:
# split data to train and test set and split test set to test and validation set

covertype = fetch_covtype()

X = covertype.data
y = covertype.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, shuffle=True)

# now we split test set for validation set

X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size= 0.5, shuffle=True)

# one hot encoding for labels

y_train_encoded = encoder.fit_transform(y_train.reshape(-1, 1))
y_val_encoded = encoder.fit_transform(y_val.reshape(-1, 1))




In [224]:
# create tensors from data
x_train_tensor = torch.tensor(X_train).float()
x_val_tensor = torch.tensor(X_val).float()

y_train_tensor = torch.tensor(y_train_encoded.toarray())
y_val_tensor = torch.tensor(y_val_encoded.toarray())


In [225]:
# create dataset and dataloader

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
val_dataset = TensorDataset(x_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

In [226]:
# validation 

def validation(model, val_loader):
    model.eval()
    val_loss = 0
    
    with torch.no_grad():
        for input, label in val_loader:
            input = input.to(device)
            label = label.to(device)
            prediction = model(input)
            loss = loss_function(prediction, label)
            val_loss += loss.item()

    return val_loss / len(val_loader)


In [227]:
# early stopping implementation

net.train()
epochs = 20

num_of_epochs_without_improvement = 0
val_loss_min = np.Inf

for epoch in range(epochs):
    if epoch % 5 == 0:
        print("Epoch: ", epoch)
    net.train()
    
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = net(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
    
    val_loss = validation(net, val_loader)
    print(val_loss)
    if val_loss <= val_loss_min:
        num_of_epochs_without_improvement = 0
        val_loss_min = val_loss
    else:
        num_of_epochs_without_improvement += 1

    if num_of_epochs_without_improvement == 3:
        print("Early stopping")
        break


Epoch:  0
2.1037940807454563
2.1037940807454563
2.10377113654575
2.103782608645603
2.1037940807454563
Epoch:  5
2.1038055528453095
Early stopping


In [None]:
# hyperparameter tuning

