In [1]:
import numpy as np
import pandas as pd
from funcs.model import MLP

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [2]:
df = pd.read_excel("data/Dry_Bean_Dataset.xlsx")

In [3]:
y = df["Class"]
y = y.astype("category").cat.codes
X = df.iloc[:,:-1]

#print(X)

#normalizando X
X = (X-X.min())/(X.max()-X.min())

#print(X)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,test_size=0.2, random_state=42)
print(type(X_train))

<class 'pandas.core.frame.DataFrame'>


In [5]:
class BeansDataset(Dataset):
    
    def __init__(self, X:np.array, y:np.array, transform=None):
        """
        Args:
            X: Features.
            y: target.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.X = X.astype('float32')
        self.y = y.astype('float32')
        
        ####
        #Inversao desnecessaria causava o primeiro erro
        ####
        #self.y = self.y.reshape((len(self.y), 1))
        
        self.n_samples = self.X.shape[0]
        self.transform = transform

    def __len__(self):
        return self.n_samples

    def __getitem__(self, idx):
        
        sample = self.X[idx], self.y[idx]

        if self.transform:
            sample = self.transform(sample)

        return sample

In [6]:
batch_size = 1500

test_dataset = BeansDataset(X_test.to_numpy(), y_test.to_numpy())
train_dataset = BeansDataset(X_train.to_numpy(), y_train.to_numpy())

print(type(test_dataset))


# This will apply the softmax
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=True, batch_size=batch_size)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)

<class '__main__.BeansDataset'>


In [7]:
model = MLP(16,10,7)
device="cpu"
num_epochs=100
# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss() # specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.Adam(model.parameters(),lr = 0.01)

In [8]:
n_total_steps = len(train_loader)
torch.set_printoptions(edgeitems=4)
for epoch in range(num_epochs):
    # enumerate mini batches
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        #print(targets)
        ###
        #Outro problema aqui, os rotulos precisam ser indexes (ints), mas da forma construida eram tudo no formato de float
        ###
        targets = targets.to(device,dtype=torch.int64)
        #print(targets)
        # clear the gradients
        optimizer.zero_grad()
        # compute the model output
        yhat = model(inputs)
        # calculate loss
        #if i+1==2 or i+1==4:
        #    print(yhat)
        #print(targets)
        loss = criterion(yhat, targets)
        # credit assignment
        loss.backward()
        # update model weights
        optimizer.step()
        print(f"epoch {epoch + 1} / {num_epochs}, step {i + 1}/{n_total_steps}, loss = {loss.item():.4f}")

epoch 1 / 100, step 1/8, loss = 1.9130
epoch 1 / 100, step 2/8, loss = 1.8977
epoch 1 / 100, step 3/8, loss = 1.9036
epoch 1 / 100, step 4/8, loss = 1.8754
epoch 1 / 100, step 5/8, loss = 1.8630
epoch 1 / 100, step 6/8, loss = 1.8583
epoch 1 / 100, step 7/8, loss = 1.8336
epoch 1 / 100, step 8/8, loss = 1.8226
epoch 2 / 100, step 1/8, loss = 1.8139
epoch 2 / 100, step 2/8, loss = 1.7883
epoch 2 / 100, step 3/8, loss = 1.7795
epoch 2 / 100, step 4/8, loss = 1.7619
epoch 2 / 100, step 5/8, loss = 1.7414
epoch 2 / 100, step 6/8, loss = 1.7349
epoch 2 / 100, step 7/8, loss = 1.6979
epoch 2 / 100, step 8/8, loss = 1.6528
epoch 3 / 100, step 1/8, loss = 1.6500
epoch 3 / 100, step 2/8, loss = 1.6201
epoch 3 / 100, step 3/8, loss = 1.6233
epoch 3 / 100, step 4/8, loss = 1.5809
epoch 3 / 100, step 5/8, loss = 1.5830
epoch 3 / 100, step 6/8, loss = 1.5573
epoch 3 / 100, step 7/8, loss = 1.5131
epoch 3 / 100, step 8/8, loss = 1.5278
epoch 4 / 100, step 1/8, loss = 1.4851
epoch 4 / 100, step 2/8, 

In [9]:
with torch.no_grad():
    n_corrects = 0
    n_samples = 0
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        #print(inputs)
        targets = targets.to(device,dtype=torch.int64)
        outputs = model(inputs)
        #print(outputs)
        
        _,prediction = torch.max(outputs,1)
        #print(prediction)
        n_samples+=targets.shape[0]
        n_corrects += (prediction == targets).sum().item()
        
    acc = 100*n_corrects/n_samples
    print(f'accuracy equals {acc}')

accuracy equals 91.85341660543718
