In [None]:
import pandas as pd
import numpy as np
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

In [29]:
# Define a customized Dataset class for the Madelon feature/label files
class MadelonDataset(Dataset):
    """Map-style dataset over a Madelon feature file and its label file.

    Both files are parsed once, as whitespace-separated tables without a
    header row. Labels equal to -1 are remapped to 0 at load time so they can
    be used directly as class indices; any other label value is kept as read.
    """

    def __init__(self, x_file, y_file, num_features=500):
        """
        Args:
            x_file (string): Name of the feature file (one sample per row)
            y_file (string): Name of the label file (one label per row)
            num_features (int): Number of leading feature columns to keep

        Raises:
            ValueError: If the two files do not have the same number of rows
        """
        # Read in data (raw string: "\s" is not a valid Python string escape)
        self.main = pd.read_csv(x_file, header=None, sep=r'\s+')
        self.lab = pd.read_csv(y_file, header=None, sep=r'\s+')

        if len(self.main) != len(self.lab):
            raise ValueError(
                "feature file has {} rows but label file has {} rows".format(
                    len(self.main), len(self.lab)))

        # Remap -1 -> 0 once, up front, so the stored labels never depend on
        # which samples have already been accessed.
        self.lab[0] = self.lab[0].replace(-1, 0)

        # Convert once instead of going through pandas on every access.
        feature_block = self.main.iloc[:, 0:num_features].to_numpy(
            dtype='float64', copy=True)
        self._features = torch.from_numpy(feature_block).contiguous()
        self._labels = self.lab.iloc[:, 0].to_numpy()

    def __len__(self):
        """Return the number of samples (rows of the feature file)."""
        return len(self.main)

    def __getitem__(self, idx):
        """Return the `(features, label)` tuple for row `idx`.

        `features` is a float64 tensor holding the kept feature columns;
        `label` is the scalar from the first column of the label file.
        """
        # clone() keeps each returned sample independent of the cached tensor
        features = self._features[idx].clone()
        label = self._labels[idx]
        # Combine features and label into a tuple
        return (features, label)

In [30]:
# Hyper-parameters shared by the model definition, the data loaders and the
# training loop in the cells below.
input_size = 500  # width of the first Linear layer; the dataset reads feature columns 0:500
hidden_size = 500  # width of every hidden Linear layer
num_classes = 2  # size of the output layer (labels are mapped from -1/1 to 0/1)
num_epochs = 5  # number of full passes over the training loader
batch_size = 10  # samples per mini-batch, used by both data loaders
learning_rate = 0.01  # step size passed to the Adam optimizer

In [31]:
# The *_train files feed training; the *_valid files are used as the
# held-out evaluation set.
train_dataset = MadelonDataset("madelon_train.data", "madelon_train.labels")
test_dataset = MadelonDataset("madelon_valid.data", "madelon_valid.labels")

# Data loaders: training batches are reshuffled every epoch, evaluation
# batches keep the file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [32]:
# Device configuration: use the CUDA GPU when one is available, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


In [33]:
# Neural Network Class
class NeuralNet(nn.Module):
    """Feedforward network with three ReLU-activated hidden layers.

    Layout: input_size -> hidden_size -> hidden_size -> hidden_size ->
    num_classes. The output layer is linear and returns raw scores (no
    softmax is applied here).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Args:
            input_size (int): Number of input features
            hidden_size (int): Width of each of the three hidden layers
            num_classes (int): Number of output scores per sample
        """
        super().__init__()
        # For reproducibility. Note this reseeds torch's *global* RNG, so it
        # also affects any random torch operation executed afterwards.
        torch.manual_seed(10)
        self.fc1 = nn.Linear(input_size, hidden_size)
        # ReLU is stateless, so a single instance is shared by all layers.
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output-layer scores for the batch `x`."""
        out = x
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            out = self.relu(hidden_layer(out))
        return self.fc4(out)

In [34]:
# Set model: build the network, cast its parameters to float64 (the dataset
# yields double-precision features) and move it to the configured device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.double()
model = model.to(device)

In [35]:
# Set loss function and optimize algorithm
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# NOTE(review): the recorded run of this cell ends at 50 % accuracy with both
# losses near 0.693 (= ln 2) after a first-epoch training loss of ~79. That
# pattern suggests the network collapsed to a constant prediction; the
# features are fed in unscaled, so standardizing them and/or lowering the
# learning rate is presumably needed -- TODO confirm.

# Set start time
start = time.time()

# Defined before the loop so the accuracy report below still works when
# num_epochs is 0.
correct = 0
total = 0

for epoch in range(num_epochs):
    train_loss, test_loss = [], []
    # Accuracy counters are reset every epoch; after the loop they hold the
    # result of the last epoch only.
    correct = 0
    total = 0

    # Train the model
    model.train()
    for x, y in train_loader:
        # Move tensors to the configured device; CrossEntropyLoss expects
        # class-index targets as int64.
        x = x.to(device)
        y = y.to(device=device, dtype=torch.int64)

        # Forward pass
        outputs = model(x)
        loss = criterion(outputs, y)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())

    # Evaluate on the held-out loader without tracking gradients
    model.eval()
    with torch.no_grad():
        for x, y in test_loader:
            x = x.to(device)
            # Same target cast as in training, for consistency
            y = y.to(device=device, dtype=torch.int64)
            outputs = model(x)
            eval_loss = criterion(outputs, y)
            # Predicted class = index of the largest output score
            _, predicted = torch.max(outputs, 1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            test_loss.append(eval_loss.item())

    print ("Epoch:", epoch + 1, ", Training Loss: ", np.mean(train_loss), ", Test loss: ", np.mean(test_loss))

# Set end time
end = time.time()
# Guard against an empty evaluation set (or zero epochs)
accuracy = 100 * correct / total if total else float('nan')
print('Accuracy is: {} %'.format(accuracy))
print('It takes ' +  str(end - start)  + ' seconds to run Feedforward Neural Network algorithm on Madelon dataset.')

Epoch: 1 , Training Loss:  79.36483975950941 , Test loss:  0.7277146871190765
Epoch: 2 , Training Loss:  0.6951967256612925 , Test loss:  0.6933738097585008
Epoch: 3 , Training Loss:  0.693995607916262 , Test loss:  0.6937354242506832
Epoch: 4 , Training Loss:  0.6937024403043637 , Test loss:  0.6938521395905951
Epoch: 5 , Training Loss:  0.6943954674343209 , Test loss:  0.6931906536266362
Accuracy is: 50.0 %
It takes 19.379312753677368 seconds to run Feedforward Neural Network algorithm on spambase dataset.
