In [21]:
import numpy as np
from torchvision import transforms
import torch
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold

In [22]:
#############################################################################
def train_ann(ann=None, dataloader=None, criterion=None, epochs=None):
    for epoch in range(epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        i = 0
        for data, target, index in dataloader:
            #print(i)
            #print(data)
            # get the inputs; data is a list of [inputs, labels/target]
            inputs = data
            labels = target
    
            # zero the parameter gradients
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = ann(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
            # print statistics
            running_loss += loss.item()
            if i % 20 == 19:    # print every 20 mini-batches
                print('[epoch %d, pattern number %d] loss: %.3f' %
                      (epoch + 1, index, running_loss / 20))
                running_loss = 0.0
            i += 1

    print('Finished Training')


In [23]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
        assert len(X) == len(y)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        data, target = self.X[i], self.y[i]
        # to be able to get the pattern/example index later
        index = i
        return data,target,index


In [24]:
# this is one way to define a network
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x)) # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x

In [43]:
if __name__ == '__main__':    
    torch.manual_seed(1)    # reproducible
    
    # x data (tensor), shape=(100, 1)
    X = torch.unsqueeze(torch.linspace(-1, 1, 4000), dim=1)
    # noisy y data (tensor), shape=(100, 1)
    y = X.pow(2) + 0.2*torch.rand(X.size())
    
    dataset = MyDataset(X, y)
    
    # Set the k-fold
    k = 3

 Split the indices into k mutually exclusive subsets $\mathcal{D}_i$

In [44]:
    indices = range(len(dataset))
    partitions = kf = KFold(n_splits=k, random_state=None, shuffle=True)

The error vector contains errors $e_i$ for every pattern $z^{(i)}$.
The size of this vector in a sigle task scenario with continuos output
(univariate regression) for a dataset with N pattern is (1 x N).

In [45]:
    error_vector = np.arange(len(dataset))
    predicted = np.arange(len(dataset))

In [46]:
    from IPython.core.display import display, HTML
    display(HTML("<style>div.output_scroll { height: 44em; }</style>"))
    
    fold = 0
    loss_function = torch.nn.MSELoss()

    for train_index, test_index in kf.split(indices):
        fold += 1
        print("Training in fold number:", fold)
        
        # Define the network for this fold. It is a kind of weight reset.
        # In more complex scenarios we could use different ANN for every fold.
        # For example, assuming there is a function taking an integer and
        # returning a network we could make net = get_network_for_fold(fold)
        net = Net(n_feature=1, n_hidden=10, n_output=1)
        # print(net)  # net architecture
    
        # We globaly define the hyperparamers but they could be paramerters 
        # of the training algo.
        epochs = 2
        optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
        criterion = torch.nn.MSELoss()
        
        current_training_d_without_d_i = SubsetRandomSampler(
                indices=train_index)
        
        current_training_d_loader = torch.utils.data.DataLoader(dataset, 
                                                            batch_size=1, 
                                                            shuffle=False, 
                                        sampler=current_training_d_without_d_i, 
                                                            batch_sampler=None,
                                                            num_workers=0,
                                                            collate_fn=None,
                                                            pin_memory=False,
                                                            drop_last=False,
                                                            timeout=0,
                                                        worker_init_fn=None, 
                                                multiprocessing_context=None)
        
        current_d_i = SubsetRandomSampler(indices=test_index)
        
        current_d_i_loader = torch.utils.data.DataLoader(dataset, 
                                                            batch_size=1, 
                                                            shuffle=False, 
                                                        sampler=current_d_i, 
                                                            batch_sampler=None,
                                                            num_workers=0,
                                                            collate_fn=None,
                                                            pin_memory=False,
                                                            drop_last=False,
                                                            timeout=0,
                                                        worker_init_fn=None, 
                                                multiprocessing_context=None)
        # train CNN
        # $f_i$ is the learning algorithm. In this case, is the ANN with the 
        # "best parameters" according to the loss function used inside the
        # training loop. Note that network architecture, loss function 
        # (criterion) and number of iterations (epochs) remain constant.
        # However, these paramters could be changed to perform a model 
        # selection/evaluation.
        train_ann(ann=net, dataloader=current_training_d_loader, 
                  criterion=criterion, epochs=epochs)
        
        f_i = net
    
        # Calculate loss of the trained model output and the data elements of
        # the current partition. Note that we could use now a different loss
        # function than the one used to train the network itself. Nevertheless,
        # I use the same here (L1 loss).
        current_loss = 0.0
        print("Validating in fold number:", fold)
        i = 0
        for data, target, index in current_d_i_loader:
            # get the inputs; data is a list of [inputs, labels]
            inputs = data
            labels = target
    
            # only forward because we are performing evaluation
            outputs = net(inputs)
            loss = loss_function(outputs, labels)
    
            # print statistics
            current_loss = loss.item()
            # index is the pattern/example index
            predicted[index] = outputs.item()
            error_vector[index] = current_loss
            if index % 20 == 19:    # print every 20 examples
                print('[fold number %d, pattern number %d] current (single pattern) loss: %.3f' %
                      (fold, index, current_loss))
            i += 1
    print("Finished fold iterations")

Training in fold number: 1
[epoch 1, pattern number 3393] loss: 0.137
[epoch 1, pattern number 1138] loss: 0.130
[epoch 1, pattern number 3594] loss: 0.072
[epoch 1, pattern number 160] loss: 0.068
[epoch 1, pattern number 3219] loss: 0.083
[epoch 1, pattern number 1745] loss: 0.064
[epoch 1, pattern number 1739] loss: 0.021
[epoch 1, pattern number 1946] loss: 0.031
[epoch 1, pattern number 3852] loss: 0.018
[epoch 1, pattern number 2814] loss: 0.019
[epoch 1, pattern number 1538] loss: 0.009
[epoch 1, pattern number 892] loss: 0.029
[epoch 1, pattern number 2758] loss: 0.014
[epoch 1, pattern number 3084] loss: 0.007
[epoch 1, pattern number 3550] loss: 0.004
[epoch 1, pattern number 3670] loss: 0.015
[epoch 1, pattern number 2251] loss: 0.005
[epoch 1, pattern number 1352] loss: 0.005
[epoch 1, pattern number 829] loss: 0.010
[epoch 1, pattern number 519] loss: 0.007
[epoch 1, pattern number 1669] loss: 0.014
[epoch 1, pattern number 1785] loss: 0.005
[epoch 1, pattern number 3981] 

[epoch 2, pattern number 2022] loss: 0.007
[epoch 2, pattern number 903] loss: 0.006
[epoch 2, pattern number 3575] loss: 0.005
[epoch 2, pattern number 3708] loss: 0.006
[epoch 2, pattern number 203] loss: 0.009
[epoch 2, pattern number 2433] loss: 0.007
[epoch 2, pattern number 914] loss: 0.006
[epoch 2, pattern number 3657] loss: 0.005
[epoch 2, pattern number 1522] loss: 0.007
[epoch 2, pattern number 2186] loss: 0.007
[epoch 2, pattern number 3664] loss: 0.003
[epoch 2, pattern number 2199] loss: 0.006
[epoch 2, pattern number 3143] loss: 0.007
[epoch 2, pattern number 1437] loss: 0.005
[epoch 2, pattern number 2974] loss: 0.006
[epoch 2, pattern number 1028] loss: 0.007
[epoch 2, pattern number 694] loss: 0.005
[epoch 2, pattern number 2140] loss: 0.006
[epoch 2, pattern number 1621] loss: 0.007
[epoch 2, pattern number 1857] loss: 0.010
[epoch 2, pattern number 811] loss: 0.006
[epoch 2, pattern number 1471] loss: 0.004
[epoch 2, pattern number 3219] loss: 0.006
[epoch 2, patter

[epoch 1, pattern number 2776] loss: 0.005
[epoch 1, pattern number 2705] loss: 0.009
[epoch 1, pattern number 568] loss: 0.008
[epoch 1, pattern number 3856] loss: 0.005
[epoch 1, pattern number 3139] loss: 0.004
[epoch 1, pattern number 546] loss: 0.012
[epoch 1, pattern number 3204] loss: 0.008
[epoch 1, pattern number 1294] loss: 0.007
[epoch 1, pattern number 227] loss: 0.006
[epoch 1, pattern number 3763] loss: 0.007
[epoch 1, pattern number 2847] loss: 0.007
[epoch 1, pattern number 1206] loss: 0.010
[epoch 1, pattern number 926] loss: 0.006
[epoch 1, pattern number 954] loss: 0.009
[epoch 1, pattern number 1821] loss: 0.004
[epoch 1, pattern number 3417] loss: 0.005
[epoch 1, pattern number 3784] loss: 0.006
[epoch 1, pattern number 1691] loss: 0.008
[epoch 1, pattern number 3393] loss: 0.008
[epoch 1, pattern number 801] loss: 0.007
[epoch 1, pattern number 2730] loss: 0.006
[epoch 1, pattern number 3540] loss: 0.004
[epoch 1, pattern number 1447] loss: 0.005
[epoch 1, pattern

[epoch 2, pattern number 3959] loss: 0.004
[epoch 2, pattern number 1186] loss: 0.009
[epoch 2, pattern number 2423] loss: 0.010
[epoch 2, pattern number 3691] loss: 0.004
[epoch 2, pattern number 559] loss: 0.007
[epoch 2, pattern number 2447] loss: 0.006
[epoch 2, pattern number 3200] loss: 0.006
[epoch 2, pattern number 1691] loss: 0.007
[epoch 2, pattern number 401] loss: 0.007
[epoch 2, pattern number 1726] loss: 0.006
[epoch 2, pattern number 3278] loss: 0.004
[epoch 2, pattern number 3738] loss: 0.008
Finished Training
Validating in fold number: 2
[fold number 2, pattern number 2239] current (single pattern) loss: 0.019
[fold number 2, pattern number 3619] current (single pattern) loss: 0.000
[fold number 2, pattern number 1699] current (single pattern) loss: 0.000
[fold number 2, pattern number 2459] current (single pattern) loss: 0.023
[fold number 2, pattern number 1059] current (single pattern) loss: 0.000
[fold number 2, pattern number 1519] current (single pattern) loss: 0

[epoch 1, pattern number 2884] loss: 0.005
[epoch 1, pattern number 3954] loss: 0.008
[epoch 1, pattern number 3209] loss: 0.009
[epoch 1, pattern number 928] loss: 0.006
[epoch 1, pattern number 1931] loss: 0.007
[epoch 1, pattern number 3283] loss: 0.006
[epoch 1, pattern number 3854] loss: 0.006
[epoch 1, pattern number 3499] loss: 0.007
[epoch 1, pattern number 1586] loss: 0.009
[epoch 1, pattern number 1489] loss: 0.004
[epoch 1, pattern number 3600] loss: 0.008
[epoch 1, pattern number 1167] loss: 0.006
[epoch 1, pattern number 3086] loss: 0.005
[epoch 1, pattern number 2381] loss: 0.018
[epoch 1, pattern number 402] loss: 0.010
[epoch 1, pattern number 245] loss: 0.004
[epoch 1, pattern number 3238] loss: 0.004
[epoch 1, pattern number 23] loss: 0.004
[epoch 1, pattern number 545] loss: 0.005
[epoch 1, pattern number 1073] loss: 0.005
[epoch 1, pattern number 1676] loss: 0.006
[epoch 1, pattern number 3917] loss: 0.007
[epoch 1, pattern number 1093] loss: 0.009
[epoch 1, pattern

[fold number 3, pattern number 2139] current (single pattern) loss: 0.012
[fold number 3, pattern number 1499] current (single pattern) loss: 0.000
[fold number 3, pattern number 2359] current (single pattern) loss: 0.000
[fold number 3, pattern number 1979] current (single pattern) loss: 0.004
[fold number 3, pattern number 2739] current (single pattern) loss: 0.000
[fold number 3, pattern number 1779] current (single pattern) loss: 0.000
[fold number 3, pattern number 2339] current (single pattern) loss: 0.001
[fold number 3, pattern number 479] current (single pattern) loss: 0.011
[fold number 3, pattern number 819] current (single pattern) loss: 0.000
[fold number 3, pattern number 439] current (single pattern) loss: 0.002
[fold number 3, pattern number 99] current (single pattern) loss: 0.003
[fold number 3, pattern number 2099] current (single pattern) loss: 0.004
[fold number 3, pattern number 379] current (single pattern) loss: 0.000
[fold number 3, pattern number 3559] current

In [47]:
# Visualize the erros
print(error_vector.size)

4000


In [48]:
from sklearn.metrics import explained_variance_score

In [50]:
true = dataset.y
pred = predicted

# Explained variance regression score function.
# Best possible score is 1.0, lower values are worse.
explained_variance_score(true, pred) 

0.18669748339461245