In [3]:
import typing

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn, optim, tensor, FloatTensor
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms

In [10]:
class TEA(nn.Module):
    """A pytorch module to build Target-Embedding Autoencoders.

    The network is two stacked ``nn.Linear`` layers
    (``input_dim -> hidden_dim -> output_dim``) with no activation between them.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        """
        Parameters
        ----------
        input_dim : int
            The number of input features
        hidden_dim : int
            The number of features in the hidden layer
        output_dim : int
            The number of output features
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Pass a batch through both linear layers and return the result."""
        return self.fc2(self.fc1(x))

    def _batch_loss(self, row, criterion):
        """Score one batch dict (keys 'X' and 'y') with ``criterion``."""
        y_hat = self(row['X'])
        # Keep the batch dimension of the prediction and let the remaining
        # dimension be inferred: a flat (batch,) target becomes (batch, 1) as
        # before, while (batch, k) targets for output_dim > 1 now work too.
        target = row['y'].reshape(y_hat.size(0), -1)
        return criterion(y_hat, target)

    def train(self, data_loader=True, optimizer=None, criterion=None):
        """Run one training epoch, or toggle training mode.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``nn.Module.eval()`` and parent modules call with a single bool. When
        the first argument is a bool the call is forwarded to
        ``nn.Module.train`` so ``model.eval()`` / ``model.train(True)`` keep
        working; otherwise one optimisation pass over ``data_loader`` is run.

        Parameters
        ----------
        data_loader : torch.utils.data.DataLoader or bool
            Iterable of batch dicts with keys 'X' and 'y', or a bool training
            mode flag.
        optimizer : torch.optim
        criterion : function

        Returns
        -------
        float
            The loss averaged over all batches (or ``self`` when called with a
            bool mode flag).

        Raises
        ------
        TypeError
            If a data loader is given without an optimizer and criterion.
        ValueError
            If ``data_loader`` yields no batches.
        """
        if isinstance(data_loader, bool):
            return super().train(data_loader)
        if optimizer is None or criterion is None:
            raise TypeError("train() requires an optimizer and a criterion "
                            "when called with a data loader")

        running_loss = 0.0
        n_batches = 0
        for row in data_loader:
            optimizer.zero_grad()
            loss = self._batch_loss(row, criterion)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            n_batches += 1
        if n_batches == 0:
            raise ValueError("data_loader yielded no batches; cannot compute a mean loss")

        mean_loss = running_loss / n_batches  # loss needs to be averaged over all batches
        print("Train loss: {0:.3f}".format(mean_loss))
        return mean_loss

    def test(self, data_loader, criterion):
        """Evaluate the model on ``data_loader`` without updating parameters.

        Parameters
        ----------
        data_loader : torch.utils.data.DataLoader
            Iterable of batch dicts with keys 'X' and 'y'.
        criterion : function

        Returns
        -------
        float
            The loss averaged over all batches.

        Raises
        ------
        ValueError
            If ``data_loader`` yields no batches.
        """
        running_loss = 0.0
        n_batches = 0
        # No gradients are needed for evaluation, so skip building the graph.
        with torch.no_grad():
            for row in data_loader:
                running_loss += self._batch_loss(row, criterion).item()
                n_batches += 1
        if n_batches == 0:
            raise ValueError("data_loader yielded no batches; cannot compute a mean loss")

        mean_loss = running_loss / n_batches  # loss needs to be averaged over all batches
        print("Test loss: {0:.3f}".format(mean_loss))
        return mean_loss

In [11]:
# Load the tab-separated expression table into a DataFrame.
# NOTE(review): the rendered preview of this frame shows an "Unnamed: 0"
# column, which suggests the file carries a saved row index; consider
# `index_col=0` if that column is not a real feature — confirm against the file.
synthetic_data = pd.read_csv(
    filepath_or_buffer="data/net1_expression_data.tsv",
    sep="\t",
)

In [12]:
class TeaDataset(Dataset):
    """A custom Pytorch dataset for training TEA models.

    Each item is a dict ``{'X': X[idx], 'y': y[idx]}``, optionally passed
    through ``transform``.
    """

    def __init__(self, X, y, transform=None):
        """
        Parameters
        ----------
        X : np.array of floats
            The input features
        y : np.array
            The labels in [0, 1]
        transform : callable, optional
            Applied to every sample dict before it is returned, e.g. a
            'to_tensor' function to convert from ndarray. ``None`` (the
            default) returns samples unchanged.

        Raises
        ------
        ValueError
            If X and y do not hold the same number of samples.
        """
        # Catch misaligned inputs up front instead of failing mid-epoch (y too
        # short) or silently ignoring labels (y too long).
        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same number of samples, got {0} and {1}".format(
                    len(X), len(y)))
        self.X = X
        self.y = y
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of X)"""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return a single sample from the dataset"""
        sample = {'X': self.X[idx], 'y': self.y[idx]}
        if self.transform is not None:
            sample = self.transform(sample)
        return sample

Unnamed: 0,G1,G2,G3,G4,G5,G6,G7,G8,G9,G10,...,G1634,G1635,G1636,G1637,G1638,G1639,G1640,G1641,G1642,G1643
0,0.425448,0.017829,0.907989,0.448247,0.172324,0.273489,0.843766,0.648201,1.004533,0.365305,...,0.011979,0.963306,1.169870,0.331381,0.350600,0.822844,0.304483,0.319917,0.364280,0.765945
1,0.442400,0.050525,0.869368,0.445851,0.173311,0.274889,0.764049,0.747870,1.022589,0.434106,...,0.022247,1.014137,0.888465,0.281649,0.485940,0.915617,0.317507,0.238074,0.509130,0.691403
2,1.056847,0.208454,0.467448,0.505077,0.244883,0.208451,0.665355,1.192092,0.824068,0.146987,...,0.422066,0.895203,1.028826,0.825126,0.444819,0.349069,0.042310,0.165208,0.952178,0.678781
3,1.117226,0.003001,0.317654,0.387204,0.253792,0.179360,0.939244,0.868668,0.963028,0.233785,...,0.001163,1.046540,1.058098,0.484225,0.150689,0.449126,0.125197,0.000047,0.878127,0.566691
4,0.971068,0.001056,0.354651,0.474532,0.207718,0.102833,0.745871,0.909753,1.151865,0.318988,...,0.000845,1.041745,1.061129,0.384363,0.326859,0.512270,0.261410,0.000156,0.883981,0.646715
5,1.139386,0.122047,0.402465,0.348436,0.168614,0.255774,0.924055,0.823940,1.025145,0.310873,...,0.000262,0.894509,0.925117,0.295704,0.508041,0.754407,0.064153,0.040764,0.766373,0.725356
6,1.064869,0.140508,0.481763,0.474857,0.182643,0.112430,1.081748,0.998682,1.160934,0.359449,...,0.001865,0.878323,0.912965,0.205815,0.696312,0.659339,0.051364,0.035758,0.655370,0.748289
7,0.876117,0.073814,1.058292,0.730366,0.053656,0.175109,1.202648,1.047473,0.995017,0.113200,...,0.000996,0.828337,0.958336,0.704889,0.680660,0.487411,0.420395,0.500600,0.387646,0.631003
8,1.205966,0.153407,0.760861,0.655846,0.157731,0.141754,1.039769,1.008289,1.035285,0.072027,...,0.000505,0.801840,1.095022,0.504716,0.592740,0.444697,0.227262,0.124716,0.389183,0.655293
9,1.000675,0.078986,0.803631,0.838975,0.088306,0.151089,1.071518,1.130214,1.110817,0.095975,...,0.005712,0.745761,1.059646,0.533138,0.647711,0.687846,0.359815,0.210771,0.317452,0.581441


In [None]:
class ToTensor:
    """Convert the 'X' and 'y' entries of a sample dict to Pytorch float tensors"""

    def __call__(self, sample):
        # Build a new dict with the same two keys; each value is wrapped in a
        # tensor and cast to FloatTensor.
        return {
            key: tensor(sample[key]).type(FloatTensor)
            for key in ('X', 'y')
        }

class Normalise(object):
    """Normalise the input features (X) of a sample dict, leaving y untouched.

    The statistics (mean/std or min/max) are computed over the values of the
    sample's own 'X' entry.

    Parameters
    ----------
        method : str
            From 'z' or 'range'. The default is 'z'.
            'z' z-transforms the features
            'range' range normalises so that the data lie in the range [0,1]
    """
    def __init__(self, method = 'z'):
        assert isinstance(method, str), "method must be a string"
        assert method in ['z', 'range'], "method must be 'z' or 'range'"
        self.method = method

    def __call__(self, sample):
        """Return a new sample dict with 'X' normalised and 'y' passed through."""
        X, y = sample['X'], sample['y']
        if self.method == "z":
            offset, scale = X.mean(), X.std()
        else:
            # Min-max scaling divides by the spread (max - min), not by max,
            # otherwise the result does not span [0, 1].
            offset = X.min()
            scale = X.max() - offset

        # A constant sample has zero spread; map it to zeros rather than
        # dividing by zero.
        if scale == 0:
            scale = 1

        return {'X': (X - offset) / scale, 'y': y}