Import network and other stuff

In [19]:
import os, pickle, random, time
from filelock import FileLock
from src.Network import *
from src.dataloader import RNA_Dataset
from constants import *

Define the results PATH for the playground experiment (with your own data). If you comment out this code, all the results obtained with real data in the 'results' folder will be overwritten, and you won't be able to test the processing Jupyter notebooks against the real results.

In [20]:
# PATH_RESULTS = './results_test/'

Experimental settings are constrained by the following conditions:
N_REPEATS * N_INITS <= NUM_GPUS / GPU_PER_RUN
GPU_PER_RUN * memory of one GPU >= size of network

In [23]:
# Number of GPUs dedicated to the training process.
NUM_GPUS = 1

# Number of repeats.
N_REPEATS = 1

# Number of runs executed in parallel.
N_INITS = 1

# GPU share given to a single run; must satisfy
# GPU_PER_RUN * memory of one GPU >= size of network.
GPU_PER_RUN = 0.2

Some settings for my particular dataloading flow

In [24]:
# Maps each experiment name to the CSV file holding its data; the mapping is
# handed to RNA_Dataset by trainer().
DATA_FILES = {
    'spleen': 'dataSpleen.csv',
    'liver': 'dataLiver.csv',
}

# Individual file names, kept as separate module-level names.
spleenDataFile = DATA_FILES['spleen']
liverDataFile = DATA_FILES['liver']

Set params of training network

In [25]:
# --- Dataset options (forwarded to RNA_Dataset by trainer) ---
zscore = True
trainPerc = 0.85

# --- Network switches (forwarded to Network by trainer) ---
classTrain = True
AETrain = True
sparse = True

# --- Network sizes ---
latentDim = 128
n_hidden = 1024

# --- Optimisation settings ---
nEpochs = 20000
learningRateAE = 0.0001
learningRateD = 0.0001
weightDecay = 0.01

# --- Coefficients forwarded to Network (presumably loss weights; confirm in src.Network) ---
alpha = 10.0
beta = 1.0
gamma = 0.0001
lambd = 0.0001

# Use CUDA only when torch reports it is available.
use_cuda = torch.cuda.is_available()

# Forwarded to dataset._download for the training split.
shuffleTrain = False

Define trainer for specific dataset and experiment

In [26]:
def _remove_stale(path):
    """Best-effort removal of a file left over from a previous run.

    A missing file is not an error. Any other OS failure is reported on
    stdout and otherwise ignored, so that training still proceeds.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        # Nothing to clean up.
        pass
    except OSError as e:
        print(f"Error: {e.strerror}")


def trainer(DATA_NAME, EXPERIMENT, shuffleTrain):
    """Train one network for the given dataset/experiment and save the results.

    The function reads the notebook-level settings (PATH_DATA, PATH_RESULTS,
    DATA_FILES, zscore, trainPerc, use_cuda, latentDim, n_hidden, nEpochs,
    the learning rates, weightDecay, alpha/beta/lambd/gamma and the
    classTrain/AETrain/sparse switches) in addition to its arguments.

    Args:
        DATA_NAME: Dataset sub-path, appended verbatim to PATH_DATA and
            PATH_RESULTS (so it is expected to end with a '/').
        EXPERIMENT: Non-empty sequence of experiment names. With more than
            one name the results go to '<DATA_NAME>all/', otherwise to
            '<DATA_NAME><name>/'.
        shuffleTrain: Forwarded to ``dataset._download`` for the training
            split.

    Returns:
        None. Side effects: writes 'model.pth' and 'losses/losses.pickle'
        under the results directory and prints progress messages.

    Raises:
        ValueError: If EXPERIMENT is empty.
    """
    # Fail early with a clear message instead of an IndexError further down.
    if len(EXPERIMENT) == 0:
        raise ValueError("EXPERIMENT must contain at least one experiment name")

    PATH = PATH_DATA + DATA_NAME
    subDir = 'all' if len(EXPERIMENT) > 1 else EXPERIMENT[0]
    rootDir = PATH_RESULTS + DATA_NAME + subDir + '/'
    lossesDir = rootDir + 'losses/'
    lossesFile = lossesDir + 'losses.pickle'
    modelFile = rootDir + 'model.pth'
    os.makedirs(lossesDir, exist_ok=True)

    # Drop artefacts of a previous run so a failed training cannot leave
    # stale results behind.
    _remove_stale(lossesFile)
    _remove_stale(modelFile)

    print(f"""
Starting one run experiment {EXPERIMENT} with the dataset {DATA_NAME}
          """)

    start = time.time()
    seed = random.randrange(1, 1000)
    # The seed is drawn at random, so report it; otherwise the run cannot be
    # reproduced afterwards.
    print(f"Dataset seed: {seed}")
    dataset = RNA_Dataset(PATH, DATA_FILES, EXPERIMENT, zscore, trainPerc, seed)
    inputDim = len(dataset.features)

    Train, Valid = dict(), dict()
    # Validation data is never shuffled (original note: for visualization
    # shuffleTrain should be False as well).
    shuffleValid = False
    for experimentName in EXPERIMENT:
        Train[experimentName], Valid[experimentName] = dataset._download(
            experimentName, shuffleTrain, shuffleValid)

    network = Network(EXPERIMENT, use_cuda,
                      inputDim, latentDim, n_hidden,
                      learningRateAE, learningRateD, weightDecay,
                      alpha, beta, lambd, gamma,
                      classTrain, AETrain, sparse)
    Losses = network.trainLoop(Train, Valid, nEpochs)

    # Save the whole model object for further inference (show latents
    # distribution).
    torch.save(network, modelFile)

    # Save losses (comment if already trained and start from loading and
    # plot losses).
    with open(lossesFile, 'wb') as handle:
        pickle.dump(Losses, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"Training for the experiment {EXPERIMENT} with the  dataset {DATA_NAME} took {time.time()-start} seconds")

Pick dataset and experiment for one run of training

In [27]:
# Dataset sub-path (keeps its trailing '/') and the experiments to train on.
DATA_NAME = '1_[res]_[sus, delayedSus]/'
EXPERIMENT = ['spleen', 'liver']

# Launch a single training run with the settings defined in this notebook.
trainer(DATA_NAME, EXPERIMENT, shuffleTrain)


Starting one run experiment ['spleen', 'liver'] with the dataset 1_[res]_[sus, delayedSus]/
          
Training for the experiment ['spleen', 'liver'] with the  dataset 1_[res]_[sus, delayedSus]/ took 683.873119354248 seconds
