In [1]:
#import argparse
import datetime
import sys
import json
from collections import defaultdict
from pathlib import Path
from tempfile import mkdtemp

import numpy as np
import torch
import torch.distributions as dist
from torch import optim
from torch.utils.data import DataLoader

import math

import models
#import objectives
import objectives_dev as objectives
from utils import Logger, Timer, save_model, save_vars, unpack_data

from utils import log_mean_exp, is_multidata, kl_divergence, get_mean

from datasets_dev import ATAC_Dataset, RNA_Dataset

import torch
import torch.distributions as dist
import torch.nn as nn
import torch.nn.functional as F
from numpy import prod, sqrt
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid

from utils import Constants
from vis import plot_embeddings, plot_kls_df

#args
experiment = 'rna_atac'
model = 'rna_atac_dev' 
obj = 'dreg'
K = 20
looser = True
llik_scaling = 0
batch_size = 256
epochs = 10
latent_dim = 20
num_hidden_layers = 1
learn_prior = False
logp = False
print_freq = 0
no_analytics = False
seed = 1

class params():
    
    def __init__(self,
                 experiment,
                 model,
                 obj,
                 K,
                 looser,
                 llik_scaling,
                 batch_size,
                 epochs,
                 latent_dim,
                 num_hidden_layers,
                 learn_prior,
                 logp,
                 print_freq,
                 no_analytics,
                 seed):
        
        self.experiment = experiment
        self.model = model
        self.obj = obj
        self.K = K
        self.looser = looser
        self.llik_scaling = llik_scaling
        self.batch_size = batch_size
        self.epochs = epochs
        self.latent_dim = latent_dim
        self.num_hidden_layers = num_hidden_layers
        self.learn_prior = learn_prior
        self.logp = logp
        self.print_freq = print_freq
        self.no_analytics = no_analytics
        self.seed = seed
        
args = params(experiment,
                model,
                 obj,
                 K,
                 looser,
                 llik_scaling,
                 batch_size,
                 epochs,
                 latent_dim,
                 num_hidden_layers,
                 learn_prior,
                 logp,
                 print_freq,
                 no_analytics,
                 seed)

In [2]:
# random seed
# https://pytorch.org/docs/stable/notes/randomness.html
torch.backends.cudnn.benchmark = True
torch.manual_seed(args.seed)
np.random.seed(args.seed)

device = torch.device("cpu")

# load model
modelC = getattr(models, 'VAE_{}'.format(args.model))
model = modelC(args).to(device)

In [3]:
#Select pretrained model
pretrained_path = '../experiments/' + args.experiment

print('Loading model {} from {}'.format(model.modelName, pretrained_path))

model.load_state_dict(torch.load(pretrained_path + '/model.rar'))
model._pz_params = model._pz_params

Loading model rna-atac from ../experiments/rna_atac


In [4]:
# set up run path
runId = datetime.datetime.now().isoformat()
experiment_dir = Path('../experiments/' + args.experiment)
experiment_dir.mkdir(parents=True, exist_ok=True)
runPath = mkdtemp(prefix=runId, dir=str(experiment_dir))

In [5]:
train, test = model.getDataLoaders(batch_size=args.batch_size,device=device)

Loading  data ...
Original data contains 5081 cells x 19322 peaks
Finished loading takes 0.05 min
Loading  data ...
Original data contains 5081 cells x 229429 peaks
Finished loading takes 0.33 min


In [14]:
for i, d in enumerate(train):
    if i==0:
        #data = unpack_data(d,device=device)
        #data = d
        data = [d.to(device) for d in train]

AttributeError: 'list' object has no attribute 'to'

In [64]:
print(len(data))
print(data[0].shape)
print(data[1].shape)

2
torch.Size([10, 19322])
torch.Size([10, 43703])


In [9]:
ds = train.dataset

In [21]:
print(len(ds))
print(len(ds[0]))
print(ds[0])

5081
2
[tensor([0., 0., 0.,  ..., 0., 0., 0.]), tensor([0., 0., 0.,  ..., 0., 0., 0.])]


In [20]:
print(len([*train]))

509


In [75]:
model.eval()
torch.no_grad()

_, px_zs, _ = model.forward(data)
# cross-modal matrix of reconstructions
recons_mat = [[get_mean(px_z) for px_z in r] for r in px_zs]

In [76]:
#print(recons)
print(len(recons_mat))
print(len(recons_mat[0]))
print(len(recons_mat[1]))
print(recons_mat[0][0].shape)
print(recons_mat[0][1].shape)

2
2
2
torch.Size([1, 10, 19322])
torch.Size([1, 10, 43703])


In [77]:
for r, recons_list in enumerate(recons_mat):
            for o, recon in enumerate(recons_list):
                _data = data[r].cpu()
                recon = recon.squeeze(0).cpu()

In [79]:
print(_data.shape)
print(recon.shape)

torch.Size([10, 43703])
torch.Size([10, 43703])


In [None]:
train, test = model.getDataLoaders(batch_size=5081,device=device) #姑息

In [None]:
for i,d in test:
    x = d

In [8]:
model.reconstruct(x,runPath,epoch=1)

In [9]:
def getDataLoadersMNIST(batch_size, shuffle=True, device="cuda"):
    kwargs = {'num_workers': 1, 'pin_memory': True} if device == "cuda" else {}
    tx = transforms.ToTensor()
    train = DataLoader(datasets.MNIST('../data', train=True, download=True, transform=tx),
                        batch_size=batch_size, shuffle=shuffle, **kwargs)
    test = DataLoader(datasets.MNIST('../data', train=False, download=True, transform=tx),
                        batch_size=batch_size, shuffle=shuffle, **kwargs)
    return train, test

In [10]:
def getDataLoadersRNA(batch_size, shuffle=True, device="cuda"):
    kwargs = {'num_workers': 1, 'pin_memory': True} if device == "cuda" else {}

    #SingleCellDatasetを移植
    path = '../data/SNARE-seq/RNA-seq' #後でarg指定できるようにする
    transpose = False 
        
    dataset = RNA_Dataset(path,transpose=transpose)
        
    train = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, **kwargs)
    test = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=False, **kwargs)

    return train, test

In [11]:
def getDataLoadersATAC(batch_size, shuffle=True, device="cuda"):
    kwargs = {'num_workers': 1, 'pin_memory': True} if device == "cuda" else {}
        
    #SingleCellDatasetを移植
    path = '../data/SNARE-seq/ATAC-seq'
    low = 0.01
    high = 0.9
    min_peaks = 100
    transpose = False 
    dataset = ATAC_Dataset(path, low=low, high=high, min_peaks=min_peaks,transpose=transpose)
        
    dataset.create_binary()
        
    train = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, **kwargs)
    test = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=False, **kwargs)
        
    return train, test 

In [12]:
def getDataLoadersRNA_ATAC(self, batch_size, shuffle=True, device='cuda'):

        # load base datasets
        t1, s1 = self.vaes[0].getDataLoaders(batch_size, shuffle, device)
        t2, s2 = self.vaes[1].getDataLoaders(batch_size, shuffle, device)

        train_rna_atac = TensorDataset([
            ResampleDataset(t1.dataset),
            ResampleDataset(t2.dataset)
        ])
        test_rna_atac = TensorDataset([
            ResampleDataset(s1.dataset),
            ResampleDataset(s2.dataset)
        ])

        kwargs = {'num_workers': 2, 'pin_memory': True} if device == 'cuda' else {}

        train = DataLoader(train_rna_atac, batch_size=batch_size, shuffle=shuffle, **kwargs)
        test = DataLoader(test_rna_atac, batch_size=batch_size, shuffle=shuffle, **kwargs)
        return train, test