In [3]:
%matplotlib notebook
import numpy as np
from numpy import random
import pandas as pd
import scipy as sp
from prosstt import simulation as sim
from prosstt import tree
from prosstt.tree import Tree

import matplotlib.pyplot as plt

In [34]:
def save_files(job_id, save_dir, X, labs, brns, scalings, uMs, Hs, gene_scale, alpha, beta):
    """Write one simulated dataset to disk as labelled text files.

    Every file is created as ``save_dir + "/" + job_id + <suffix>``:

    * ``_simulation.txt``  -- ``X`` cast to int, tab-separated, rows labelled
      ``cell_<i>`` and columns labelled ``gene_<j>``.
    * ``_cellparams.txt``  -- tab-separated table with the columns
      ``pseudotime`` (``labs``), ``branches`` (``brns``) and ``scalings``.
    * ``_geneparams.txt``  -- tab-separated table with the columns ``alpha``,
      ``beta`` and ``genescale`` (``gene_scale``).
    * ``_ums<k>.txt`` and ``_hs<k>.txt`` -- ``uMs[k]`` and ``Hs[k]`` written with
      ``np.savetxt`` for every ``k`` in ``range(len(uMs))``.

    Parameters
    ----------
    job_id : str
        Prefix shared by all output file names.
    save_dir : str
        Directory that receives the files; it is not created here.
    X : 2-D array
        Matrix with one row per cell and one column per gene.
    labs, brns, scalings : sequence
        Per-cell values, one entry per row of ``X``.
    uMs, Hs : sequence of arrays
        Per-branch arrays; ``Hs`` is indexed with the same positions as ``uMs``.
    gene_scale, alpha, beta : sequence
        Per-gene values, one entry per column of ``X``.
    """
    # Human-readable row/column labels for the expression matrix.
    row_labels = ["cell_" + str(r) for r in range(X.shape[0])]
    col_labels = ["gene_" + str(c) for c in range(X.shape[1])]

    cell_fields = ["pseudotime", "branches", "scalings"]
    gene_fields = ["alpha", "beta", "genescale"]

    # Build every table before touching the disk.
    counts = pd.DataFrame(X, columns=col_labels, index=row_labels).astype(int)
    cell_table = pd.DataFrame(
        dict(zip(cell_fields, (labs, brns, scalings))),
        index=row_labels,
        columns=cell_fields,
    )
    gene_table = pd.DataFrame(
        dict(zip(gene_fields, (alpha, beta, gene_scale))),
        index=col_labels,
        columns=gene_fields,
    )

    prefix = save_dir + "/" + job_id
    outputs = (
        ("_simulation.txt", counts),
        ("_cellparams.txt", cell_table),
        ("_geneparams.txt", gene_table),
    )
    for suffix, table in outputs:
        table.to_csv(prefix + suffix, sep="\t")

    # One pair of plain-text matrices per branch, indexed by position in uMs.
    for k in range(len(uMs)):
        np.savetxt(prefix + "_ums" + str(k) + ".txt", uMs[k])
        np.savetxt(prefix + "_hs" + str(k) + ".txt", Hs[k])


def save_params(job_id, save_dir, G, br_lengths, br_compl, rseed, topology):
    """Record the simulation settings in ``<save_dir>/<job_id>_params.txt``.

    The file holds one ``label: value`` line per setting, in this order:
    ``Genes`` (``G``), ``pseudotimes`` (``br_lengths``), ``topology``,
    ``#modules`` (``br_compl``) and ``random seed`` (``rseed``). Values are
    rendered with ``str``; the last line has no trailing newline. An existing
    file with the same name is overwritten.
    """
    entries = (
        ("Genes: ", G),
        ("pseudotimes: ", br_lengths),
        ("topology: ", topology),
        ("#modules: ", br_compl),
        ("random seed: ", rseed),
    )
    target = save_dir + "/" + job_id + "_params.txt"
    with open(target, 'w') as out:
        # Newlines only *between* entries, so the file ends without one.
        out.write("\n".join(label + str(value) for label, value in entries))

In [31]:
# Load the gene parameters and per-branch expression programs of a previous
# simulation and rebuild the lineage tree from them.
# NOTE(review): absolute, machine-specific path prefix -- adjust before re-running elsewhere.
loc = "/data/niko/final/benchmark1/test38/test38_"

G = 758
pseudotimes = [50, 50, 50]
topology = [[0, 1], [0, 2]]
# One entry in `pseudotimes` per branch; derive the count instead of repeating a literal.
num_branches = len(pseudotimes)

gene_params = pd.read_csv(loc + "geneparams.txt", sep="\t", index_col=0)
alpha = np.array(gene_params["alpha"])
beta = np.array(gene_params["beta"])
base_exp = np.array(gene_params["genescale"])

# uMs[i] is read from "<loc>ums<i>.txt"; Ms[i] is its element-wise exponential
# multiplied by base_exp. No zero-filled placeholders are needed because every
# entry is produced directly.
# NOTE(review): each uMs[i] is presumably (pseudotimes[i], G) -- not checked here.
uMs = [np.loadtxt(loc + "ums" + str(i) + ".txt") for i in range(num_branches)]
Ms = [np.exp(uM) * base_exp for uM in uMs]

# NOTE(review): the positional 3, 1, 10 are presumably the number of branches,
# branch points and modules -- confirm against prosstt's Tree signature.
t = tree.Tree(topology, pseudotimes, 3, 1, 10, G)
t.add_genes(Ms)

In [32]:
# Sample points 0, 1, 2, ... up to (but excluding) the tree's maximum time.
sample_time = np.arange(t.get_max_time())

# NOTE(review): the meaning of the leading 4 and of scale_v=0.8 is not visible
# in this notebook -- check prosstt's sample_data_balanced documentation.
X, labs, brns, scalings = sim.sample_data_balanced(
    4, G, t, sample_time, alpha, beta,
    scale_v=0.8,
)

 |████████████████████████████████████████████████████████████████████████████████████████████████████| 99.8% 

In [36]:
# Persist the sampled dataset, its parameters and the per-cell scalings.
job_id = "single"
save_dir = "/home/npapado/Documents/repos/prosstt/data/single/"

save_params(
    job_id=job_id,
    save_dir=save_dir,
    G=G,
    br_lengths=pseudotimes,
    br_compl=10,
    rseed="none",
    topology=topology,
)

# TODO: replace with real hs from simulation -- uMs is passed as a stand-in
# for Hs, so the *_hs<i>.txt files duplicate the *_ums<i>.txt files.
save_files(
    job_id=job_id,
    save_dir=save_dir,
    X=X,
    labs=labs,
    brns=brns,
    scalings=scalings,
    uMs=uMs,
    Hs=uMs,
    gene_scale=base_exp,
    alpha=alpha,
    beta=beta,
)

# The scalings are additionally written on their own, one value per line.
scalefile = save_dir + "/" + job_id + "_scalings.txt"
np.savetxt(scalefile, scalings)