In [1]:
import os
import os.path
import numpy

In [2]:
# copied from quantile_normalise
def load_intensity_data(paths):
    """Read every file named in ``paths`` with ``numpy.loadtxt``.

    Parameters
    ----------
    paths : iterable of str
        Paths of the text files to load.

    Returns
    -------
    list
        One loaded array per path, in the order the paths were given.
    """
    def read_one(intensity_path):
        # The handle is closed as soon as the array has been parsed.
        with open(intensity_path) as handle:
            return numpy.loadtxt(handle)

    return [read_one(intensity_path) for intensity_path in paths]

In [3]:
# copied from quantile_normalise
def load_metadata(path="metadata.txt"):
    """Parse a whitespace-delimited metadata table into nested dictionaries.

    The first non-blank line is the header; its first token labels the key
    column and is discarded, the remaining tokens become field names. Every
    later non-blank line contributes one entry keyed by its first token.

    Parameters
    ----------
    path : str, optional
        File to read. Defaults to ``"metadata.txt"`` in the current working
        directory, which is what the function always read before.

    Returns
    -------
    dict
        ``{row_key: {field_name: value, ...}, ...}`` with all values kept as
        strings. Values beyond the number of header fields are dropped, and
        rows shorter than the header simply lack the trailing fields.
    """
    metadata = {}
    names = None
    with open(path) as f:
        for raw_line in f:
            fields = raw_line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at the end of the
                # file) carry no record; indexing fields[0] would fail.
                continue
            if names is None:
                names = fields[1:]
                continue
            metadata[fields[0]] = dict(zip(names, fields[1:]))
    return metadata

In [4]:
# Every patient in the metadata table has a blood sample.
metadata = load_metadata()
blood_IDs = list(metadata)
blood_CELs = [metadata[patient]["blood_cel"] for patient in blood_IDs]

# Muscle samples exist only for patients who did not refuse the biopsy.
muscle_IDs = [
    patient
    for patient in blood_IDs
    if metadata[patient]["muscle_cel"] != "refused_biopsy"
]
muscle_CELs = [metadata[patient]["muscle_cel"] for patient in muscle_IDs]

# Below we dump the entire experiment (both blood and muscle data), writing one tab-separated file per gene.

In [6]:
def dump_experiment(blood_IDs, blood_CELs, target):
    """Write one tab-separated intensity table per gene into ``target``.

    Reads ``probesets/annotated_probesets.csv`` (whitespace-delimited despite
    the extension; the first non-blank line is the header and must contain
    the columns ``gene_name``, ``x`` and ``y``). For every probe row, the
    row's annotation fields are written to ``<target>/<gene_name>`` followed
    by that probe's intensity for each patient, looked up as
    ``intensity[y][x]`` in the patient's loaded array.

    Despite the parameter names, the function is not specific to blood
    samples; any matching pair of ID and CEL lists can be passed.

    Parameters
    ----------
    blood_IDs : sequence of str
        Patient identifiers; each becomes a ``patient_<id>`` column title.
    blood_CELs : sequence of str
        Base names of the intensity files, in the same order as
        ``blood_IDs``. Each is loaded from ``qn/<name>.txt``.
    target : str
        Output directory; created if missing. Files for genes present in the
        annotation are overwritten, so re-running does not duplicate rows.

    Notes
    -----
    Every written field, including the last one on a line, is followed by a
    tab character; this matches the format this function has always emitted.
    """
    annotation_path = "probesets/annotated_probesets.csv"
    os.makedirs(target, exist_ok=True)
    intensities = load_intensity_data(
        [os.path.join("qn", cel + ".txt") for cel in blood_CELs]
    )

    header_names = None
    column_of = {}
    # Genes whose output file has already been (re)created during this call.
    started_genes = set()
    with open(annotation_path, "r") as annotation_file:
        for raw_line in annotation_file:
            fields = raw_line.split()
            if not fields:
                continue  # tolerate blank lines
            if header_names is None:
                header_names = fields
                column_of = {name: index for index, name in enumerate(header_names)}
                continue

            gene_name = fields[column_of["gene_name"]]
            x = int(fields[column_of["x"]])
            y = int(fields[column_of["y"]])

            cells = [fields[column_of[name]] for name in header_names]
            cells.extend(
                str(intensities[j][y][x]) for j in range(len(blood_IDs))
            )

            # The first row of a gene truncates any file left over from an
            # earlier run and writes the title line; later rows append. The
            # gene is always identified by the "gene_name" column, so the
            # files that get reset are exactly the files that get written.
            first_row = gene_name not in started_genes
            gene_path = os.path.join(target, gene_name)
            with open(gene_path, "w" if first_row else "a") as gene_file:
                if first_row:
                    started_genes.add(gene_name)
                    titles = header_names + [
                        "patient_" + patient_id for patient_id in blood_IDs
                    ]
                    gene_file.write("".join(title + "\t" for title in titles) + "\n")
                gene_file.write("".join(cell + "\t" for cell in cells) + "\n")

In [7]:
# Write the blood tables; blood_IDs lists every patient found in the metadata.
dump_experiment(blood_IDs, blood_CELs, "experiment_blood")

In [8]:
# Write the muscle tables; muscle_IDs leaves out patients whose muscle_cel is "refused_biopsy".
dump_experiment(muscle_IDs, muscle_CELs, "experiment_muscle")