In [None]:
import csv
import numpy as np
from matplotlib import pyplot as plt
import sklearn
import clustergenes
import math
from tqdm.auto import tqdm

%matplotlib inline

In [None]:
# Parse gene file
#
# Reads ./gene-data.csv: the first row is a header; every following row holds
# a name in column 0 (collected into `gene_names`) and one numeric value in
# each remaining column.
# Produces:
#   gene_names   - list of the column-0 strings, in file order
#   patient_data - float array with one row per non-name CSV column and one
#                  column per data row of the file

gene_names = []
with open("./gene-data.csv", "r", newline="") as datafile:
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    datacsv = csv.reader(datafile)
    header = next(datacsv)
    patient_columns = [[] for _ in range(len(header) - 1)]
    npatients = len(patient_columns)

    for line in datacsv:
        if not line:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline); skip it instead of failing on line[0].
            continue
        if len(line) != npatients + 1:
            # Explicit error instead of a bare assert: it survives `-O` and
            # tells the reader which line of the file is malformed.
            raise ValueError(
                f"gene-data.csv, line {datacsv.line_num}: expected "
                f"{npatients + 1} columns but found {len(line)}"
            )

        gene_names.append(line[0])
        for column, value in zip(patient_columns, line[1:]):
            column.append(float(value))

patient_data = np.array(patient_columns)

In [None]:
# Show concentrations
#
# Heat map of the transposed data matrix (`patient_data.T`), with the colour
# scale fixed to the range [-10, 15].

fig, ax = plt.subplots(figsize=(12, 6))
im = ax.imshow(patient_data.T, aspect="auto", cmap="magma", vmin=-10, vmax=15)
fig.colorbar(im, ax=ax)

# Adjust the layout and write the file *before* showing the figure, so the
# figure displayed in the notebook is the same one that ends up on disk.
fig.subplots_adjust(left=0.05, right=1.07, bottom=0.05, top=0.97)
fig.savefig("./slides/figures/full_genes.eps")

plt.show()


In [None]:
# Build the clustering object from the data matrix, then print, for every
# entry of `clustering.intervals`, its "nclusters" and "p_length" values.
clustering = clustergenes.GeneCluster(patient_data=patient_data)

for interval in clustering.intervals:
    n, plen = interval["nclusters"], interval["p_length"]
    print(f"n={n:4}, plen={plen:6.2f}")

In [None]:
# Rearrange patient data
#
# Takes the cluster count of the first entry in `clustering.intervals`, asks
# the clustering for that many clusters, and draws one heat map per cluster
# in a three-column grid. Each cluster is used as a collection of row indices
# into `patient_data`.

n_clusters = clustering.intervals[0]["nclusters"]
clusters = clustering.get_clusters(n_clusters)

fig, axs = plt.subplots(math.ceil(n_clusters/3), 3, figsize=(16, 8))
axs = axs.flatten()

for i in range(n_clusters):
    # Gather the rows belonging to cluster i into their own matrix.
    cluster_patients = np.array([patient_data[j] for j in clusters[i]])
    axs[i].imshow(cluster_patients.T, aspect="auto", cmap="magma")

# The grid is rounded up to a multiple of three panels; switch off the
# surplus ones so the saved figure has no empty framed axes.
for unused_ax in axs[n_clusters:]:
    unused_ax.set_axis_off()

fig.subplots_adjust(left=0.03, right=0.99, bottom=0.05, top=0.98)
fig.savefig("./slides/figures/result.eps")

plt.show()