# DIP + UnPaSt analysis

In [1]:
import numpy as np
import pandas as pd

from sklearn.cluster import SpectralCoclustering

## Load data

In [2]:
# Names of the columns that are later selected from the table as the data
# matrix (presumably one column per sample/timepoint -- TODO confirm).
labels = np.array([
    "VB3-Saat", "VB3-7", "VB3-8", "VB3-9",
    "VB3-13", "VB3-14", "VB3-15", "VB3-16", "VB3-17",
    "VB3-22", "VB3-24", "VB3-25",
    "VB3-31", "VB3-32", "VB3-33", "VB3-38",
    "VB3-40", "VB3-41", "VB3-42", "VB3-45", "VB3-46", "VB3-47", "VB3-48",
])

# Tab-separated input table (read_table uses "\t" as its default separator).
# NOTE(review): the file name suggests library-size-normalised, non-log2
# values; this cannot be verified from the code alone.
time_data = pd.read_table("2_Pelz_timepoint_infos.norm_lib_size_nolog2.tsv")

In [3]:
# Bicluster table (tab-separated); only its "genes" column is used here.
unpast = pd.read_csv(
    "DIP_norm_nolog2.consensus_seed=42.bin=kmeans,pval=0.01,clust=WGCNA,direction=DOWN-UP,ds=3,dch=0.995,max_power=10,precluster=True.biclusters.tsv",
    sep="\t",
)

# Union of all identifiers found in the space-separated "genes" field of
# every row, i.e. everything that appears in at least one bicluster.
unpast_dips = {
    identifier
    for gene_field in unpast["genes"]
    for identifier in gene_field.split(" ")
}

## Perform spectral co-clustering

In [4]:
# Fit a spectral co-clustering model with 9 co-clusters on the columns of
# `time_data` named in `labels`; the fixed random_state keeps the fit
# reproducible.
clustering = SpectralCoclustering(n_clusters=9, random_state=42).fit(time_data[labels])
sp_co_labels = clustering.row_labels_

# Row identifiers taken from the "DI" column, as an array so that the boolean
# row masks of the fitted model can index it directly.
all_dips = np.array(time_data["DI"].to_list())
features = ["dips", "time"]

# Order in which the co-clusters are written to the output tables.
# NOTE(review): both features currently share this order. An alternative
# order for "time" ([7, 8, 0, 4, 2, 5, 6, 1, 3]) used to be present as
# commented-out code -- confirm whether it should be reinstated.
ids = [1, 6, 0, 4, 8, 5, 2, 7, 3]

# Per-cluster members, following the order given by `ids`.
cluster_samples = [labels[clustering.columns_[cluster]] for cluster in ids]
cluster_dips = [all_dips[clustering.rows_[cluster]] for cluster in ids]
# Members of each cluster that also occur in the UnPaSt biclusters.
cluster_dips_in_unpast = [
    [dip for dip in members if dip in unpast_dips]
    for members in cluster_dips
]

for f in features:
    # Table restricted to identifiers that are also found by UnPaSt, plus the
    # number of identifiers per cluster that were dropped by that filter.
    sel_data = {
        "id": ids,
        "SNP": [0] * len(ids),  # placeholder column, all zeros
        "n_genes": [len(kept) for kept in cluster_dips_in_unpast],
        "n_samples": [len(samples) for samples in cluster_samples],
        "genes": [" ".join(kept) for kept in cluster_dips_in_unpast],
        "samples": [" ".join(samples) for samples in cluster_samples],
        "not_in_unpast": [
            len(members) - len(kept)
            for members, kept in zip(cluster_dips, cluster_dips_in_unpast)
        ],
    }
    # Unfiltered table with every identifier of each cluster.
    all_data = {
        "id": ids,
        "SNP": [0] * len(ids),  # placeholder column, all zeros
        "n_genes": [len(members) for members in cluster_dips],
        "n_samples": [len(samples) for samples in cluster_samples],
        "genes": [" ".join(members) for members in cluster_dips],
        "samples": [" ".join(samples) for samples in cluster_samples],
    }

    df = pd.DataFrame(sel_data)
    df.to_csv(f"spec_coclustering_{f}_selected.tsv", sep="\t", index=False)

    full_df = pd.DataFrame(all_data)
    full_df.to_csv(f"spec_coclustering_{f}_all.tsv", sep="\t", index=False)