In [2]:
import sys
import os
import pydot
from IPython.display import Image, display
import networkx as nx
import metient.util.plotting_util as putil
import torch
import fnmatch
import pandas as pd

REPO_DIR = os.path.join(os.getcwd(), "../../../")

MACHINA_DATA_DIR = os.path.join(REPO_DIR, 'metient/data/machina_sims')
OUT_DIR = os.path.join(REPO_DIR, "metient/test/output_plots")

def view_pydot(pdot):
    plt = Image(pdot.create_png())
    display(plt)

def get_G(graph_fn):
    (graph,) = pydot.graph_from_dot_file(graph_fn)
    graphs = pydot.graph_from_dot_file(graph_fn)
    graph = graphs[0]  # Assuming there's only one graph in the file
    nx_graph = nx.drawing.nx_pydot.from_pydot(graph)
    G = torch.tensor(nx.to_numpy_array(nx_graph, dtype=int))
    #view_pydot(graph)
    return G

    
sites = ["m8", "m5"]
mig_types = ["M", "mS", "R", "S"]


data = []
for site in sites:

    for mig_type in mig_types:
        og_sims_dir = "/data/morrisq/divyak/projects/machina/data/sims"
        site_mig_data_dir = os.path.join(og_sims_dir, site, mig_type)
        seeds = fnmatch.filter(os.listdir(site_mig_data_dir), 'reads_seed*.tsv')
        seeds = [s.replace(".tsv", "").replace("reads_seed", "") for s in seeds]
        
        for seed in seeds:
            G = get_G(os.path.join(site_mig_data_dir, f"G_seed{seed}.dot"))
            clonality = putil.site_clonality_with_G(G)
            pattern = putil.seeding_pattern_with_G(G)
            
            #full_pattern = clonality.replace("clonal", "")
            if pattern == 'primary single-source':
                full_pattern = "prim_only"
            else:
                full_pattern = "met_to_met"
#             print(full_pattern, clonality, pattern)
            data.append([site, mig_type, seed, full_pattern])

gt_df = pd.DataFrame(data, columns=["site", "mig_type", "seed", "gt_pattern"])
gt_df
        


See https://github.com/networkx/networkx/issues/5723
  nx_graph = nx.drawing.nx_pydot.from_pydot(graph)


Unnamed: 0,site,mig_type,seed,gt_pattern
0,m8,M,19,met_to_met
1,m8,M,35,met_to_met
2,m8,M,172,met_to_met
3,m8,M,76,met_to_met
4,m8,M,216,met_to_met
...,...,...,...,...
75,m5,S,23,prim_only
76,m5,S,25,met_to_met
77,m5,S,62,prim_only
78,m5,S,49,met_to_met


In [3]:
gt_df['gt_pattern'].value_counts()

met_to_met    64
prim_only     16
Name: gt_pattern, dtype: int64

In [4]:
gt_df.to_csv(os.path.join(MACHINA_DATA_DIR,'gt_pattern.csv'), index=False)
