# Identify coregulators 


## Section 1 : Setup and experiment options 

In [None]:
# %load_ext autoreload
# %autoreload 2
# (Optional: uncomment the two IPython magics above while developing.)

# Import coregtor; if the import fails, install it and import again.
# NOTE: `%pip` is an IPython/Jupyter magic, not plain Python, so this cell
# has to be executed inside a notebook/IPython kernel.
try:
    import coregtor
except ImportError:
    %pip install coregtor
    import coregtor

# Additional imports
from pathlib import Path
import pandas as pd
import time
print("Required packages loaded")

In [None]:
# Base folder: where your input data is stored and where the results file
# will be saved.
base_path = Path("docs/temp")  # UPDATE THIS

# Locations of the input files plus a free-text description of the data set.
# NOTE: this name shadows the builtin `input`; it is kept unchanged because
# the save-results cell further down refers to it.
input = {
    "data_file_path": base_path / "brain_ge.gct",  # UPDATE THIS
    "data_description": " ",  # UPDATE THIS
    "tf_file_path": base_path / "human_tf.txt",  # UPDATE THIS
}

# Experiment options: target gene, model type and its settings, and the
# method names used for building, transforming and comparing contexts.
options = {
    "target_gene": "GFAP",
    "model_type": "rf",
    "model_options": {
        "n_estimators": 1000,
        "max_depth": 5,
        "random_state": 123,
    },
    "create_context_method": "tree_paths",
    "transform_context_method": "gene_frequency",
    "context_similarity_method": "cosine",
}

# Load the gene expression data (via a coregtor utility) and the list of
# transcription factors (headerless file read into a "gene_name" column).
ge_data = coregtor.utils.exp.read_GE_data(file_path=input["data_file_path"])
tf_data = pd.read_csv(
    input["tf_file_path"],
    header=None,
    names=["gene_name"],
)
print("Data loaded. Options set")

## Section 2 : Generating similarity matrix of root nodes 

In [None]:
runtimes = {}  # runtime of each pipeline stage, in seconds


def _timed(stage, func, *args, **kwargs):
    """Run ``func(*args, **kwargs)``, record its duration under ``runtimes[stage]`` and return its result."""
    began = time.perf_counter()
    outcome = func(*args, **kwargs)
    runtimes[stage] = time.perf_counter() - began
    return outcome


# Stage 1: build the training input for the model.
X, Y = _timed(
    "create_model_input",
    coregtor.create_model_input,
    ge_data,
    target_gene=options["target_gene"],
    t_factors=tf_data,
)
print("Input data generated")

# Stage 2: create the model from the training data.
model = _timed(
    "create_model",
    coregtor.create_model,
    X,
    Y,
    model=options["model_type"],
    model_options=options["model_options"],
)
print("Model ready")

# Stage 3: generate the tree paths from the model.
all_paths = _timed("create_tree_paths", coregtor.tree_paths, model, X, Y)
print("Tree paths generated")

# Stage 4: create the context from the paths, then transform it.
path_set = _timed(
    "create_context",
    coregtor.create_context,
    all_paths,
    method=options["create_context_method"],
)
gf_histogram = _timed(
    "transform_context",
    coregtor.transform_context,
    path_set,
    method=options["transform_context_method"],
)
print("Context ready")

# Stage 5: compare the transformed contexts to obtain the similarity matrix.
sim_matrix = _timed(
    "create_similarity_matrix",
    coregtor.compare_context,
    gf_histogram,
    options["context_similarity_method"],
)
print("Similarity matrix ready")

## Section 3 : Interactive result generation 

In [None]:
# Plot the dendrogram for the similarity matrix. The call returns three
# values; only the first is kept under a meaningful name.
dendrogram_output = coregtor.utils.plot.dendrogram(sim_matrix)
f, _, __ = dendrogram_output

In [None]:
# Cophenetic plot of the similarity matrix for the "average" method.
# Left as the cell's final expression so the notebook displays its result.
coregtor.utils.plot.cophenetic(
    sim_matrix,
    methods=["average"],
)

In [None]:
# Distance threshold used when identifying coregulators; it is stored in
# `options` alongside the other experiment settings.
options["selected_threshold"] = 0.8  # UPDATE THIS

# Generate the final results; the second returned value is not used.
results1, _ = coregtor.identify_coregulators(
    sim_matrix,
    target_gene=options["target_gene"],
    distance_threshold=options["selected_threshold"],
)
results1

## Section 4 : Save results 

In [None]:

# Free-text note to record alongside this experiment.
options["notes"] = "  "  # UPDATE THIS with something you want to record about this experiment

# Save the options, input description, similarity matrix, results and
# runtimes through the coregtor utility, passing `base_path` last.
saved_file = coregtor.utils.exp.save_exp_results(
    options,
    input,
    sim_matrix,
    results1,
    runtimes,
    base_path,
)