# NetFlow3D Step-by-Step Testing Notebook

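This notebook walks through the NetFlow3D pipeline one step at a time, using the example data provided in the `example` directory. All file paths are built from the current working directory, so run the notebook from the directory that contains `example/`, `metadata/`, and `graph/`.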

In [None]:
import os
import sys
import pandas as pd
import json
import networkx as nx
from collections import defaultdict

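# Add the parent of the current working directory to the Python path.
# NOTE(review): the file paths below are built from os.getcwd() itself, not its
# parent -- confirm which of the two directories is the project root.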
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

import NetFlow3D as nf3d
import funcs

## 1. Define File Paths

In [None]:
# Name of this run; it selects the per-job output directory below.
job_name = "test"

# Per-job output directory under the current working directory. The trailing
# "/" is appended explicitly so the string keeps its directory-style form.
output_path = os.path.join(os.getcwd(), "example/output/", job_name) + "/"
# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_path, exist_ok=True)


def _from_cwd(relative_path):
    """Return the absolute, normalised path for a path relative to the CWD."""
    return os.path.abspath(os.path.join(os.getcwd(), relative_path))


# Input files
input_maf = _from_cwd("example/input/mutations.maf")
expr_input_file = _from_cwd("example/input/expressed_genes.txt")

# Metadata files
id_mapping_file = _from_cwd("metadata/HUMAN_9606_idmapping.dat.gz")
expr_whitelist_file = _from_cwd("metadata/expr_white_list.txt")
background_mutability_file = _from_cwd("metadata/background.txt")
prolen_file = _from_cwd("metadata/uniprot2prolen.json")
# os.path.abspath normalises the path, so the trailing "/" on the three
# directory resources below does not survive in the resulting string.
PDB_intra_resource = _from_cwd("graph/PDB_intra/")
PDB_inter_resource = _from_cwd("graph/PDB_inter/")
AF2_intra_resource = _from_cwd("graph/AF2_pLDDT0/")
PIONEER_inter_resource = _from_cwd("metadata/HomoSapiens_interfaces_PIONEER_veryhigh.txt")
binary_interactome = _from_cwd("metadata/HomoSapiens_binary_HINThq.txt")
canonical_isoform_file = _from_cwd("metadata/UP000005640_9606.fasta")

## 2. Calculate Background Mutation Rates

In [None]:
# Output file for the background-mutation-rate step, inside the job directory.
mutrate_file = os.path.join(output_path, "mutrate.txt")
nf3d.get_bmr_uniprot(
    background_mutability_file,
    id_mapping_file,
    mutrate_file,
)

# Read the tab-separated output back in and preview its first rows.
mutrate_df = pd.read_csv(mutrate_file, sep="\t")
mutrate_df.head()

## 3. Process Expression Data

In [None]:
# Output file for the expression-processing step, inside the job directory.
expr_uniprots_file = os.path.join(output_path, "Expr_uniprots.txt")
nf3d.get_expr_uniprot(
    id_mapping_file,
    expr_whitelist_file,
    expr_input_file,
    expr_uniprots_file,
)

# Read the output back as a tab-separated table with no header row and
# preview its first rows.
expr_uniprots_df = pd.read_csv(expr_uniprots_file, sep="\t", header=None)
expr_uniprots_df.head()

## 4. Preprocess Mutation Data

In [None]:
# Output file for the mutation-preprocessing step, inside the job directory.
preprocessed_maf_file = os.path.join(output_path, "Preprocessed.maf")
nf3d.mutation_preprocessing(
    input_maf,
    id_mapping_file,
    preprocessed_maf_file,
    expr_uniprots_file,
)

# Read the tab-separated output back in and preview its first rows.
preprocessed_maf_df = pd.read_csv(preprocessed_maf_file, sep="\t")
preprocessed_maf_df.head()

## 5. Loss-of-Function (LoF) Analysis

In [None]:
# Output file for the loss-of-function step, inside the job directory.
lof_results_file = os.path.join(output_path, "All_intra_LoF_pvalue.txt")
nf3d.lof_analysis(
    preprocessed_maf_file,
    mutrate_file,
    prolen_file,
    lof_results_file,
)

# Read the tab-separated output back in and preview its first rows.
lof_results_df = pd.read_csv(lof_results_file, sep="\t")
lof_results_df.head()

## 6. In-frame Mutation Analysis

In [None]:
# Run the in-frame analysis with a single thread. The call returns six values,
# unpacked here in the order they are returned.
(
    final_output_intra_res,
    final_output_intra_uniprot,
    final_output_intra_pdb,
    final_output_inter_pdb,
    final_output_intra_af2,
    final_output_inter_pioneer,
) = nf3d.inframe_analysis(
    preprocessed_maf_file,
    mutrate_file,
    prolen_file,
    PDB_intra_resource,
    PDB_inter_resource,
    AF2_intra_resource,
    PIONEER_inter_resource,
    binary_interactome,
    output_path,
    threads=1,
)

# Preview one of the results: the first returned value is read as a
# tab-separated table.
inframe_results_df = pd.read_csv(final_output_intra_res, sep="\t")
inframe_results_df.head()

## 7. Generate Result Table

In [None]:
# The signatures table is written next to the per-job directory (directly under
# example/output/), with the job name as a filename prefix.
signatures_file = os.path.join(
    os.getcwd(),
    "example/output/",
    f"{job_name}_signatures.txt",
)
nf3d.generate_result_table(
    lof_results_file,
    final_output_intra_pdb,
    final_output_inter_pdb,
    final_output_intra_af2,
    final_output_inter_pioneer,
    canonical_isoform_file,
    signatures_file,
)

# Read the tab-separated output back in and preview its first rows.
signatures_df = pd.read_csv(signatures_file, sep="\t")
signatures_df.head()

## 8. Network Propagation

In [None]:
# The subnetworks table is written next to the per-job directory (directly
# under example/output/), with the job name as a filename prefix.
subnetworks_file = os.path.join(
    os.getcwd(),
    "example/output/",
    f"{job_name}_subnetworks.txt",
)
nf3d.network_propagation(
    binary_interactome,
    subnetworks_file,
    output_path,
    expr_uniprots_file,
    lof_results_file,
    final_output_intra_pdb,
    final_output_inter_pdb,
    final_output_intra_af2,
    final_output_inter_pioneer,
    threads=1,
)

# Read the tab-separated output back in and preview its first rows.
subnetworks_df = pd.read_csv(subnetworks_file, sep="\t")
subnetworks_df.head()