In [1]:
import omicverse as ov


   ____            _     _    __                  
  / __ \____ ___  (_)___| |  / /__  _____________ 
 / / / / __ `__ \/ / ___/ | / / _ \/ ___/ ___/ _ \ 
/ /_/ / / / / / / / /__ | |/ /  __/ /  (__  )  __/ 
\____/_/ /_/ /_/_/\___/ |___/\___/_/  /____/\___/                                              

Version: 1.5.7, Tutorials: https://omicverse.readthedocs.io/


In [2]:
import os
# Directory that holds the per-peptide interaction inputs and results
target_directory = "/Users/suxinwan/Documents/HMPA_resource/detail_interaction"
# Make it the working directory so relative paths in later cells resolve there
os.chdir(path=target_directory)

In [4]:
import pandas as pd
import os
import omicverse as ov
import networkx as nx
import matplotlib.pyplot as plt

folder_path = "test"
result_path = "test"

# Rows whose prm_score is at or below this value are dropped before querying STRING.
PRM_SCORE_CUTOFF = 1.04
# Fixed seed so the force-directed layout is the same on every run.
LAYOUT_SEED = 42
# Set to True to write one PNG per peptide into result_path (off by default,
# matching the previously commented-out savefig call).
SAVE_FIGURES = False

# os.chdir() does not expand "~" by itself; without expanduser() this raises
# FileNotFoundError.
os.chdir(os.path.expanduser("~/HMPA_resource/detail_interaction"))

# obtain pepnn result
txt_files = [file for file in os.listdir(folder_path) if file.endswith(".txt")]

for txt_file in txt_files:

    # The file stem doubles as the peptide node name and the output file suffix.
    peptide = os.path.splitext(txt_file)[0]

    df = pd.read_csv(os.path.join(folder_path, txt_file), sep='\t')

    # filter prm_score
    filtered_df = df[df['prm_score'] > PRM_SCORE_CUTOFF]
    gene_list = filtered_df['prot_id'].tolist()

    # generate G interaction
    G_res_csv_path = os.path.join(result_path, f"G_res_{peptide}.csv")
    G_res = ov.bulk.string_interaction(gene_list, 9606)

    # Link the peptide to every gene that appears in the returned interactions.
    # These artificial edges carry an empty score.
    new_rows = [
        {'stringId_A': '', 'stringId_B': '', 'preferredName_A': peptide, 'preferredName_B': gene, 'ncbiTaxonId': 9606, 'score': ""}
        for gene in pd.concat([G_res['preferredName_A'], G_res['preferredName_B']]).unique()
    ]

    new_rows_df = pd.DataFrame(new_rows)
    G_res_updated = pd.concat([G_res, new_rows_df], ignore_index=True)
    G_res_updated.to_csv(G_res_csv_path, index=False)

    # generate G graph
    G = nx.from_pandas_edgelist(G_res_updated, 'preferredName_A', 'preferredName_B', ['score'])

    # set layout (seeded for reproducibility)
    pos = nx.spring_layout(G, seed=LAYOUT_SEED)

    # set node color and size
    node_colors = 'lightblue'
    node_sizes = 50

    # Draw on an explicit figure so each peptide gets its own canvas and the
    # figure can be closed deterministically inside the loop.
    fig, ax = plt.subplots()
    nx.draw_networkx_nodes(G, pos, ax=ax, node_color=node_colors, node_size=node_sizes, alpha=0.8)
    nx.draw_networkx_edges(G, pos, ax=ax, edge_color='gray', alpha=0.5)

    ax.set_title(f'Interaction Network for {peptide}', fontsize=12)
    ax.axis('off')

    # save (optional)
    if SAVE_FIGURES:
        fig.savefig(f'{result_path}/{peptide}_network.png')
    plt.close(fig)

In [5]:
import pandas as pd
import networkx as nx
import glob
import os

# os.chdir() does not expand "~" by itself; without expanduser() this raises
# FileNotFoundError.
os.chdir(os.path.expanduser("~/HMPA_resource/detail_interaction/test"))


def _mean_edge_score(graph, path):
    """Return the mean 'score' over the scored edges of `path`.

    Edges without a 'score' attribute (rows whose score was empty in the CSV)
    are left out of the average instead of raising KeyError. Returns None when
    no edge on the path carries a score.
    """
    edge_scores = [
        graph[u][v]['score']
        for u, v in zip(path[:-1], path[1:])
        if 'score' in graph[u][v]
    ]
    if not edge_scores:
        return None
    return sum(edge_scores) / len(edge_scores)


files = glob.glob('G_res_hmpa_*.csv')
for file in files:

    df = pd.read_csv(file)
    G = nx.Graph()

    # add edges; attach the score only when the CSV row has one
    for idx, row in df.iterrows():
        if pd.notna(row["score"]):
            G.add_edge(row["preferredName_A"], row["preferredName_B"], score=row["score"])
        else:
            G.add_edge(row["preferredName_A"], row["preferredName_B"])

    # calculate weight scores: for every "hmpa_" node, average the per-path
    # mean scores over all simple paths of at most 3 edges to each other node
    results = []
    for node in G:
        # Node names come from the CSV and may be non-strings (e.g. NaN).
        if not (isinstance(node, str) and "hmpa_" in node):
            continue
        scores = {}
        for target in G:
            if target == node:
                continue
            # all_simple_paths yields nothing when no path exists within the
            # cutoff, so a separate nx.has_path() check is not needed.
            path_scores = [
                path_score
                for path_score in (
                    _mean_edge_score(G, path)
                    for path in nx.all_simple_paths(G, source=node, target=target, cutoff=3)
                )
                if path_score is not None
            ]
            if path_scores:
                scores[target] = round(sum(path_scores) / len(path_scores), 3)
        results.append(pd.Series(scores, name=node, dtype=float))

    # save result; pd.concat() raises on an empty list, so fall back to an
    # empty frame when the file contains no "hmpa_" node
    result_df = pd.concat(results, axis=1).T if results else pd.DataFrame()
    for idx, row in df.iterrows():
        source, target = row["preferredName_A"], row["preferredName_B"]
        # Only overwrite when a score was actually computed for this pair.
        if source in result_df.index and target in result_df.columns:
            new_score = result_df.at[source, target]
            if pd.notna(new_score):
                df.at[idx, "score"] = new_score

    # update file
    updated_file_path = f"score_{os.path.basename(file)}"
    df.to_csv(updated_file_path, index=False)

print("Score calculation and replacement completed.")

Score calculation and replacement completed.


In [6]:
import pandas as pd
import glob
import os

# set path
# glob.glob() and os.makedirs() do not expand "~", so resolve it up front;
# otherwise the pattern below never matches and the cell silently does nothing.
directory = os.path.expanduser("~/HMPA_resource/detail_interaction/test")
t_directory = os.path.expanduser('~/HMPA_resource/detail_interaction/network_score')

files = glob.glob(os.path.join(directory, 'score_G_res_*.csv'))
if not files:
    # Make an empty run visible instead of reporting success for zero files.
    print(f"No score_G_res_*.csv files found in {directory}")

# create target directory
os.makedirs(t_directory, exist_ok=True)

for file in files:

    # Keep only the edge endpoints and the score, renamed to source/target.
    df = pd.read_csv(file)
    df = df[['preferredName_A', 'preferredName_B', 'score']].rename(
        columns={'preferredName_A': 'source', 'preferredName_B': 'target'}
    )

    # rename: score_G_res_<name>.csv -> score_<name>.csv
    new_filename = os.path.basename(file).replace('score_G_res_', 'score_')

    # save
    df.to_csv(os.path.join(t_directory, new_filename), index=False)

print("Extraction and saving completed.")

Extraction and saving completed.


In [7]:
import pandas as pd
import os

# set path
# os.listdir() does not expand "~", so resolve the home directory explicitly.
directory = os.path.expanduser("~/HMPA_resource/detail_interaction/network_score")


# select range for visualization
def categorize(score):
    """Map a score to a letter bin: 'A' (>= 0.9) down to 'E' (>= 0.5), else 'F'.

    A NaN score fails every comparison and therefore also ends up in 'F'.
    """
    if score >= 0.9:
        return 'A'
    elif score >= 0.8:
        return 'B'
    elif score >= 0.7:
        return 'C'
    elif score >= 0.6:
        return 'D'
    elif score >= 0.5:
        return 'E'
    else:
        return 'F'


# Collect one frame per file and concatenate once at the end instead of
# growing result_df inside the loop.
category_frames = []
for filename in sorted(os.listdir(directory)):
    if not filename.endswith(".csv"):
        continue

    filepath = os.path.join(directory, filename)
    # header=0 replaces the file's own header row with `names`; header=1 would
    # additionally discard the first data row.
    df = pd.read_csv(filepath, header=0, names=['source', 'target', 'score'], sep=',', dtype={'score': float})

    # Keep only edges that start at a peptide node; na=False treats missing
    # source names as non-matching instead of breaking the boolean mask.
    is_peptide_edge = df['source'].str.startswith('hmpa_', na=False)

    # rename
    df_filtered = df.loc[is_peptide_edge, ['source', 'target', 'score']].rename(
        columns={'source': 'hmpa_id', 'target': 'node_name'}
    )

    # add category
    df_filtered = df_filtered.assign(category_name=df_filtered['score'].apply(categorize))
    category_frames.append(df_filtered)

if category_frames:
    result_df = pd.concat(category_frames, ignore_index=True)
else:
    # No CSV found: still produce a frame with the expected columns.
    result_df = pd.DataFrame(columns=['hmpa_id', 'node_name', 'score', 'category_name'])

# save
result_filepath = "/Users/suxinwan/Documents/HMPA_resource/detail_interaction/hmpa_network_node_category.csv"
result_df.to_csv(result_filepath, index=False)

print("Results saved to:", result_filepath)

Results saved to: /Users/suxinwan/Documents/HMPA_resource/detail_interaction/test/hmpa_network_node_category.csv
