# Generation of LINEX networks based on detected lipid ions

In [1]:
import os
import ast
import pandas as pd
import numpy as np
import networkx as nx

import linex2metaspace as lx2m

Package 'lynx' (LipidLynxX) not available. Lipid name conversions will not be possible.


In [2]:
from pandarallel import pandarallel

# Configure pandarallel: 19 workers, progress bars on, memory file system off.
pandarallel.initialize(
    nb_workers=19,
    progress_bar=True,
    use_memory_fs=False,
)

INFO: Pandarallel will run on 19 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [None]:
# importing custom functions for imputation and evaluation
import sys

# Extend the module search path before the import below runs.
# NOTE(review): absolute, user-specific path — presumably the location of the
# `sc_imputation_denoising` package; adjust when running on another machine.
sys.path.append("/home/mklein/sc_imputation_denoising")

from sc_imputation_denoising.evaluation.evaluation_workflow import (
    evaluation_workflow,
)


IE version 1.2


In [4]:
# this folder contains all connectivities, plots, graphs etc.
# It is used as the parent directory of the GraphML files written below.
# NOTE(review): absolute, user-specific path — adjust when running on another machine.
main_path = '/home/mklein/Dropouts/linex'

In [5]:
# Global LINEX2 inputs: computed once here and read as module-level names
# by the network construction function.
ref_lip_dict = lx2m.get_lx2_ref_lip_dict()
class_reacs = lx2m.get_organism_combined_class_reactions(
    ref_lip_dict=ref_lip_dict,
    organism='HSA',
)

In [6]:
from tqdm import tqdm

def multi_to_single_graph(multi_graph):
    """Collapse a multigraph into a simple ``nx.Graph``.

    Every node returned by ``multi_graph.nodes()`` is added to the result.
    For each pair of nodes only the first edge yielded by
    ``multi_graph.edges(data=True)`` is transferred; any further edge between
    the same two nodes is dropped. The attribute values of a transferred edge
    are converted with ``str``.

    Parameters
    ----------
    multi_graph
        Graph-like object providing ``nodes()`` and ``edges(data=True)``.

    Returns
    -------
    nx.Graph
        New graph with at most one edge per node pair.
    """
    simple_graph = nx.Graph()
    simple_graph.add_nodes_from(multi_graph.nodes())

    for source, target, attrs in multi_graph.edges(data=True):
        if simple_graph.has_edge(source, target):
            # An edge between these nodes was already transferred.
            continue
        stringified = {key: str(value) for key, value in attrs.items()}
        simple_graph.add_edge(source, target, **stringified)

    return simple_graph

In [35]:
# Name of the dataset to process; used as a key into `datasets` and as the
# file name of the saved network.
dataset = "Lx_Glioblastoma"

In [36]:
# Per-dataset parameters, keyed by dataset name. The `c`, `i`, `repl` and
# `cond` values are interpolated into the run-configuration string.
datasets = {
    "Mx_Seahorse": {"c": 0.1, "i": 0.2, "repl": 1, "cond": 4},
    "Lx_Pancreatic_Cancer": {"c": 0.2, "i": 0.05, "repl": 1, "cond": 4},
    "Lx_Glioblastoma": {"c": 0.05, "i": 0.05, "repl": 4, "cond": 6},
    "Lx_HepaRG": {"c": 0.2, "i": 0.2, "repl": 5, "cond": 4},
}

In [37]:
# Parameter set of the selected dataset.
params = datasets[dataset]
# FDR value of this run — presumably the maximum annotation FDR; TODO confirm.
max_fdr = 1

In [38]:
# Identifier of the evaluation run, assembled from the dataset name and its
# parameters. The FDR token is taken from `max_fdr` instead of being
# hard-coded, so the identifier cannot drift from the configured value.
ds_config = (
    f"{dataset}_cond{params['cond']}_mnar_c{params['c']}_i{params['i']}"
    f"_fdr{max_fdr}_repl{params['repl']}"
)
repl = params['repl']
print(ds_config)

Lx_HepaRG_cond4_mnar_c0.2_i0.2_fdr1_repl5


In [39]:
# Restore the evaluation workflow stored for this run configuration.
# NOTE(review): the file name suggests a pickle — unpickling can execute
# arbitrary code, so only load files from a trusted source. The scratch path
# is absolute and user-specific.
wflow = evaluation_workflow.get_from_pickle(f'/scratch/klein/temp/evaluation_pickle_{ds_config}_{repl}.pkl')

In [None]:
adata = wflow.dataset

mol_names = 'moleculeNames'
# The molecule names may be stored as the string repr of a list; parse them
# back into real lists. `.iloc[0]` inspects the first entry by position and
# therefore works regardless of the index labels (plain `[0]` on a
# label-indexed Series is deprecated in pandas).
if isinstance(adata.var[mol_names].iloc[0], str):
    adata.var[mol_names] = adata.var[mol_names].map(ast.literal_eval)
assert isinstance(adata.var[mol_names].iloc[0], list)

# TODO should linex networks be constructed using filtered anndata matrices or not?
print(adata.shape)
# sc.pp.filter_genes(adata, min_cells=140)
print(adata.shape)

In [42]:
# Display the `.var` table of the loaded dataset for inspection.
wflow.dataset.var

Unnamed: 0_level_0,annotation_id,formula,adduct,ON_sample,moleculeName,moleculeClass,fdr,n_cells
annotation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
C10H21NO4+K,C10H21NO4+K,C10H21NO4,+K,True,Miglustat,,1,16421
C12H15N5O3+K,C12H15N5O3+K,C12H15N5O3,+K,True,Entecavir,,1,17419
C13H17NO3+Na,C13H17NO3+Na,C13H17NO3,+Na,True,Lophophorine,,1,16418
C14H17NO4+K,C14H17NO4+K,C14H17NO4,+K,True,3-(4-hydroxy-3-methoxyphenyl)-N-(4-oxobutyl)pr...,,1,22616
C14H19NO5+K,C14H19NO5+K,C14H19NO5,+K,True,Harzianopyridone,,1,27187
...,...,...,...,...,...,...,...,...
C57H110O13P+H,C57H110O13P+H,C57H110O13P,+H,True,PI(48:0),PI,1,6161
C57H110O13P+Na,C57H110O13P+Na,C57H110O13P,+Na,True,PI(48:0),PI,1,7249
C57H111O13P+Na,C57H111O13P+Na,C57H111O13P,+Na,True,phosphatidylinositol 48:0(1-),,1,10589
C59H98O6+H,C59H98O6+H,C59H98O6,+H,True,TG(56:8),TG,1,7111


In [25]:
def create_linex_network(row):
    """Build a LINEX2 ion network for one dataset and store it as GraphML.

    Parameters
    ----------
    row : mapping (e.g. ``pd.Series``)
        Must provide ``row['save_to']``, the output path without the
        ``.graphml`` extension, and ``row['adata']``, an object whose ``.var``
        table contains a ``'moleculeNames'`` column.

    Returns
    -------
    str or float
        Path of the written ``.graphml`` file, or ``np.nan`` when the written
        file cannot be read back with ``nx.read_graphml``.

    Notes
    -----
    ``adata.var['moleculeNames']`` is converted in place from string reprs to
    lists when its first entry is a string. The function reads the
    module-level names ``ref_lip_dict`` and ``class_reacs``.
    """
    graph_path = row['save_to'] + '.graphml'
    # The directory part is empty for a bare file name; os.makedirs('') would
    # raise, so only create it when present. exist_ok avoids a race between
    # an existence check and the creation.
    graph_dir = os.path.dirname(graph_path)
    if graph_dir:
        os.makedirs(graph_dir, exist_ok=True)
    adata = row['adata']

    mol_names = 'moleculeNames'
    # `.iloc[0]` looks at the first entry by position, independent of the
    # index labels (plain `[0]` on a label-indexed Series is deprecated).
    if isinstance(adata.var[mol_names].iloc[0], str):
        adata.var[mol_names] = adata.var[mol_names].map(ast.literal_eval)
    assert isinstance(adata.var[mol_names].iloc[0], list)

    # LINEX2 preprocessing
    parsed_lipids = lx2m.parse_annotation_series(
        adata.var[mol_names],
        ref_lip_dict,
        verbose=True,  # True if you want to see all lipids that were not parsed
    )

    # Keep only the annotations selected by lx2m.annotations_parsed_lipids.
    keep_annotations = lx2m.annotations_parsed_lipids(parsed_lipids)
    parsed_annotations = adata.var.copy()
    parsed_annotations['parsed_lipids'] = parsed_lipids
    parsed_annotations = parsed_annotations.loc[keep_annotations, :]

    # The same bootstrap count is passed to both lx2m calls below.
    bootstraps = 30

    net = lx2m.bootstrap_networks(
        lx2m.unique_sum_species(parsed_annotations['parsed_lipids']),
        parsed_annotations['parsed_lipids'],
        n=bootstraps,
        lx2_class_reacs=class_reacs,
        lx2_reference_lipids=lx2m.get_lx2_ref_lips(),
        return_composed=True,
    )

    ion_net = lx2m.ion_weight_graph(
        net,
        lx2m.unique_sum_species(parsed_annotations['parsed_lipids']),
        bootstraps=bootstraps,
        parsed_lipids=parsed_annotations['parsed_lipids'],
        # feature_similarity=feature_sim
    )

    single_ion_net = multi_to_single_graph(ion_net)

    nx.write_graphml(single_ion_net, graph_path)

    # Verify that the file just written can be read back. Only ordinary
    # errors are turned into NaN; KeyboardInterrupt/SystemExit propagate.
    try:
        nx.read_graphml(graph_path)
    except Exception:
        return np.nan

    return graph_path

In [26]:
# Build and save the network for the selected dataset. The function expects a
# row-like object with 'save_to' and 'adata' entries.
linex_path = create_linex_network(
    pd.Series(
        {
            'save_to': os.path.join(main_path, dataset),
            'adata': adata,
        }
    )
)

Could not parse: 'C8HF15O2'
Could not parse: 'C8HF15O2'
Could not parse: 'C14H29O9P'
Could not parse: 'C15H29O10P'
Could not parse: 'C16H21N3O8S'
Could not parse: 'C16H31O10P'
Could not parse: 'C16H31O10P'
Could not parse: 'C17H33O10P'
Could not parse: 'C17H34NO8P'
Could not parse: 'C17H34NO8P'
Could not parse: 'C18H22N2O8'
Could not parse: 'C18H22N2O8'
Could not parse: 'C18H34NO10P'
Could not parse: 'C19H24N2O8'
Could not parse: 'C19H24N2O8'
Could not parse: 'C19H24N2O8'
Could not parse: 'C19H26O10'
Could not parse: 'C19H27N3O9S'
Could not parse: 'C19H27NO7S'
Could not parse: 'C19H35NO6'
Could not parse: 'C20H15F3N4O3'
Could not parse: 'C20H15F3N4O3'
Could not parse: 'C21H20O9'
Could not parse: 'C21H20O9'
Could not parse: 'C21H21NO7'
Could not parse: 'C21H24N2O4S'
Could not parse: 'C21H31N5O3'
Could not parse: 'C21H37NO6'
Could not parse: 'C21H39O6P'
Could not parse: 'C21H39O6P'
Could not parse: 'C22H24N2O8'
Could not parse: 'C22H24N2O8'
Could not parse: 'C22H29N3O13S'
Could not parse

  return pd.Series(out_l, index=all_lipids.index)


In [21]:
# Path of the written GraphML file (np.nan if it could not be read back).
print(linex_path)

/home/mklein/Dropouts/linex/Lx_Pancreatic_Cancer.graphml
