In [None]:
import pathlib, os, torch, pickle, time
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ['HF_HOME'] = str(pathlib.Path("~/scratch-llm/storage/cache/huggingface/").expanduser().absolute()) # '/scratch-llm/storage/cache/'
# os.environ["TRANSFORMERS_CACHE"] = "~/scratch-llm/storage/models/"

import numpy as np
import pandas as pd

from transformers import AutoTokenizer
from nebulagraph_lite import nebulagraph_let as ng_let
from llama_index.graph_stores.nebula import NebulaPropertyGraphStore

from llama_index.core import Settings
from llama_index.core.schema import TextNode
from llama_index.core.prompts import PromptTemplate
from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.response_synthesizers import TreeSummarize
from llama_index.core.vector_stores.simple import SimpleVectorStoreData, SimpleVectorStore, VectorStoreQuery

from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from typing import List
from numpy import dot
from numpy.linalg import norm
from pydantic import BaseModel, Field
from llama_index.core.output_parsers import PydanticOutputParser
from IPython.display import display, Markdown

# NebulaGraph conexion

In [2]:
# load NebulaGraph JupyterNotebook extension
# !udocker pull vesoft/nebula-metad:v3
# !udocker create --name=nebula-metad vesoft/nebula-metad:v3
# !udocker setup --execmode=F1 nebula-metad
# time.sleep(5)  # wait for the container to be ready
# !udocker pull vesoft/nebula-storaged:v3
# !udocker create --name=nebula-storaged vesoft/nebula-storaged:v3
# !udocker setup --execmode=F1 nebula-storaged
# time.sleep(5)  # wait for the container to be ready
# !udocker pull vesoft/nebula-graphd:v3
# !udocker create --name=nebula-graphd vesoft/nebula-graphd:v3
# !udocker setup --execmode=F1 nebula-graphd
# time.sleep(5)  # wait for the container to be ready

n = ng_let(in_container=True)
n.start() # Takes around 5 mins

[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow-root rm -f {}[0m
[1;3;38;2;102;81;145merror: Error: deleting directory:  /home/lasa14/.udocker/containers/2141d09a-b208-3cd6-8a16-8c2749a11286
Error: deleting container
[0m
Retrying in 5 seconds... udocker command failed with return code 123
[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow-root rm -f {}[0m
[1;3;38;2;102;81;145merror: Error: deleting directory:  /home/lasa14/.udocker/containers/2141d09a-b208-3cd6-8a16-8c2749a11286
Error: deleting container
[0m
Retrying in 15 seconds... udocker command failed with return code 123
[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow

In [None]:
%reload_ext ngql
%ngql --address 127.0.0.1 --port 9669 --user root --password nebula

## SimpleVectorStore:

In [None]:
# Load the actual data into all_nodes_embeddded
with open(os.path.expanduser('~/scratch-llm/storage/nodes/all_nodes_all-mpnet-base-v2.pkl'), 'rb') as f:
    all_nodes_embedded: List[TextNode] = pickle.load(f)
# Create dictionaries from the nodes
embedding_dict = {node.id_: node.get_embedding() for node in all_nodes_embedded}
text_id_to_ref_doc_id = {node.id_: node.ref_doc_id or "None" for node in all_nodes_embedded}
metadata_dict = {node.id_: node.metadata for node in all_nodes_embedded}

# Initialize the SimpleVectorStore with the dictionaries
vector_store = SimpleVectorStore(
    data = SimpleVectorStoreData(
        embedding_dict=embedding_dict,
        text_id_to_ref_doc_id=text_id_to_ref_doc_id,
        metadata_dict=metadata_dict,
    ),
    stores_text=True
)

## NebulaPropertyGraphStore:

In [4]:
graph_store = NebulaPropertyGraphStore(
    space = "PrimeKG",
    username = "root",
    password = "nebula",
    url = "nebula://localhost:9669",
    props_schema= """`node_index` STRING, `node_type` STRING, `node_id` STRING, `node_name` STRING, 
        `node_source` STRING, `mondo_id` STRING, `mondo_name` STRING, `group_id_bert` STRING, 
        `group_name_bert` STRING, `orphanet_prevalence` STRING, `display_relation` STRING """,
)

# LLM

## Llama-3.2-3B-Instruct

In [36]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct", padding_side="left", device_map="auto")    
if tokenizer.pad_token_id is None: #no <pad> token previously defined, only eos_token
    tokenizer.pad_token = "<|end_of_text|>"
    tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)


llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-3B-Instruct",
    context_window=8192,
    max_new_tokens=3048,
    generate_kwargs={
        "temperature": 0.10, 
        "do_sample": True,
        "pad_token_id": tokenizer.pad_token_id,
        "top_k": 10, 
        "top_p": 0.9,
        # "repetition_penalty": 0.9,  # Added to reduce repetition
        # "no_repeat_ngram_size": 3,  # Prevents repetition of n-grams
    },
    model_kwargs={
        "torch_dtype": torch.float16,
    },
    tokenizer=tokenizer,
    # device_map="auto",  # Automatically offload layers to CPU if GPU memory is insufficient
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    stopping_ids=[tokenizer.eos_token_id],
    tokenizer_kwargs={"max_length": None},
    is_chat_model=True,
)

Settings.llm = llm
Settings.chunk_size = 1024
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-m3") #"sentence-transformers/all-mpnet-base-v2") # BAAI/bge-small-en-v1.5 /  m3 / sentence-transformers/all-mpnet-base-v2

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

# DiseaseMode

MetadataFilters:

In [20]:
from llama_index.core.vector_stores.types import MetadataFilters, FilterOperator
disease_dict = {
    "key": "node_type",
    "value": "disease",
    "operator": FilterOperator.EQ
}
disease_filter = MetadataFilters(filters=[disease_dict])

class DiseaseMode():
    def __init__(self, vector_store: SimpleVectorStore, graph_store: NebulaPropertyGraphStore):
        self.vector_store = vector_store
        self.graph_store = graph_store

    def retrieve(self, query: str):        
        query_embedding = Settings.embed_model.get_text_embedding(query)
        vector_results = self.vector_store.query(
            VectorStoreQuery(
                query_embedding=query_embedding, 
                similarity_top_k=1,
                filters=disease_filter,
            )
        )
        
        top_node_id = vector_results.ids[0]
        top_node_score = vector_results.similarities[0]
        kg_node = self.graph_store.get(ids=[top_node_id])[0]
                
        results = [{ # Create results list with primary node
            "node_index": kg_node.id_,
            "node_name": kg_node.properties["node_name"],
            "text": getattr(kg_node, "text", ""),
            "score": top_node_score
        }]
        
        print(f"Best node from vector query: Node ID: {kg_node.id_}, "
              f"Score: {top_node_score:.4f}, Name: {kg_node.properties['node_name']}")
        
        # Find related nodes through graph query
        graph_nodes = self.graph_store.structured_query(
            """
            MATCH (e:Node__) WHERE id(e) == $ids
            MATCH p=(e)-[r:Relation__{label:"disease-disease"}]-(t) 
            UNWIND relationships(p) as rel
            RETURN DISTINCT id(t), t.Props__.node_name, t.Chunk__.text
            """, 
            param_map={"ids": top_node_id}
        )
        
        # Calculate similarity for related nodes and add relevant ones to results
        all_similarities = []
        for node in graph_nodes:
            node_text = node["t.Props__.node_name"] + ": " + node["t.Chunk__.text"]
            node_embedding = Settings.embed_model.get_text_embedding(node_text)
                    
            similarity = dot(query_embedding, node_embedding) / (norm(query_embedding) * norm(node_embedding))
            all_similarities.append((node, similarity))
            
        if len(all_similarities) > 3:
            sim = [s for _, s in all_similarities]
            threshold = np.percentile(sim, 75) # keep top 25% of nodes
        else:
            threshold = 0.7
        
        for node, similarity in all_similarities:
            if similarity > threshold: #and similarity >= 0.5:
                results.append({
                    "node_index": node["id(t)"],
                    "node_name": node["t.Props__.node_name"],
                    "text": node_text,
                    "score": similarity
                })
        
        results = sorted(results, key=lambda x: x["score"], reverse=True)
        print("\nBest related nodes from graph query:")
        for node in results:  # Skip primary node
            print(f"ID: {node['node_index']} | node name: {node['node_name']} | score: {node['score']:.4f}")
        
        graph_phenotype = graph_store.structured_query(
            """
            MATCH (e:Node__) WHERE id(e) == $ids
            MATCH (e)-[r:Relation__{label:"disease-phenotype-positive"}]-(t) 
            RETURN DISTINCT id(t), t.Props__.node_name
            """, 
            param_map={"ids": top_node_id}
        )
        # join the phenotype names without the " '' " characters
        phenotypes = ", ".join(node["t.Props__.node_name"].replace("'", "") for node in graph_phenotype)

        nodes_with_text = [node for node in results if node['text'].strip()]
        context = [f"'{node['node_name']}': {node['text']}" for node in nodes_with_text] if nodes_with_text else None
        phenotype_context = [f"Is associated with the following phenotypes: {phenotypes}\n"] if phenotypes else None
        
        if results:
            return (context, phenotype_context, top_node_id, results[0]['node_name']) if context else (phenotype_context, top_node_id, results[0]['node_name'])
        else:
            return [f"No graph relationships found for {results[0]['node_name']}"] if results else ["No results found"]


## Prompt templates:

In [37]:
context_phenotype_template = """    
    Context information is below:
    {text_chunks}

    Phenotype context is below:
    {phenotype_context}

    You are a medical knowledge assistant specializing in rare diseases. Your task is to create a comprehensive list of symptoms for {query_str}.

    CRITICAL INSTRUCTIONS:
    1. Use the information from the context and your own knowledge to provide a comprehensive answer
    2. Return MAXIMUM the 16 most relevant symptoms, if there are more than 16 symptoms, return the most relevant ones
    3. Use HPO medical terminology and avoid using including redundant symptoms
    4. Return your answer as a non-numbered list of symptoms only, each symptom should be a string using HPO terminology
"""

no_rag_template = """
    You are a medical knowledge assistant specializing in rare diseases. Your task is to create a comprehensive list of symptoms for {query_str}.

    CRITICAL INSTRUCTIONS:
    1. Use only your own knowledge to provide a comprehensive answer
    2. Return MAXIMUM the 16 most relevant symptoms, if there are more than 16 symptoms, return the most relevant ones
    3. Use only HPO medical terminology and avoid including redundant symptoms
    4. Return your answer as a non-numbered list of symptoms only, each symptom should be a string using HPO terminology
"""


## Chat

timeout in case call takes too long:

In [28]:
import signal
def safe_llm_call(summarizer, *args, timeout=300, **kwargs):
        """Safe LLM call with timeout handling"""
        # PASS TO THE NEXT ITEM IF LLM ENTERS AN INFINITE LOOP
        class TimeoutException(Exception):
            pass

        def handler(signum, frame):
            raise TimeoutException()
        
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(timeout)
        
        try:
            response = summarizer.get_response(*args, **kwargs)
            signal.alarm(0)
            return response
        except TimeoutException:
            print("LLM call timed out. Skipping this item.")
            return None
        except RecursionError:
            print("RecursionError: LLM summarizer entered an infinite loop. Skipping this item.")
            return None
        finally:
            signal.alarm(0)

### Top node retrievals

In [58]:
user = ["stormorken-sjaastad-langslet syndrome",
	"krt1-related diffuse nonepidermolytic keratoderma",
	"autosomal dominant charcot-marie-tooth disease type 2e",
	"glycogen storage disease due to glucose-6-phosphatase deficiency type ib",
	"x-linked spinocerebellar ataxia type 4",
	"autosomal dominant polycystic kidney disease",
    "primary dystonia, dyt2 type",
	"ank3-related intellectual disability-sleep disturbance syndrome",
	"14q22q23 microdeletion syndrome",
	"3-methylglutaconic aciduria type 9"
]

models = ["BAAI/bge-m3", "sentence-transformers/all-mpnet-base-v2"]

# pd dataframe to store user query and node name of the best node
results = pd.DataFrame(index=user, columns=models)

for model in models:
    Settings.embed_model = HuggingFaceEmbedding(model_name=model)
    
    if model == "BAAI/bge-m3":
        # Load the actual data into all_nodes_embeddded
        with open(os.path.expanduser('~/scratch-llm/storage/nodes/all_nodes_bge-m3.pkl'), 'rb') as f:
            all_nodes_embedded: List[TextNode] = pickle.load(f)
        # Create dictionaries from the nodes
        embedding_dict = {node.id_: node.get_embedding() for node in all_nodes_embedded}
        text_id_to_ref_doc_id = {node.id_: node.ref_doc_id or "None" for node in all_nodes_embedded}
        metadata_dict = {node.id_: node.metadata for node in all_nodes_embedded}

        # Initialize the SimpleVectorStore with the dictionaries
        vector_store = SimpleVectorStore(
            data = SimpleVectorStoreData(
                embedding_dict=embedding_dict,
                text_id_to_ref_doc_id=text_id_to_ref_doc_id,
                metadata_dict=metadata_dict,
            ),
            stores_text=True
        )
        
    elif model == "sentence-transformers/all-mpnet-base-v2":
        # Load the actual data into all_nodes_embeddded
        with open(os.path.expanduser('~/scratch-llm/storage/nodes/all_nodes_all-mpnet-base-v2.pkl'), 'rb') as f:
            all_nodes_embedded: List[TextNode] = pickle.load(f)
        # Create dictionaries from the nodes
        embedding_dict = {node.id_: node.get_embedding() for node in all_nodes_embedded}
        text_id_to_ref_doc_id = {node.id_: node.ref_doc_id or "None" for node in all_nodes_embedded}
        metadata_dict = {node.id_: node.metadata for node in all_nodes_embedded}

        # Initialize the SimpleVectorStore with the dictionaries
        vector_store = SimpleVectorStore(
            data = SimpleVectorStoreData(
                embedding_dict=embedding_dict,
                text_id_to_ref_doc_id=text_id_to_ref_doc_id,
                metadata_dict=metadata_dict,
            ),
            stores_text=True
        )

    for i, u in enumerate(user):
        # print(f"\n=== RAG {i+1} === \nUser query: {u}\n")
        result = DiseaseMode(vector_store, graph_store).retrieve(u)
        if len(result) == 4:
            context, phenotypes, top_node_id, top_node_name = result
        elif len(result) == 3:
            phenotypes, top_node_id, top_node_name = result
            context = None
        else:
            raise ValueError("Unexpected number of return values from retrieve()")

        results.loc[u, model] = top_node_name
results

Best node from vector query: Node ID: 31514, Score: 0.6652, Name: Stormorken syndrome

Best related nodes from graph query:
ID: 31514 | node name: Stormorken syndrome | score: 0.6652
Best node from vector query: Node ID: 30500, Score: 0.6523, Name: palmoplantar keratoderma, nonepidermolytic, focal 1

Best related nodes from graph query:
ID: 30500 | node name: palmoplantar keratoderma, nonepidermolytic, focal 1 | score: 0.6523
Best node from vector query: Node ID: 99891, Score: 0.7347, Name: MME-related autosomal dominant Charcot Marie Tooth disease type 2

Best related nodes from graph query:
ID: 99891 | node name: MME-related autosomal dominant Charcot Marie Tooth disease type 2 | score: 0.7347
Best node from vector query: Node ID: 33023, Score: 0.7384, Name: glycogen storage disease due to lactate dehydrogenase deficiency

Best related nodes from graph query:
ID: 33023 | node name: glycogen storage disease due to lactate dehydrogenase deficiency | score: 0.7384
ID: 27292 | node name:

Unnamed: 0,BAAI/bge-m3,sentence-transformers/all-mpnet-base-v2
stormorken-sjaastad-langslet syndrome,Stormorken syndrome,Stormorken syndrome
krt1-related diffuse nonepidermolytic keratoderma,"palmoplantar keratoderma, nonepidermolytic, fo...","palmoplantar keratoderma i, striate, focal, or..."
autosomal dominant charcot-marie-tooth disease type 2e,MME-related autosomal dominant Charcot Marie T...,MME-related autosomal dominant Charcot Marie T...
glycogen storage disease due to glucose-6-phosphatase deficiency type ib,glycogen storage disease due to lactate dehydr...,glycogen storage disease due to glucose-6-phos...
x-linked spinocerebellar ataxia type 4,CDG syndrome type 4,X-linked spinocerebellar ataxia
autosomal dominant polycystic kidney disease,"deafness, autosomal dominant",polycystic kidney disease
"primary dystonia, dyt2 type",torsion dystonia,dystonia
ank3-related intellectual disability-sleep disturbance syndrome,intellectual disability-hypotonia-spasticity-s...,intellectual disability-hypotonia-spasticity-s...
14q22q23 microdeletion syndrome,maternal 14q32.2 microdeletion syndrome,17q21.31 microdeletion syndrome
3-methylglutaconic aciduria type 9,3-methylglutaconic aciduria,3-methylglutaconic aciduria


In [None]:
results.to_latex(os.path.expanduser('~/scratch-llm/results/disease_mode/disease_mode_results.tex'))

### Example RAG/no-RAG response

In [None]:
user = "Developmental and epileptic encephalopathy 111"

print(f"\n=== RAG === \nUser query: {user}\n")
prompt_template = PromptTemplate(context_phenotype_template) 
summarizer = TreeSummarize(verbose=True, llm=llm, summary_template=prompt_template)        
response = safe_llm_call(summarizer,
        query_str=user,
        text_chunks="\n".join(context) if context else "",
        phenotype_context=phenotypes,
        timeout=1000  
)   
display(Markdown(response))

print(f"\n\n === no RAG === \nUser query: {user}\n")
# include a prompt template for the LLM chat 
template = PromptTemplate(no_rag_template)
prompt = template.format(query_str=user)
response = llm.chat([ChatMessage(role="user", content=prompt)])
response_text = response.message.content if hasattr(response, "message") else str(response)
display(Markdown(response_text))


=== RAG === 
User query: Developmental and epileptic encephalopathy 111

19 text chunks after repacking
1 text chunks after repacking


Developmental and epileptic encephalopathy 111 is a rare genetic disorder characterized by severe developmental delay, intellectual disability, and seizures. The following are the 16 most relevant symptoms:

* Hypotonia
* Muscle weakness
* Visual impairment
* Reduced visual acuity
* Dementia
* Neurodevelopmental delay
* Neurological speech impairment
* Growth delay
* Hypothyroidism
* Intellectual disability
* Hypertonia
* Lower limb spasticity
* Spasticity
* Seizure
* Cerebral atrophy
* Delayed speech and language development



 === no RAG === 
User query: Developmental and epileptic encephalopathy 111



Here is a list of 16 symptoms for Developmental and epileptic encephalopathy 111:

* Abnormality of the cerebellum
* Abnormality of the cerebral cortex
* Abnormality of the cerebral white matter
* Abnormality of the dentate nucleus
* Abnormality of the globus pallidus
* Abnormality of the hippocampus
* Abnormality of the motor cortex
* Abnormality of the neocortex
* Abnormality of the substantia nigra
* Abnormality of the thalamus
* Behavioral abnormality
* Cerebral atrophy
* Developmental delay
* Epilepsy
* Hypotonia
* Seizure

#### Checks with NebulaGraph 

In [63]:
%ngql CREATE SPACE IF NOT EXISTS PrimeKG(vid_type=FIXED_STRING(256))
%ngql USE PrimeKG;

In [None]:
%%ngql
MATCH (e:Node__)
WHERE id(e) == "38849"
RETURN DISTINCT 
        e.Props__.node_name, 
        e.Chunk__.text

Unnamed: 0,e.Props__.node_name,e.Chunk__.text
0,Wieacker-Wolff syndrome (spectrum),


In [66]:
%%ngql
# find all disease nodes where e.Chunk__.text is empty
MATCH (e:Node__)
MATCH (e)-[r1:Relation__{label:"disease-phenotype-positive"}]-(t1) # {label:"disease-phenotype-positive"}
WHERE e.Props__.node_type == "disease" AND e.Chunk__.text == ""
RETURN DISTINCT 
    e.Props__.node_name,
    e.Props__.node_type,
    e.Chunk__.text,
    t1.Props__.node_name,
    t1.Props__.node_type

Unnamed: 0,e.Props__.node_name,e.Props__.node_type,e.Chunk__.text,t1.Props__.node_name,t1.Props__.node_type
0,familial focal alopecia,disease,,Autosomal dominant inheritance,effect/phenotype
1,familial focal alopecia,disease,,Patchy alopecia,effect/phenotype
2,myelolymphatic insufficiency,disease,,Hyposegmentation of neutrophil nuclei,effect/phenotype
3,myelolymphatic insufficiency,disease,,X-linked recessive inheritance,effect/phenotype
4,"cerebellar, ocular, craniofacial, and genital syndrome",disease,,Low-set ears,effect/phenotype
...,...,...,...,...,...
12449,"Wieacker-Wolff syndrome, female-restricted",disease,,Inability to walk,effect/phenotype
12450,"Wieacker-Wolff syndrome, female-restricted",disease,,Distal muscle weakness,effect/phenotype
12451,"Wieacker-Wolff syndrome, female-restricted",disease,,Kyphosis,effect/phenotype
12452,"Wieacker-Wolff syndrome, female-restricted",disease,,Flexion contracture,effect/phenotype


In [69]:
n.stop()

[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow-root rm -f {}[0m
[1;3;38;2;102;81;145merror: Error: deleting directory:  /home/lasa14/.udocker/containers/d9a1b12a-62b5-3764-80e3-dd482130cc1d
Error: deleting container
[0m
Retrying in 5 seconds... udocker command failed with return code 123
[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow-root rm -f {}[0m
[1;3;38;2;102;81;145merror: Error: deleting directory:  /home/lasa14/.udocker/containers/d9a1b12a-62b5-3764-80e3-dd482130cc1d
Error: deleting container
[0m
Retrying in 15 seconds... udocker command failed with return code 123
[1;3;38;2;47;75;124mudocker command: udocker /home/lasa14/.conda/envs/llm-rag/bin/udocker --allow-root ps | grep graphd | awk '{print $1}' | xargs -I {} udocker --allow