In [71]:
import json

def extract_patient_data(patient_data):
    # Extracting the necessary fields.
    patient_id = patient_data.get("id", None)
    if patient_id is None:
        # Generate an ID if not provided
        patient_id = f"patient_{hash(str(patient_data)) % (10**8)}"  # Simple hash-based ID
    
    positive_phenotypes = patient_data.get("positive_phenotypes", [])
    all_candidate_genes = patient_data.get("all_candidate_genes", [])
    true_genes = patient_data.get("true_genes", [])
    true_diseases = patient_data.get("true_diseases", [])
    
    # Printing out the information
    print(f"Patient ID: {patient_id}")
    print(f"Positive Phenotypes: {positive_phenotypes}")
    print(f"All Candidate Genes: {all_candidate_genes}")
    print(f"True Genes: {true_genes}")
    print(f"True Diseases: {true_diseases}")
    print("\n")  # For better readability
    return patient_id, positive_phenotypes, all_candidate_genes, true_genes, true_diseases

In [72]:
test_sample = {
    "true_genes": ["ENSG00000146085"],
    "age": "Onset_Infant",
    "positive_phenotypes": ["HP:0000124", "HP:0001249", "HP:0001254", "HP:0001259", "HP:0001252", "HP:0001510", "HP:0002017", "HP:0004374", "HP:0410042", "HP:0001903", "HP:0100806", "HP:0001279", "HP:0000712", "HP:0100739", "HP:0031009", "HP:0001363", "HP:0001360"],
    "negative_phenotypes": {
        "HP:0000083": ["init_phenotypes"],
        "HP:0001019": ["phenotype_distractor.9"],
        "HP:0002716": ["phenotype_distractor.9"],
        "HP:0002240": ["phenotype_distractor.9"],
        "HP:0000643": ["noisy_phenotype"],
        "HP:0001369": ["noisy_phenotype"],
        "HP:0008226": ["noisy_phenotype"]
    },
    "n_distractor_genes": 11,
    "distractor_genes": ["ENSG00000143727", "ENSG00000014216", "ENSG00000158352", "ENSG00000105664", "ENSG00000197915", "ENSG00000157766", "ENSG00000188994", "ENSG00000095464", "ENSG00000132155", "ENSG00000152457", "ENSG00000168056"],
    "dropout_phenotypes": {
        "positive_phenotypes": ["HP:0001733", "HP:0001873", "HP:0002098"],
        "negative_phenotypes": ["HP:0000648", "HP:0001263", "HP:0001266", "HP:0001332", "HP:0001875", "HP:0001903", "HP:0001987", "HP:0002072", "HP:0100806"]
    },
    "corruption_phenotypes": {"positive_phenotypes": ["HP:0002240"]},
    "id": 2,
    "in_gene_gene_kg": True,
    "category": "known_gene_disease",
    "broad_category": "known_gene_disease",
    "all_candidate_genes": ["ENSG00000152457", "ENSG00000146085", "ENSG00000157766", "ENSG00000014216", "ENSG00000158352", "ENSG00000143727", "ENSG00000095464", "ENSG00000168056", "ENSG00000132155", "ENSG00000197915", "ENSG00000188994", "ENSG00000105664"],
    "additional_labels": {
        "n_hops_g_p": [[2, 2, 2, 3, 2, 3, 3, 2, 2, 3, 3, 2, 3, 2, 2, 2, 2]],
        "n_hops_p_p": [[0, 2, 2, 3, 2, 3, 3, 2, 2, 2, 4, 2, 3, 4, 2, 2, 2],
                      [2, 0, 2, 3, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 2, 2, 2],
                      [2, 2, 0, 3, 2, 3, 3, 2, 2, 4, 3, 2, 3, 2, 2, 2, 2],
                      [3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 3],
                      [2, 2, 2, 3, 0, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2],
                      [3, 2, 3, 3, 2, 0, 3, 2, 3, 3, 3, 2, 3, 2, 2, 2, 2],
                      [3, 2, 3, 3, 2, 3, 0, 3, 2, 2, 4, 4, 3, 4, 2, 3, 2],
                      [2, 2, 2, 3, 2, 2, 3, 0, 2, 2, 4, 2, 3, 2, 2, 2, 2],
                      [2, 2, 2, 3, 2, 3, 2, 2, 0, 2, 4, 2, 3, 3, 2, 2, 2],
                      [2, 2, 4, 3, 2, 3, 2, 2, 2, 0, 4, 2, 3, 3, 3, 2, 2],
                      [4, 3, 3, 4, 2, 3, 4, 4, 4, 4, 0, 4, 4, 4, 3, 4, 2],
                      [2, 2, 2, 3, 2, 2, 4, 2, 2, 2, 4, 0, 3, 3, 2, 2, 2],
                      [3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 3, 0, 3, 3, 3, 3],
                      [4, 2, 2, 3, 2, 2, 4, 2, 3, 3, 4, 3, 3, 0, 2, 2, 2],
                      [2, 2, 2, 3, 2, 2, 2, 2, 2, 3, 3, 2, 3, 2, 0, 2, 2],
                      [2, 2, 2, 3, 2, 2, 3, 2, 2, 2, 4, 2, 3, 2, 2, 0, 2],
                      [2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 0]],
        "frac_p_with_direct_edge_to_g": [0.0],
        "direct_g_p_edge": [[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False]],
        "direct_g_d_edge": [[True]],
        "n_hops_g_d": [[1]],
        "frac_p_with_direct_edge_to_dx": [0.5882352941176471],
        "max_percent_phen_overlap_train": 0.35294117647058826,
        "max_phen_overlap_train": 6,
        "n_hops_cand_g_p": [-0.3333333333333336, -1.0, 0.08333333333333348, -1.1102230246251565e-16, 1.0, 0.08333333333333348, 0.8333333333333333, 0.08333333333333348, -0.75, 0.33333333333333326, 0.16666666666666652, 0.41666666666666696]
    },
    "true_diseases": ["289916"]
}



In [73]:
patient_id, positive_phenotypes, all_candidate_genes, true_genes, true_diseases  = extract_patient_data(test_sample)

Patient ID: 2
Positive Phenotypes: ['HP:0000124', 'HP:0001249', 'HP:0001254', 'HP:0001259', 'HP:0001252', 'HP:0001510', 'HP:0002017', 'HP:0004374', 'HP:0410042', 'HP:0001903', 'HP:0100806', 'HP:0001279', 'HP:0000712', 'HP:0100739', 'HP:0031009', 'HP:0001363', 'HP:0001360']
All Candidate Genes: ['ENSG00000152457', 'ENSG00000146085', 'ENSG00000157766', 'ENSG00000014216', 'ENSG00000158352', 'ENSG00000143727', 'ENSG00000095464', 'ENSG00000168056', 'ENSG00000132155', 'ENSG00000197915', 'ENSG00000188994', 'ENSG00000105664']
True Genes: ['ENSG00000146085']
True Diseases: ['289916']




In [74]:
import pickle
import pandas as pd

def load_pickle_file(filepath):
    with open(filepath, 'rb') as f:
        return pickle.load(f)

def load_kg_node_map(filepath):
    # Load the KG node map file into a DataFrame
    return pd.read_csv(filepath, sep='\t')

def map_phenotypes_to_kg_nodes(phenotypes, hpo_to_name_dict, hpo_to_idx_dict, kg_node_map):
    # Mapping HPO codes to their names and indices
    phenotype_name_dict = {hpo: hpo_to_name_dict.get(hpo, "Unknown HPO Code") for hpo in phenotypes}
    phenotype_idx_dict = {hpo: hpo_to_idx_dict.get(hpo, "Unknown HPO Code") for hpo in phenotypes}

    # Mapping to the KG node map
    kg_mapping = {}
    for hpo, idx in phenotype_idx_dict.items():
        # Find the row in KG node map with the same node_idx as the phenotype index
        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        if not matching_row.empty:
            kg_mapping[hpo] = {
                'name': phenotype_name_dict[hpo],
                'node_idx': idx,
                'node_id': matching_row['node_id'].values[0],
                'node_type': matching_row['node_type'].values[0]
            }
        else:
            kg_mapping[hpo] = {
                'name': phenotype_name_dict[hpo],
                'node_idx': idx,
                'node_id': "Not in KG",
                'node_type': "Not in KG"
            }

    # Printing out the mappings
    print("Phenotype KG Mappings:")
    for hpo, mapping in kg_mapping.items():
        print(f"HPO Code: {hpo} | Name: {mapping['name']} | Node Index: {mapping['node_idx']} | Node ID: {mapping['node_id']} | Node Type: {mapping['node_type']}")

    return kg_mapping

# Load the necessary files
hpo_to_name_dict = load_pickle_file('hpo_to_name_dict_8.9.21_kg.pkl')
hpo_to_idx_dict = load_pickle_file('hpo_to_idx_dict_8.9.21_kg.pkl')
kg_node_map = load_kg_node_map('KG_node_map.txt')

# Get the name and index mappings for the phenotypes along with KG node info
kg_mapping = map_phenotypes_to_kg_nodes(positive_phenotypes, hpo_to_name_dict, hpo_to_idx_dict, kg_node_map)

Phenotype KG Mappings:
HPO Code: HP:0000124 | Name: Renal tubular dysfunction | Node Index: 14744 | Node ID: 124 | Node Type: effect/phenotype
HPO Code: HP:0001249 | Name: Intellectual disability | Node Index: 15274 | Node ID: 1249 | Node Type: effect/phenotype
HPO Code: HP:0001254 | Name: Lethargy | Node Index: 14013 | Node ID: 1254 | Node Type: effect/phenotype
HPO Code: HP:0001259 | Name: Coma | Node Index: 14120 | Node ID: 1259 | Node Type: effect/phenotype
HPO Code: HP:0001252 | Name: Hypotonia | Node Index: 14015 | Node ID: 1252 | Node Type: effect/phenotype
HPO Code: HP:0001510 | Name: Growth delay | Node Index: 14341 | Node ID: 1510 | Node Type: effect/phenotype
HPO Code: HP:0002017 | Name: Nausea and vomiting | Node Index: 15567 | Node ID: 2017 | Node Type: effect/phenotype
HPO Code: HP:0004374 | Name: Hemiplegia/hemiparesis | Node Index: 15279 | Node ID: 4374 | Node Type: effect/phenotype
HPO Code: HP:0410042 | Name: Abnormal liver morphology | Node Index: 15322 | Node ID: 41

In [75]:
import pickle
import pandas as pd

def load_pickle_file(filepath):
    with open(filepath, 'rb') as f:
        return pickle.load(f)

def load_kg_node_map(filepath):
    # Load the KG node map file into a DataFrame
    return pd.read_csv(filepath, sep='\t')

def map_genes_to_kg_nodes(genes, ensembl_to_idx_dict, kg_node_map, gene_type):
    # Mapping Ensembl IDs to their indices
    gene_idx_dict = {gene: ensembl_to_idx_dict.get(gene, "Unknown Ensembl ID") for gene in genes}

    # Mapping to the KG node Map
    kg_mapping = {}
    for gene, idx in gene_idx_dict.items():
        # Find the row in KG node map with the same node_idx as the gene index
        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        if not matching_row.empty:
            kg_mapping[gene] = {
                'node_idx': idx,
                'node_id': matching_row['node_id'].values[0],
                'node_type': matching_row['node_type'].values[0],
                'node_name': matching_row['node_name'].values[0]
            }
        else:
            kg_mapping[gene] = {
                'node_idx': idx,
                'node_id': "Not in KG",
                'node_type': "Not in KG",
                'node_name': "Not in KG"
            }

    # Printing out the mappings
    print(f"\n{gene_type} KG Mappings:")
    for gene, mapping in kg_mapping.items():
        print(f"Gene Ensembl ID: {gene} | Node Index: {mapping['node_idx']} | Node ID: {mapping['node_id']} | Node Type: {mapping['node_type']} | Node Name: {mapping['node_name']}")

    return kg_mapping

# Load the necessary files
ensembl_to_idx_dict = load_pickle_file('ensembl_to_idx_dict_8.9.21_kg.pkl')
kg_node_map = load_kg_node_map('KG_node_map_test.txt')

# Get the index mappings for all_candidate_genes along with KG node info
all_candidate_genes_mapping = map_genes_to_kg_nodes(all_candidate_genes, ensembl_to_idx_dict, kg_node_map, "All Candidate Genes")

# Get the index mappings for true_genes along with KG node info
true_genes_mapping = map_genes_to_kg_nodes(true_genes, ensembl_to_idx_dict, kg_node_map, "True Genes")


All Candidate Genes KG Mappings:
Gene Ensembl ID: ENSG00000152457 | Node Index: 12980 | Node ID: 64421 | Node Type: gene/protein | Node Name: DCLRE1C
Gene Ensembl ID: ENSG00000146085 | Node Index: 12554 | Node ID: 4594 | Node Type: gene/protein | Node Name: MMUT
Gene Ensembl ID: ENSG00000157766 | Node Index: 5784 | Node ID: 176 | Node Type: gene/protein | Node Name: ACAN
Gene Ensembl ID: ENSG00000014216 | Node Index: 2491 | Node ID: 823 | Node Type: gene/protein | Node Name: CAPN1
Gene Ensembl ID: ENSG00000158352 | Node Index: 54520 | Node ID: 57477 | Node Type: gene/protein | Node Name: SHROOM4
Gene Ensembl ID: ENSG00000143727 | Node Index: 5925 | Node ID: 52 | Node Type: gene/protein | Node Name: ACP1
Gene Ensembl ID: ENSG00000095464 | Node Index: 12868 | Node ID: 5146 | Node Type: gene/protein | Node Name: PDE6C
Gene Ensembl ID: ENSG00000168056 | Node Index: 10649 | Node ID: 4054 | Node Type: gene/protein | Node Name: LTBP3
Gene Ensembl ID: ENSG00000132155 | Node Index: 988 | Node 

In [76]:
import pickle
import pandas as pd

def load_pickle_file(filepath):
    with open(filepath, 'rb') as f:
        return pickle.load(f)

def load_kg_node_map(filepath):
    # Load the KG node map file into a DataFrame
    return pd.read_csv(filepath, sep='\t')

def map_diseases_to_kg_nodes(diseases, orphanet_to_mondo_dict, mondo_to_idx_dict, mondo_to_name_dict, kg_node_map):
    for disease in diseases:
        # Convert disease string to an integer
        try:
            disease_int = int(disease)
        except ValueError:
            print(f"Invalid disease ID format: {disease}")
            continue

        # Find the MONDO ID using the orphanet_to_mondo_dict
        mondo_id = orphanet_to_mondo_dict.get(disease_int, None)
        if not mondo_id:
            print(f"MONDO ID not found for disease ID: {disease}")
            continue

        # MONDO ID is already in the correct format, so no need to convert to string
        mondo_id_str = mondo_id[0]  # assuming mondo_id is a list like ['17360']

        # Map MONDO ID to index and name using the dictionaries
        idx = mondo_to_idx_dict.get(mondo_id_str, "Unknown MONDO ID")
        name = mondo_to_name_dict.get(mondo_id_str, "Unknown MONDO Name")

        # Find the corresponding entry in the KG node map using idx
        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        if not matching_row.empty:
            node_id = matching_row['node_id'].values[0]
            node_type = matching_row['node_type'].values[0]
            node_name = matching_row['node_name'].values[0]
            print(f"true_disease_mondo: {disease} | Node ID: {node_id} | Node Type: {node_type} | Node Name: {node_name}")
        else:
            print(f"true_disease_mondo: {disease} | No matching node found in KG for MONDO ID (Index: {idx})")

# Load the necessary files
orphanet_to_mondo_dict = load_pickle_file('../../preprocess/orphanet/orphanet_to_mondo_dict.pkl')
mondo_to_idx_dict = load_pickle_file('mondo_to_idx_dict_8.9.21_kg.pkl')
mondo_to_name_dict = load_pickle_file('mondo_to_name_dict_8.9.21_kg.pkl')
kg_node_map = load_kg_node_map('KG_node_map_test.txt')

#Get the index mappings for true_diseases along with KG node info
map_diseases_to_kg_nodes(true_diseases, orphanet_to_mondo_dict, mondo_to_idx_dict, mondo_to_name_dict, kg_node_map)

true_disease_mondo: 289916 | Node ID: 17360 | Node Type: disease | Node Name: vitamin B12-unresponsive methylmalonic acidemia type mut0


In [83]:
import json
import pickle
import pandas as pd

# Function to load pickle files
def load_pickle_file(filepath):
    with open(filepath, 'rb') as f:
        return pickle.load(f)

# Function to load KG node map file into a DataFrame
def load_kg_node_map(filepath):
    return pd.read_csv(filepath, sep='\t')

# Function to extract and display patient data
def extract_patient_data(patient_data):
    patient_id = patient_data.get("id", None)
    if patient_id is None:
        patient_id = f"patient_{hash(str(patient_data)) % (10**8)}"  # Generate an ID if not provided

    positive_phenotypes = patient_data.get("positive_phenotypes", [])
    all_candidate_genes = patient_data.get("all_candidate_genes", [])
    true_genes = patient_data.get("true_genes", [])
    true_diseases = patient_data.get("true_diseases", [])

    print(f"Patient ID: {patient_id}")
    print(f"Positive Phenotypes: {positive_phenotypes}")
    print(f"All Candidate Genes: {all_candidate_genes}")
    print(f"True Genes: {true_genes}")
    print(f"True Diseases: {true_diseases}")
    print("\n")  # For better readability
    return patient_id, positive_phenotypes, all_candidate_genes, true_genes, true_diseases

# Function to map phenotypes to KG nodes
def map_phenotypes_to_kg_nodes(phenotypes, hpo_to_name_dict, hpo_to_idx_dict, kg_node_map):
    phenotype_name_dict = {hpo: hpo_to_name_dict.get(hpo, "Unknown HPO Code") for hpo in phenotypes}
    phenotype_idx_dict = {hpo: hpo_to_idx_dict.get(hpo, "Unknown HPO Code") for hpo in phenotypes}

    kg_mapping = {}
    for hpo, idx in phenotype_idx_dict.items():
        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        if not matching_row.empty:
            kg_mapping[hpo] = {
                'name': phenotype_name_dict[hpo],
                'node_idx': idx,
                'node_id': matching_row['node_id'].values[0],
                'node_type': matching_row['node_type'].values[0]
            }
        else:
            kg_mapping[hpo] = {
                'name': phenotype_name_dict[hpo],
                'node_idx': idx,
                'node_id': "Not in KG",
                'node_type': "Not in KG"
            }

    print("\n Phenotype KG Mappings:")
    for hpo, mapping in kg_mapping.items():
        print(f"HPO Code: {hpo} | Name: {mapping['name']} | Node Index: {mapping['node_idx']} | Node ID: {mapping['node_id']} | Node Type: {mapping['node_type']}")
    return kg_mapping

# Function to map genes to KG nodes
def map_genes_to_kg_nodes(genes, ensembl_to_idx_dict, kg_node_map, gene_type):
    gene_idx_dict = {gene: ensembl_to_idx_dict.get(gene, "Unknown Ensembl ID") for gene in genes}

    kg_mapping = {}
    for gene, idx in gene_idx_dict.items():
        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        if not matching_row.empty:
            kg_mapping[gene] = {
                'node_idx': idx,
                'node_id': matching_row['node_id'].values[0],
                'node_type': matching_row['node_type'].values[0],
                'node_name': matching_row['node_name'].values[0]
            }
        else:
            kg_mapping[gene] = {
                'node_idx': idx,
                'node_id': "Not in KG",
                'node_type': "Not in KG",
                'node_name': "Not in KG"
            }

    print(f"\n{gene_type} KG Mappings:")
    for gene, mapping in kg_mapping.items():
        print(f"Gene Ensembl ID: {gene} | Node Index: {mapping['node_idx']} | Node ID: {mapping['node_id']} | Node Type: {mapping['node_type']} | Node Name: {mapping['node_name']}")
    return kg_mapping

# Function to map diseases to KG nodes
def map_diseases_to_kg_nodes(diseases, orphanet_to_mondo_dict, mondo_to_idx_dict, mondo_to_name_dict, kg_node_map):
    for disease in diseases:
        try:
            disease_int = int(disease)
        except ValueError:
            print(f"Invalid disease ID format: {disease}")
            continue

        mondo_id = orphanet_to_mondo_dict.get(disease_int, None)
        if not mondo_id:
            print(f"MONDO ID not found for disease ID: {disease}")
            continue

        mondo_id_str = mondo_id[0]

        idx = mondo_to_idx_dict.get(mondo_id_str, "Unknown MONDO ID")
        name = mondo_to_name_dict.get(mondo_id_str, "Unknown MONDO Name")

        matching_row = kg_node_map[kg_node_map['node_idx'] == idx]
        print("\nTrue Disease Mappings:")
        if not matching_row.empty:
            node_id = matching_row['node_id'].values[0]
            node_idx = matching_row['node_idx'].values[0]
            node_type = matching_row['node_type'].values[0]
            node_name = matching_row['node_name'].values[0]
            print(f"true_disease_mondo: {disease} | Node Index: {node_idx} | Node ID: {node_id} | Node Type: {node_type} | Node Name: {node_name}")
        else:
            print(f"true_disease_mondo: {disease} | No matching node found in KG for MONDO ID (Index: {idx})")

# Load all necessary files
hpo_to_name_dict = load_pickle_file('hpo_to_name_dict_8.9.21_kg.pkl')
hpo_to_idx_dict = load_pickle_file('hpo_to_idx_dict_8.9.21_kg.pkl')
ensembl_to_idx_dict = load_pickle_file('ensembl_to_idx_dict_8.9.21_kg.pkl')
orphanet_to_mondo_dict = load_pickle_file('../../preprocess/orphanet/orphanet_to_mondo_dict.pkl')
mondo_to_idx_dict = load_pickle_file('mondo_to_idx_dict_8.9.21_kg.pkl')
mondo_to_name_dict = load_pickle_file('mondo_to_name_dict_8.9.21_kg.pkl')
kg_node_map = load_kg_node_map('KG_node_map_test.txt')

# Process the patient data
patient_id, positive_phenotypes, all_candidate_genes, true_genes, true_diseases = extract_patient_data(test_sample)

# Map phenotypes to KG nodes
phenotype_kg_mapping = map_phenotypes_to_kg_nodes(positive_phenotypes, hpo_to_name_dict, hpo_to_idx_dict, kg_node_map)

# Map all candidate genes to KG nodes
all_candidate_genes_mapping = map_genes_to_kg_nodes(all_candidate_genes, ensembl_to_idx_dict, kg_node_map, "All Candidate Genes")

# Map true genes to KG nodes
true_genes_mapping = map_genes_to_kg_nodes(true_genes, ensembl_to_idx_dict, kg_node_map, "True Genes")

# Map true diseases to KG nodes
map_diseases_to_kg_nodes(true_diseases, orphanet_to_mondo_dict, mondo_to_idx_dict, mondo_to_name_dict, kg_node_map)

Patient ID: 2
Positive Phenotypes: ['HP:0000124', 'HP:0001249', 'HP:0001254', 'HP:0001259', 'HP:0001252', 'HP:0001510', 'HP:0002017', 'HP:0004374', 'HP:0410042', 'HP:0001903', 'HP:0100806', 'HP:0001279', 'HP:0000712', 'HP:0100739', 'HP:0031009', 'HP:0001363', 'HP:0001360']
All Candidate Genes: ['ENSG00000152457', 'ENSG00000146085', 'ENSG00000157766', 'ENSG00000014216', 'ENSG00000158352', 'ENSG00000143727', 'ENSG00000095464', 'ENSG00000168056', 'ENSG00000132155', 'ENSG00000197915', 'ENSG00000188994', 'ENSG00000105664']
True Genes: ['ENSG00000146085']
True Diseases: ['289916']



 Phenotype KG Mappings:
HPO Code: HP:0000124 | Name: Renal tubular dysfunction | Node Index: 14744 | Node ID: 124 | Node Type: effect/phenotype
HPO Code: HP:0001249 | Name: Intellectual disability | Node Index: 15274 | Node ID: 1249 | Node Type: effect/phenotype
HPO Code: HP:0001254 | Name: Lethargy | Node Index: 14013 | Node ID: 1254 | Node Type: effect/phenotype
HPO Code: HP:0001259 | Name: Coma | Node Index: 

In [4]:
# Use a pipeline as a high-level helper
from transformers import pipeline
import torch

messages = [
    {"role": "user", "content": "Who are you?"},
]

# device cuda

pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3.1-8B-Instruct", token = access_token, device=0, max_new_tokens=256)
# pipe = pipeline("text-generation", 
#                 model="meta-llama/Meta-Llama-3.1-8B-Instruct",
#                 token = access_token, 
#                 device=0,
#                 model_kwargs={"torch_dtype": torch.bfloat16},
#                 max_new_tokens=256)
pipe(messages)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[{'generated_text': [{'role': 'user', 'content': 'Who are you?'},
   {'role': 'assistant',
    'content': 'I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."'}]}]

In [1]:
# check the devices
import torch
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.device(0))

True
0
<torch.cuda.device object at 0x7fceec52c190>
