In [1]:
%load_ext autoreload
%autoreload 2

from outlines import generate, models
from curverag import utils
from curverag.transformations import chunk_text
from curverag.graph import create_graph, KnowledgeGraph
import llama_cpp

In [2]:
max_tokens = 1000
n_ctx=2000

docs = [
    "The patient was diagnosed with type 2 diabetes mellitus and prescribed metformin 500mg twice daily.",
    "MRI scan revealed a small lesion in the left temporal lobe suggestive of low-grade glioma.",
    "Administer 5mg of lorazepam intravenously for acute seizure management.",
    "Blood tests showed elevated ALT and AST levels, indicating possible liver inflammation.",
    "The subject reported chronic lower back pain, managed with physical therapy and NSAIDs.",
    "CT angiography confirmed the presence of a pulmonary embolism in the right lower lobe.",
    "The patient underwent coronary artery bypass graft surgery without complications.",
    "Routine vaccination included MMR, tetanus, and influenza immunizations.",
    "Histopathology indicated ductal carcinoma in situ (DCIS) in the breast biopsy sample.",
    "The child presented with a persistent cough and fever, diagnosed as streptococcal pharyngitis."
]


In [3]:
model = utils.load_model(
    llm_model_path="models/Llama-3-13B-Instruct.i1-IQ3_S.gguf",
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct"),
    n_ctx=n_ctx,
    max_tokens=max_tokens
)

llama_model_loader: loaded meta data with 44 key-value pairs and 498 tensors from models/Llama-3-13B-Instruct.i1-IQ3_S.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Meta Llama 3 8B Instruct
llama_model_loader: - kv   3:                       general.organization str              = Meta Llama
llama_model_loader: - kv   4:                           general.finetune str              = Instruct
llama_model_loader: - kv   5:                           general.basename str              = Meta-Llama-3
llama_model_loader: - kv   6:                         general.size_label str              = 8B
llama_model_loader: - kv   7:     

In [4]:
graph = create_graph(model, docs, chunk_size=100, max_tokens=max_tokens)



<|begin_of_text|><|start_header_id|>system<|end_header_id|>You are a world class AI model who extracts ndoes and entities from documents for a Knowledge Graph creation task. Put yur reply in JSON<|eot_id|><|start_header_id|>user<|end_header_id|>Here's the json schema you must adhere to:
<schema>
{'$defs': {'Edge': {'properties': {'source': {'description': 'Name of the source edge', 'title': 'Source', 'type': 'string'}, 'target': {'description': 'Name of the target edge', 'title': 'Target', 'type': 'string'}, 'name': {'description': 'Name of the relationship for the edge', 'title': 'Name', 'type': 'string'}, 'is_directed': {'description': 'If true its a directed edge', 'title': 'Is Directed', 'type': 'boolean'}, 'description': {'description': 'Description of the edge', 'title': 'Description', 'type': 'string'}}, 'required': ['source', 'target', 'name', 'is_directed', 'description'], 'title': 'Edge', 'type': 'object'}, 'Node': {'properties': {'id': {'description': 'Unique identifier of t

  0%|                                                                                           | 0/10 [01:31<?, ?it/s]


KeyboardInterrupt: 

In [None]:
generator = generate.json(model, KnowledgeGraph, whitespace_pattern="")
# load graph schema and empty graph
schema = KnowledgeGraph.model_json_schema()


In [None]:
def generate_prompt2(chunk, schema):
    return f"""
    Extract the enitties and edges from the text below and put them into json:
    {chunk}

    """

In [23]:
prompt = generate_prompt2(docs[0], schema)
print(prompt)


    Extract the enitties and edges from the text below and put them into json:
    The patient was diagnosed with type 2 diabetes mellitus and prescribed metformin 500mg twice daily.

    


In [24]:
sub_graph = generator(prompt, max_tokens=1000, temperature=0, seed=42)

llama_perf_context_print:        load time =    7732.40 ms
llama_perf_context_print: prompt eval time =       0.00 ms /    45 tokens (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:        eval time =       0.00 ms /   246 runs   (    0.00 ms per token,      inf tokens per second)
llama_perf_context_print:       total time =   84807.60 ms /   291 tokens


In [25]:
sub_graph

KnowledgeGraph(nodes=[Node(id='1', name='Patient', description='The patient was diagnosed with type 2 diabetes mellitus and prescribed metformin 500mg twice daily.'), Node(id='2', name='Type 2 diabetes mell', description=''), Node(id='3', name='Metformin', description=''), Node(id='4', name='500mg', description=''), Node(id='5', name='Twice daily', description='')], edges=[Edge(source='1', target='2', name='has diagnosis of', is_directed=True, description=''), Edge(source='1', target='3', name='prescribed', is_directed=True, description=''), Edge(source='2', target='3', name='is a type of', is_directed=True, description=''), Edge(source='1', target='4', name='has dose form', is_directed=True, description=''), Edge(source='1', target='5', name='frequency of administration', is_directed=True, description='')])

In [7]:
graph = KnowledgeGraph(nodes=[], edges=[])

In [8]:
graph.is_empty()

True