In [1]:
import nest_asyncio

nest_asyncio.apply()

In [2]:

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI

In [3]:
documents = SimpleDirectoryReader(
    "C:\\Users\\MLASSOUED\Documents\\llam2\\data"

).load_data()

In [4]:
node_parser = SentenceSplitter(chunk_size=512)
nodes = node_parser.get_nodes_from_documents(documents)

In [5]:
for idx, node in enumerate(nodes):
    node.id_ = f"node_{idx}"

In [6]:
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, load_index_from_storage, StorageContext
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)
llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    #model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path="C:\projects\Phi-3-medium-128k-instruct-Q5_K_M.gguf",
    temperature=0,
    max_new_tokens=500,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 0},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
from huggingface_hub import configure_http_backend
import os
import urllib3
import requests
 
def backend_factory() -> requests.Session:
    session = requests.Session()
    session.verify = False
    return session
 
configure_http_backend(backend_factory=backend_factory)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
index.storage_context.persist(persist_dir='./storage')
index.set_index_id("vector_index")
index.storage_context.persist("./storage")
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

storage_context = StorageContext.from_defaults(persist_dir="storage")
index = load_index_from_storage(storage_context, index_id="vector_index")

llama_model_loader: loaded meta data with 31 key-value pairs and 245 tensors from C:\projects\Phi-3-medium-128k-instruct-Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = phi3
llama_model_loader: - kv   1:                               general.name str              = Phi3
llama_model_loader: - kv   2:                        phi3.context_length u32              = 131072
llama_model_loader: - kv   3:  phi3.rope.scaling.original_context_length u32              = 4096
llama_model_loader: - kv   4:                      phi3.embedding_length u32              = 5120
llama_model_loader: - kv   5:                   phi3.feed_forward_length u32              = 17920
llama_model_loader: - kv   6:                           phi3.block_count u32              = 40
llama_model_loader: - kv   7:                  phi3.attention.

# Retrieval evaluation 

In [15]:
retriever = index.as_retriever(similarity_top_k=2)
retrieved_nodes = retriever.retrieve("les carburants fossiles?")
from llama_index.core.response.notebook_utils import display_source_node

for node in retrieved_nodes:
    display_source_node(node, source_length=1000)

**Node ID:** 6781c747-3e17-4fb7-b693-3e59f2bc65bb<br>**Similarity:** 0.7290516096275045<br>**Text:** Dihydrogène (H2) 120 – 142 (liquéfié) 8,5 – 10,1 (Production et liquéfaction de l'hydrogène) 0,0
Carburants d'origine fossile
Charbon 29,3 – 33,5 39,85 - 74,43(En ne comptant pas : CO, NOx, sulfates et particules) : ~3,59
Pétrole 41,868 28 – 31,4 (En ne comptant pas : CO, NOx, sulfates et particules) : ~3,4
Essence 45 – 48,3 32 – 34,8(En ne comptant pas : CO, NOx, sulfates et particules) : ~3,30
Gazole (Diesel) 48,1 40,3 (En ne comptant pas : CO, NOx, sulfates et particules) : ~3,4
Gaz naturel 38 – 50(liquéfié) 25,5 –
28,7(Éthane, propane et butane N/C : CO, NOx et sulfates) : ~3,00
Éthane (CH3-CH3) 51,9 (liquéfié) ~24,0 2,93
Pouvoir calorifique inférieur (PCI) de composés organiques purs (à29/08/2024 09:45 Carburant — Wikipédia
https://fr.wikipedia.org/wiki/Carburant 4/8<br>

**Node ID:** 1cdd38a0-7d04-4e6c-bec7-19b8bd6162a1<br>**Similarity:** 0.7081434882530891<br>**Text:** 9. Dans de nombreuses régions agricoles de l’OCDE, les niveaux de pollution dépassent les normes de qualité de
l’eau potable  (http://www .oecd.org/document/36/0,3343,fr_2649_34487_40846244_1_1_1_1,00.html) , sur
oecd.org, consulté le 28 décembre 2018
Jean-Claude Guibet  et Emmanuelle Faure , Carburants et
moteurs : technologies, énergie, environnement , Ophrys, 1997 ,
819 p. (ISBN 978-2-7108-0704-9, lire en ligne (https://books.google.fr/books?id=IpUnMfB27i4C&printsec=frontcover))
E10 (carburant)
E85 (carburant)Distributeurs pétroliers
Notes et références
Notes
Références
Annexes
Sur les autres projets Wikimedia :
carburant , sur le Wiktionnaire
Une catégorie  est consacrée à ce
sujet : Carburant .Bibliographie
Articles connexes29/08/2024 09:45 Carburant — Wikipédia
https://fr.wikipedia.org/wiki/Carburant 7/8<br>

In [9]:
from llama_index.core.evaluation import (
    generate_question_context_pairs,
    EmbeddingQAFinetuneDataset,
)

In [10]:

qa_dataset = generate_question_context_pairs(
    nodes, llm=llm, num_questions_per_chunk=2
)

  0%|          | 0/20 [00:00<?, ?it/s]
llama_print_timings:        load time =  169892.16 ms
llama_print_timings:      sample time =       8.98 ms /    52 runs   (    0.17 ms per token,  5791.94 tokens per second)
llama_print_timings: prompt eval time =  247670.98 ms /   686 tokens (  361.04 ms per token,     2.77 tokens per second)
llama_print_timings:        eval time =   72470.40 ms /    51 runs   ( 1420.99 ms per token,     0.70 tokens per second)
llama_print_timings:       total time =  322709.35 ms /   737 tokens
  5%|▌         | 1/20 [05:22<1:42:13, 322.82s/it]Llama.generate: 71 prefix-match hit, remaining 543 prompt tokens to eval

llama_print_timings:        load time =  169892.16 ms
llama_print_timings:      sample time =       8.33 ms /    61 runs   (    0.14 ms per token,  7322.93 tokens per second)
llama_print_timings: prompt eval time =  193768.80 ms /   543 tokens (  356.85 ms per token,     2.80 tokens per second)
llama_print_timings:        eval time =   30787.78 ms / 

In [11]:
queries = qa_dataset.queries.values()
print(list(queries)[2])

What is the title of Virginie Despentes' book that is sometimes taught in gender studies and recommended to millennial women?


In [12]:
# [optional] save
qa_dataset.save_json("pg_eval_dataset.json")


In [13]:
qa_dataset = EmbeddingQAFinetuneDataset.from_json("pg_eval_dataset.json")


In [14]:
import pandas as pd 
eval_pdf= pd.DataFrame(qa_dataset)
eval_pdf

Unnamed: 0,0,1
0,queries,{'07161afe-0cc4-49bb-ae7e-c3702c268733': 'In w...
1,corpus,{'node_0': 'Virginie Despentes Despentes in Ma...
2,relevant_docs,{'07161afe-0cc4-49bb-ae7e-c3702c268733': ['nod...
3,mode,text


In [17]:
from llama_index.core.evaluation import RetrieverEvaluator

metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]

if include_cohere_rerank:
    metrics.append(
        "cohere_rerank_relevancy"  # requires COHERE_API_KEY environment variable to be set
    )

retriever_evaluator = RetrieverEvaluator.from_metric_names(
    metrics, retriever=retriever
)

In [18]:
# try it out on a sample query
sample_id, sample_query = list(qa_dataset.queries.items())[0]
sample_expected = qa_dataset.relevant_docs[sample_id]

eval_result = retriever_evaluator.evaluate(sample_query, sample_expected)
print(eval_result)

Query: In which year was Virginie Despentes born, and in which city in France?
Metrics: {'hit_rate': 0.0, 'mrr': 0.0, 'precision': 0.0, 'recall': 0.0, 'ap': 0.0, 'ndcg': 0.0}



# Embedding Similarity Evaluator

In [10]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator

evaluator = SemanticSimilarityEvaluator(
    embed_model=embed_model,
    
    similarity_threshold=0.6,
)

In [11]:
response = "DMA."
reference = "DMA."

result = await evaluator.aevaluate(
    response=response,
    reference=reference,
)

In [12]:
print("Score: ", result.score)
print("Passing: ", result.passing)

Score:  0.9999999999999998
Passing:  True
