# How to implement Llama3 (8b,70b)
This code is an implementation of the official Llama 3 notebook, [Getting to know Llama](https://github.com/meta-llama/llama-recipes/blob/main/recipes/quickstart/Getting_to_know_Llama.ipynb).

# Getting to know Llama 3: Everything you need to start building
Our goal in this session is to provide a guided tour of Llama 3, compared with Llama 2, including understanding different Llama 3 models, how and where to access them, Generative AI and Chatbot architectures, prompt engineering, RAG (Retrieval Augmented Generation), Fine-tuning and more. All of this is implemented as starter code that you can take and use in your Llama 3 projects.

In [1]:
!pip install matplotlib ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi
Successfully installed jedi-0.19.1


In [2]:
# presentation layer code

import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def mm(graph):
  """Render a Mermaid diagram inline through the mermaid.ink image service.

  Args:
    graph: Mermaid diagram source text.

  The source is base64-encoded, appended to ``https://mermaid.ink/img/`` and
  the resulting image URL is shown with IPython's ``display``.
  """
  # UTF-8 instead of ASCII so labels with non-ASCII characters do not raise
  # UnicodeEncodeError.
  graphbytes = graph.encode("utf8")
  # URL-safe alphabet: standard base64 can emit "+" and "/", which are not
  # safe inside a URL path segment.
  base64_string = base64.urlsafe_b64encode(graphbytes).decode("ascii")
  display(Image(url="https://mermaid.ink/img/" + base64_string))

def genai_app_arch():
  """Draw the GenAI application flowchart: users, applications, platforms, frameworks and Llama 3."""
  diagram = """
  flowchart TD
    A[Users] --> B(Applications e.g. mobile, web)
    B --> |Hosted API|C(Platforms e.g. Custom, HuggingFace, Replicate)
    B -- optional --> E(Frameworks e.g. LangChain)
    C-->|User Input|D[Llama 3]
    D-->|Model Output|C
    E --> C
    classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(diagram)

def rag_arch():
  """Draw the RAG flowchart: user prompts, a framework, external data and Llama 3."""
  rag_diagram = """
  flowchart TD
    A[User Prompts] --> B(Frameworks e.g. LangChain)
    B <--> |Database, Docs, XLS|C[fa:fa-database External Data]
    B -->|API|D[Llama 3]
    classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(rag_diagram)

def llama2_family():
  """Draw the Llama 2 model family graph (7b/13b/70b and their chat variants)."""
  family_graph = """
  graph LR;
      llama-2 --> llama-2-7b
      llama-2 --> llama-2-13b
      llama-2 --> llama-2-70b
      llama-2-7b --> llama-2-7b-chat
      llama-2-13b --> llama-2-13b-chat
      llama-2-70b --> llama-2-70b-chat
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(family_graph)

def llama3_family():
  """Draw the Llama 3 model family graph (8b/70b and their instruct variants).

  Mirrors the shape of ``llama2_family``: each base model points to its
  instruction-tuned variant. Edges from a node to itself are omitted because
  they only render as loops and carry no information.
  """
  mm("""
  graph LR;
      llama-3 --> llama-3-8b
      llama-3 --> llama-3-70b
      llama-3-8b --> llama-3-8b-instruct
      llama-3-70b --> llama-3-70b-instruct
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """)

import ipywidgets as widgets
from IPython.display import display, Markdown

# Create a password input widget (not a plain text field) labelled "API_KEY:".
# NOTE(review): this widget is not displayed or read anywhere in the visible part of the notebook.
API_KEY = widgets.Password(
    value='',
    placeholder='',
    description='API_KEY:',
    disabled=False
)

def md(t):
  """Show the string ``t`` as rendered Markdown in the notebook output."""
  rendered = Markdown(t)
  display(rendered)

def bot_arch():
  """Draw the chatbot graph: prompt, input/output safety, memory, context and Llama 3."""
  bot_graph = """
  graph LR;
  user --> prompt
  prompt --> i_safety
  i_safety --> context
  context --> Llama_3
  Llama_3 --> output
  output --> o_safety
  i_safety --> memory
  o_safety --> memory
  memory --> context
  o_safety --> user
  classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(bot_graph)

def fine_tuned_arch():
  """Draw the fine-tuning graph: custom dataset, pre-trained Llama, fine-tuned Llama and RLHF."""
  tuning_graph = """
  graph LR;
      Custom_Dataset --> Pre-trained_Llama
      Pre-trained_Llama --> Fine-tuned_Llama
      Fine-tuned_Llama --> RLHF
      RLHF --> |Loss:Cross-Entropy|Fine-tuned_Llama
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(tuning_graph)

def load_data_faiss_arch():
  """Draw the data-loading graph: documents, text splitter, embeddings and vector store."""
  pipeline_graph = """
  graph LR;
      documents --> textsplitter
      textsplitter --> embeddings
      embeddings --> vectorstore
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(pipeline_graph)

def mem_context():
  """Draw how prompt, instruction, examples and memory feed the context, tokenizer, embeddings and LLM."""
  context_graph = """
      graph LR
      context(text)
      user_prompt --> context
      instruction --> context
      examples --> context
      memory --> context
      context --> tokenizer
      tokenizer --> embeddings
      embeddings --> LLM
      classDef default fill:#CCE6FF,stroke:#84BCF5,textColor:#1C2B33,fontFamily:trebuchet ms;
  """
  mm(context_graph)

In [None]:
llama3_family()

In [3]:
# Define the URL you received in the email
PRESIGNED_URL = "https://download6.llamameta.net/*?Policy=eyJTdGF0ZW1lbnQiOlt7InVuaXF1ZV9oYXNoIjoiM3Qydms2cDV3dGFxZjRmcWxoeHoxMHExIiwiUmVzb3VyY2UiOiJodHRwczpcL1wvZG93bmxvYWQ2LmxsYW1hbWV0YS5uZXRcLyoiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE3MTQ4MzIwODd9fX1dfQ__&Signature=Je6vZ7w2sCHlVo3~1MI~d0ucxioWhoo6E1IeUD-WeTOUSqPNTUZfX7i8xe0l8aFDN-Zzz3woRug1DfebBRVOUvhoYfjNGR2Tq8MM9sP0R1joB1omhVslviv5aCBstFzaS7iVPhaIRTfMZz5xkyuDJz9oH2k3yQOSYRu2n0bSoMMBEHDLu-EzYw2NYrLsA1tXgwsetWjA5efgbeKiO4g2Vp0Pmxsh85ZzZlIK5eM-Ohf32HvUzgC2Av7-a8fKbKlUCDaNZaCohljbjEqHFz7KF3I36XnV~JYbssVrPzJQHlXWlWAK2xc2j~WsVVfdpYojV~dcCIPGmmXot8v-cLAF1A__&Key-Pair-Id=K15QRJLYKIFSLZ&Download-Request-ID=1113391456552429"

# Write the user input to a file, which will be read by the script
with open("input.txt", "w") as f:
    f.write(PRESIGNED_URL + "\n")  # Write the URL

# Execute the script
!bash /download.sh < input.txt


bash: /download.sh: No such file or directory


# Install dependencies
You'll need to first sign in with your GitHub or Gmail account, then get an API token to try Groq out for free. (Groq runs Llama models very fast. It supports only one Llama 2 model, the Llama 2 70b chat; the Llama 3 8b and 70b models are used below as well.)

In [None]:
#groq_API_token = "<YOUR_GROQ_API_TOKEN>"  # use your own API token from Groq; never commit a real token

In [4]:
!pip install groq

Collecting groq
  Downloading groq-0.8.0-py3-none-any.whl (105 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.4/105.4 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from groq)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->groq)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->groq)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, groq
Successfully installed groq-0.8.0 h11-0.14.0 httpcore-1.0.5 ht

In [5]:
import os
from getpass import getpass

In [6]:
# Prompt the user for the Groq API token; the typed value is masked in the notebook output
GROQ_API_TOKEN = getpass("Enter your Groq API token: ")

# Store the token in the GROQ_API_KEY environment variable, which the Groq client below reads
os.environ["GROQ_API_KEY"] = GROQ_API_TOKEN

Enter your Groq API token: ··········


Create Llama 2 and Llama 3 helper functions - for chatbot type of apps, we'll use Llama 3 8b/70b instruct models, not the base models.

In [7]:
# Create helpers for Llama 2 and Llama 3
from groq import Groq

# Shared Groq client; the API key is taken from the GROQ_API_KEY environment variable
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

def _groq_chat(model, prompt, temperature):
  """Send ``prompt`` as a single user message to ``model`` and return the reply text."""
  chat_completion = client.chat.completions.create(
      messages=[
          {
              "role": "user",
              "content": prompt,
          }
      ],
      model=model,
      temperature=temperature,
  )
  return chat_completion.choices[0].message.content


def llama2(prompt, temperature=0.0, input_print=True):
  """Ask the ``llama2-70b-4096`` model a single-turn question via Groq.

  Args:
    prompt: Text sent as the only user message.
    temperature: Sampling temperature forwarded to the API.
    input_print: Accepted for backward compatibility; currently unused.

  Returns:
    The message content of the first returned choice.
  """
  return _groq_chat("llama2-70b-4096", prompt, temperature)


def llama3_8b(prompt, temperature=0.0, input_print=True):
  """Ask the ``llama3-8b-8192`` model a single-turn question via Groq.

  Args:
    prompt: Text sent as the only user message.
    temperature: Sampling temperature forwarded to the API.
    input_print: Accepted for backward compatibility; currently unused.

  Returns:
    The message content of the first returned choice.
  """
  return _groq_chat("llama3-8b-8192", prompt, temperature)


def llama3_70b(prompt, temperature=0.0, input_print=True):
  """Ask the ``llama3-70b-8192`` model a single-turn question via Groq.

  Args:
    prompt: Text sent as the only user message.
    temperature: Sampling temperature forwarded to the API.
    input_print: Accepted for backward compatibility; currently unused.

  Returns:
    The message content of the first returned choice.
  """
  return _groq_chat("llama3-70b-8192", prompt, temperature)


# Basic QA with Llama 2 and 3

In [8]:
prompt = "The typical color of a llama is: "
output = llama3_8b(prompt)
md(output)

The typical color of a llama is white! However, llamas can also come in a variety of other colors, including:

* Suri: a soft, silky coat that can be white, cream, or light brown
* Huacaya: a fluffy, soft coat that can be white, cream, or light brown
* Brown: a reddish-brown color
* Black: a glossy black coat
* Gray: a grayish-brown coat
* Fawn: a reddish-brown coat with a white undercoat
* Cream: a light beige or cream-colored coat

It's worth noting that llamas can also have a variety of patterns and markings on their coats, such as spots, stripes, or patches.

In [None]:
output = llama3_8b("The typical color of a llama is what? Answer in one word.")
md(output)

Brown.

In [None]:
prompt_chat = "What is the average lifespan of a Llama? Answer the question in few words."
output = llama3_8b(prompt_chat)
md(output)

15-20 years.

# Chat conversation:
Single-turn chat: Llama 3 8b vs. Llama 3 70b

In [None]:
# Example without previous context. LLMs are stateless and cannot understand "they" without previous context
prompt_chat = "What animal family are they? Answer the question in few words."
output = llama3_8b(prompt_chat)
md(output)

Canidae (dogs)

**Note: In this example Llama 3 70b doesn't hallucinate; it asks for clarification instead.**

In [None]:
output = llama3_70b(prompt_chat)
md(output)

I'm happy to help! However, I don't see a specific animal mentioned in your question. Could you please clarify or provide more context about which animal you're referring to?

# Multi-turn chat
A chat app requires us to send the previous context to the LLM in order to get valid responses. Below is an example of multi-turn chat.

In [None]:
# example of multi-turn chat, with storing previous context
prompt_chat = """
User: What is the average lifespan of a Llama?
Assistant: 15-20 years.
User: What animal family are they?
"""

In [None]:
output = llama3_8b(prompt_chat)
md(output)

Llamas belong to the camelid family (Camelidae).

**In-Context Learning (e.g. Zero-shot, Few-shot)**

In-context learning - a prompt-engineering method in which demonstrations of the task are provided as part of the prompt.
Zero-shot learning - the model performs the task without any input examples.
Few-shot or “N-shot” learning - the model performs the task and behaves according to the input examples in the user's prompt.

In [None]:
# Zero-shot example. To get positive/negative/neutral sentiment, we need to give examples in the prompt
prompt = '''
Classify: I saw a Gecko.
Sentiment: ?

Give one word response.
'''

output = llama3_8b(prompt)
md(output)

Neutral

In [None]:
# By giving examples to Llama, it understands the expected output format.

prompt = '''
Classify: I love Llamas!
Sentiment: Positive
Classify: I dont like Snakes.
Sentiment: Negative
Classify: I saw a Gecko.
Sentiment:

Give one word response.
'''

output = llama3_8b(prompt)
md(output)


Neutral

# Chain of Thought
"Chain of thought" enables complex reasoning through logical step by step thinking and generates meaningful and contextually relevant responses.

In [None]:
# Standard prompting
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.
How many tennis balls does Llama have?

Answer in one word.
'''

output = llama3_8b(prompt)
md(output)

Seven.

In [None]:
output = llama3_70b(prompt)
md(output)

Eleven.

**Note:** Llama 3-8b did not get the right answer because it was asked to answer in one word.

In [None]:
# By default, Llama 3 models follow "Chain-Of-Thought" prompting
prompt = '''
Llama started with 5 tennis balls. It buys 2 more cans of tennis balls. Each can has 3 tennis balls.
How many tennis balls does Llama have?
'''

output = llama3_8b(prompt)
md(output)

Let's break it down step by step!

Llama started with 5 tennis balls.

Llama buys 2 more cans of tennis balls. Each can has 3 tennis balls, so that's a total of 2 x 3 = 6 new tennis balls.

Adding the new tennis balls to the original 5, Llama now has:
5 (initial tennis balls) + 6 (new tennis balls) = 11 tennis balls.

So, Llama now has 11 tennis balls!

In [None]:
output = llama3_70b(prompt)
md(output)

Llama started with 5 tennis balls. Then it bought 2 cans of tennis balls. Each can has 3 tennis balls. So that is 2 x 3 = 6 tennis balls. 5 + 6 = 11.
#### 11

**Note:** By default, Llama 3 models identify word problems and solve them step by step!

In [None]:
prompt = """
15 of us want to go to a restaurant.
Two of them have cars
Each car can seat 5 people.
Two of us have motorcycles.
Each motorcycle can fit 2 people.
Can we all get to the restaurant by car or motorcycle?
Think step by step.
Provide the answer as a single yes/no answer first.
Then explain each intermediate step.
"""
output = llama3_8b(prompt)
print(output)

**Yes**

Here's the step-by-step breakdown:

1. We have 15 people who want to go to the restaurant.
2. Two people have cars that can seat 5 people each. This means we can accommodate 10 people in cars (2 cars x 5 seats per car).
3. We still have 5 people left who can't fit in the cars. We'll consider the motorcycles now.
4. Two people have motorcycles that can fit 2 people each. This means we can accommodate 4 people in motorcycles (2 motorcycles x 2 seats per motorcycle).
5. We still have 1 person left who can't fit in the cars or motorcycles. Unfortunately, we can't fit all 15 people in cars or motorcycles.
6. However, we can fit 10 people in cars (10 seats available) and 4 people in motorcycles (4 seats available), which adds up to 14 people. We still have 1 person left over.
7. Since we can't fit all 15 people in cars or motorcycles, we can't take everyone to the restaurant. However, we can take 14 people to the restaurant.

So, the answer is **Yes**, we can take 14 people to the r

In [None]:
output = llama3_70b(prompt)
print(output)

**Answer:** NO

Here's the step-by-step explanation:

1. We have 15 people who want to go to the restaurant.
2. We have 2 cars, each of which can seat 5 people. So, the cars can accommodate a total of 2 x 5 = 10 people.
3. This leaves 15 - 10 = 5 people who still need transportation.
4. We have 2 motorcycles, each of which can fit 2 people. So, the motorcycles can accommodate a total of 2 x 2 = 4 people.
5. This still leaves 5 - 4 = 1 person who doesn't have a ride.
6. Since we can't fit all 15 people in the available cars and motorcycles, the answer is NO, we cannot all get to the restaurant by car or motorcycle.


**Note:** Llama 3 70b model works correctly in this example.

Summary: Llama 2 often needs encouragement to think step by step in order to reason correctly. Llama 3 understands, reasons and explains better, making chain of thought unnecessary in the cases above.

# Retrieval Augmented Generation (RAG)

Prompt Eng Limitations - Knowledge cutoff & lack of specialized data

Retrieval Augmented Generation (RAG) allows us to retrieve snippets of information from external data sources and add them to the user's prompt to get tailored responses from Llama.
For our demo, we are going to load an external web page from a URL and query its content to get contextually relevant information back with the help of Llama!

In [None]:
rag_arch()

In [9]:
!pip install langchain
!pip install sentence-transformers
!pip install faiss-cpu
!pip install bs4
!pip install langchain-groq

Collecting langchain
  Downloading langchain-0.2.1-py3-none-any.whl (973 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m973.5/973.5 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_core-0.2.1-py3-none-any.whl (308 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.5/308.5 kB[0m [31m37.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.0-py3-none-any.whl (23 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.63-py3-none-any.whl (122 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.8/122.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.0->langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting packaging<24.0,>=23.2 (from langcha

# LangChain Q&A Retriever
Conversational Retrieval Chain

Query the Source documents


In [10]:
!pip install langchain-community langchain-core

Collecting langchain-community
  Downloading langchain_community-0.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensio

In [11]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
import bs4

**The following address links to the AI course page at Unipd, from which we want to extract information.**
You can change the link to any other course or web page and then ask relevant questions.

In [None]:
# Step 1: Load the course page from its web URL (the loader takes a list of URLs; one is given here)
loader = WebBaseLoader(["https://en.didattica.unipd.it/off/2024/LM/SC/SC2598/000ZZ/SCQ0093639/N0"])
documents = loader.load()

# Step 2: Split the loaded documents into overlapping chunks (chunk_size=500, chunk_overlap=50)
# NOTE(review): sizes are presumably measured in characters — confirm against the splitter's docs
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents)

# Step 3: Embed the chunks with the all-mpnet-base-v2 sentence-transformers model and store them in a FAISS vector store
vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain_groq import ChatGroq

# Llama 3 70b served through Groq
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192")

# Conversational retrieval chain over the FAISS store; source documents are returned with the answer
chain = ConversationalRetrievalChain.from_llm(llm,
                                              vectorstore.as_retriever(),
                                              return_source_documents=True)

# Use .invoke(): calling the chain object directly is deprecated (see the recorded warn_deprecated output)
result = chain.invoke({"question": "how many credits are allocated to the Artificial Intelligence course?", "chat_history": []})
md(result['answer'])

  warn_deprecated(


According to the provided context, 6.0 ECTS credits are allocated to the Artificial Intelligence course.

In [None]:
# Same retrieval setup, but answered by Llama 3 8b
llm_8b = ChatGroq(temperature=0, model_name="llama3-8b-8192")

chain_8b = ConversationalRetrievalChain.from_llm(llm_8b,
                                              vectorstore.as_retriever(),
                                              return_source_documents=True)

# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "how many credits are allocated to the Artificial Intelligence course?", "chat_history": []})
md(result_8b['answer'])

According to the provided information, the course "ARTIFICIAL INTELLIGENCE" has 6.0 ECTS credits allocated.

In [None]:
# Query against your own data

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), return_source_documents=True)

# First turn: no history yet
chat_history = []
query = "when the course will start?"
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": query, "chat_history": chat_history})
md(result['answer'])

The course will start on 30/09/2024.

In [None]:
# This time your previous question and answer will be included as a chat history which will enable the ability
# to ask follow up questions.
# Seed the history with the previous (question, answer) pair
chat_history = [(query, result["answer"])]
query = "when it will end?"
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": query, "chat_history": chat_history})
md(result['answer'])

The provided context does not explicitly mention the course end date. It only mentions the start date, which is 30/09/2024.

In [None]:
# Append the latest turn so earlier turns stay in the history
# (re-assigning the list here would drop the first question and answer)
chat_history.append((query, result["answer"]))
query = "how many lecture hours does it have?"
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": query, "chat_history": chat_history})
md(result['answer'])

According to the provided information, the total number of lecture hours is 40.

# Models Evaluation:
**1- RAGAS:**
The following evaluation is based on the [DeepEval RAGAS](https://docs.confident-ai.com/docs/metrics-ragas) metrics, which evaluate RAG performance:

* RAGAS Answer Relevancy Metric
* RAGAS Faithfulness Metric
* RAGAS Contextual Precision Metric
* RAGAS Contextual Recall Metric


In [12]:
from langchain.chains import ConversationalRetrievalChain
from langchain_groq import ChatGroq

# Step 1: Load the document from a web url
loader = WebBaseLoader(["https://www.unipd.it/en/how-apply"])
documents_cs = loader.load()

# Step 2: Split the document into chunks with a specified chunk size
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
all_splits = text_splitter.split_documents(documents_cs)

# Step 3: Store the document into a vector store with a specific embedding model
vectorstore_cs = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))

# Step 4: Build one retrieval chain per model over the same vector store
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192")
chain = ConversationalRetrievalChain.from_llm(llm,
                                              vectorstore_cs.as_retriever(),
                                              return_source_documents=True)

llm_8b = ChatGroq(temperature=0, model_name="llama3-8b-8192")
chain_8b = ConversationalRetrievalChain.from_llm(llm_8b,
                                              vectorstore_cs.as_retriever(),
                                              return_source_documents=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

**Question 1**

Llama3 70b:

In [13]:
# Use .invoke(): the direct chain(...) call triggers a deprecation warning (see the recorded warn_deprecated output)
result = chain.invoke({"question": "Can I apply for the admission of the university of Padua if I am a non-italian student?", "chat_history": []})
md(result['answer'])

# Start the list of Llama 3 70b answers
llama_70b_outputs = [result['answer']]

  warn_deprecated(


Yes, the University of Padua welcomes applications from international students, which includes non-Italian students who hold a non-Italian academic diploma, degree or equivalent.

Llama3 8b:

In [86]:
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "Can I apply for the admission of the university of Padua if I am a non-italian student?", "chat_history": []})
md(result_8b['answer'])

# Start the list of Llama 3 8b answers
llama_8b_outputs = [result_8b['answer']]

According to the provided context, yes, the University of Padua welcomes applications from international students. You are considered an "international student" if you hold a non-Italian academic diploma, degree or equivalent.

**Question 2**

Llama3 70b:

In [14]:
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": "Are the courses provided by the university of Padua taught only in Italian?", "chat_history": []})
md(result['answer'])

llama_70b_outputs.append(result['answer'])

No, the courses provided by the University of Padua are not taught only in Italian. According to the text, there are both ITALIAN-taught degree courses and ENGLISH-taught degree courses.

Llama3 8b:

In [87]:
# Question wording fixed ("provided by the university") so both models receive the identical prompt.
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "Are the courses provided by the university of Padua taught only in Italian?", "chat_history": []})
md(result_8b['answer'])

llama_8b_outputs.append(result_8b['answer'])

No, not all courses are taught in Italian. According to the text, there are also ENGLISH-taught degree courses, and you can check the application procedures and course factsheets for more information.

**Question 3**

Llama3 70b:

In [15]:
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": "I missed the deadlines for submitting application for the first semester. Can I enroll and submit the application for the second (winter) semester?", "chat_history": []})
md(result['answer'])

llama_70b_outputs.append(result['answer'])


According to the provided context, the answer is no. It is stated that "Enrolment for the 2nd semester is not available."

Llama3 8b:

In [88]:
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "I missed the deadlines for submitting application for the first semester. Can I enroll and submit the application for the second (winter) semester?", "chat_history": []})
md(result_8b['answer'])

llama_8b_outputs.append(result_8b['answer'])

According to the provided information, the University of Padua does not offer enrollment for the 2nd semester. It is stated that "Enrolment for the 2nd semester is not available." Therefore, it is not possible to enroll and submit an application for the second semester.

**Question 4**

Llama3 70b:

In [16]:
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": "I want to apply for the bachelor's degree in biology, how to know if I am eligible?", "chat_history": []})
md(result['answer'])

llama_70b_outputs.append(result['answer'])

According to the provided context, there is no specific information about the eligibility criteria for a Bachelor's degree in biology. However, it is mentioned that applicants can consult a country-based list of qualifications to verify whether their entry qualification is suitable to access their degree programme of interest.

I would recommend consulting the country-based list of qualifications to determine if your entry qualification is suitable for the Bachelor's degree in biology.

Llama3 8b:

In [89]:
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "I want to apply for the bachelor's degree in biology, how to know if I am eligible?", "chat_history": []})
md(result_8b['answer'])

llama_8b_outputs.append(result_8b['answer'])

According to the provided information, to be eligible for admission to a Bachelor's degree, you need to have a Bachelor's degree or equivalent foreign qualification with specific curricular requirements obtained from a higher education institution accredited according to the national educational accreditation system of the country where it is located.

Since you want to apply for a Bachelor's degree in Biology, you can consult a country-based list of qualifications to verify whether your entry qualification is suitable to access the degree programme of interest.

**Question 5**

Llama3 70b:

In [17]:
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": "The duration of diploma that I obtained is 11 years in my country, does that mean that I can not apply for the admission of the bachelor's degree program in biology of the university of Padua?", "chat_history": []})
md(result['answer'])

llama_70b_outputs.append(result['answer'])

According to the provided context, if you have a 11-year diploma, you can still apply for admission to a Bachelor's degree program, but you need to have attended at least 1 year of academic studies and passed all the scheduled exams to become eligible.

Llama3 8b:

In [90]:
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "The duration of diploma that I obtained is 11 years in my country, does that mean that I can not apply for the admission of the bachelor's degree program in biology of the university of Padua?", "chat_history": []})
md(result_8b['answer'])

llama_8b_outputs.append(result_8b['answer'])

According to the University of Padua's admission requirements, if you have been awarded a final diploma after a shorter programme of study (i.e. less than 12 years), you need to have attended at least 1 (in case of 11 years of schooling) or 2 (in case of 10 years of schooling) years of academic studies and passed all the scheduled exams in order to become eligible for admission to a Bachelor's degree.

Since your diploma is 11 years, you do not meet the minimum 12-year school record requirement. However, you may still be eligible for admission if you have attended at least 1 year of academic studies and passed all the scheduled exams. I would recommend checking the application procedures and course factsheets for the Bachelor's degree in Biology at the University of Padua to confirm the specific requirements and to see if you meet the additional requirements.

**Question 6**

Llama3 70b:

In [18]:
# .invoke() replaces the deprecated direct chain(...) call
result = chain.invoke({"question": "How can I make sure if my previous university which is not Italian is qualified in order to apply for the master's degree programs?", "chat_history": []})
md(result['answer'])

llama_70b_outputs.append(result['answer'])

According to the provided context, you can consult a country-based list of qualifications to verify whether your entry qualification is suitable to access the Master's degree programme of interest. This list will help you determine if your previous university and qualification meet the requirements for admission to the Master's programme.

Llama3 8b:

In [91]:
# .invoke() replaces the deprecated direct chain(...) call
result_8b = chain_8b.invoke({"question": "How can I make sure if my previous university which is not Italian is qualified in order to apply for the master's degree programs?", "chat_history": []})
md(result_8b['answer'])

llama_8b_outputs.append(result_8b['answer'])

According to the provided context, you can consult a country-based list of qualifications to verify whether your entry qualification is suitable to access your desired degree program.

**Question 7**

Llama3 70b:

In [19]:
# Question 7 for the 70b chain, asked with an empty chat history.
question_7 = "Can I apply for just single course units not the whole degree programs of the University of Padua?"
result = chain({"question": question_7, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_70b = result['answer']
md(answer_70b)
llama_70b_outputs.append(answer_70b)

The provided context does not explicitly mention the possibility of applying for single course units, only whole degree programs. Therefore, I don't know the answer to this question.

Llama3 8b:

In [92]:
# Question 7 for the 8b chain, asked with an empty chat history.
question_7 = "Can I apply for just single course units not the whole degree programs of the University of Padua?"
result_8b = chain_8b({"question": question_7, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_8b = result_8b['answer']
md(answer_8b)
llama_8b_outputs.append(answer_8b)

According to the provided information, it seems that the University of Padua does not offer the option to apply for single course units. The application procedures mentioned are for degree programs, not individual courses.

**Question 8**

Llama3 70b:

In [20]:
# Question 8 for the 70b chain, asked with an empty chat history.
question_8 = "My Bachelor's degree was a double degree program obtained from two different universities, one from Indonesia and one from Singapore, how does this affect my application for admission for the master's degree programs?"
result = chain({"question": question_8, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_70b = result['answer']
md(answer_70b)
llama_70b_outputs.append(answer_70b)

According to the provided context, since your Bachelor's degree was issued by two universities from different countries (Indonesia and Singapore), each university must be accredited according to the national educational accreditation system of the country where it is located. As long as both universities meet this requirement, your double degree should be eligible for admission to a Master's degree program.

Llama3 8b:

In [93]:
# Question 8 for the 8b chain, asked with an empty chat history.
question_8 = "My Bachelor's degree was a double degree program obtained from two different universities, one from Indonesia and one from Singapore, how does this affect my application for admission for the master's degree programs?"
result_8b = chain_8b({"question": question_8, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_8b = result_8b['answer']
md(answer_8b)
llama_8b_outputs.append(answer_8b)

According to the provided context, since your Bachelor's degree is a double degree program obtained from two different universities, one from Indonesia and one from Singapore, each institution must be accredited according to the national educational accreditation system of the country where it is located.

In this case, the University in Indonesia must be accredited according to the national educational accreditation system of Indonesia, and the University in Singapore must be accredited according to the national educational accreditation system of Singapore.

As long as both universities are accredited according to their respective national educational accreditation systems, your Bachelor's degree should be considered valid for admission to a Master's degree program.

**Question 9**

Llama3 70b:

In [21]:
# Question 9 for the 70b chain, asked with an empty chat history.
question_9 = "what are the eligibilities to apply for the admission to a master's degree program?"
result = chain({"question": question_9, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_70b = result['answer']
md(answer_70b)
llama_70b_outputs.append(answer_70b)

According to the provided context, to be eligible for admission to a Master's degree, you need:

1. A Bachelor's degree or equivalent foreign qualification
2. The degree should have specific curricular requirements
3. The degree should be obtained from a higher education institution accredited according to the national educational accreditation system of the country where it is located.

Llama3 8b:

In [94]:
# Question 9 for the 8b chain, asked with an empty chat history.
question_9 = "what are the eligibilities to apply for the admission to a master's degree program?"
result_8b = chain_8b({"question": question_9, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_8b = result_8b['answer']
md(answer_8b)
llama_8b_outputs.append(answer_8b)

According to the provided context, to be eligible for admission to a Master's degree, you need a Bachelor's degree or equivalent foreign qualification with specific curricular requirements obtained from a higher education institution accredited according to the national educational accreditation system of the country where it is located.

**Question 10**

Llama3 70b:

In [22]:

# Question 10 for the 70b chain, asked with an empty chat history.
question_10 = "Can you give the contact info so I can get more detailed academic information?"
result = chain({"question": question_10, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_70b = result['answer']
md(answer_70b)
llama_70b_outputs.append(answer_70b)

According to the provided context, the contact information for the University of Padua is:

* Address: Via 8 Febbraio, 2 - 35122 Padova
* Switchboard: +39 049 827 5111
* Call Centre: +39 049 827 3131
* Certified mail: amministrazione.centrale@pec.unipd.it
* Email: urp@unipd.it

You can reach out to them to request more detailed academic information.

Llama3 8b:

In [95]:
# Question 10 for the 8b chain, asked with an empty chat history.
question_10 = "Can you give the contact info so I can get more detailed academic information?"
result_8b = chain_8b({"question": question_10, "chat_history": []})

# Show the answer, then keep it so it can be scored in the evaluation section.
answer_8b = result_8b['answer']
md(answer_8b)
llama_8b_outputs.append(answer_8b)

According to the provided context, you can contact the University of Padua through the following means:

* Switchboard: +39 049 827 5111
* Call Centre: +39 049 827 3131
* Certified mail: amministrazione.centrale@pec.unipd.it
* Email: urp@unipd.it

You can reach out to them to get more detailed academic information.

In [96]:
# Number of 8b answers recorded so far
answer_count_8b = len(llama_8b_outputs)
print(answer_count_8b)

10


In [23]:
# Reference ("expected") answers, one entry per question, in question order.
# When the test cases are built, these are paired by position with the model
# outputs and with retrieval_contexts, so the order of the entries matters.
expected_answers = ["yes, you can. The University of Padua welcomes applications from international students. You are considered an “international student” if you hold a non-Italian academic diploma, degree or equivalent.",
                    "No. There are two type of courses; ENGLISH-taught degree courses and ITALIAN-taught degree courses ",
                    "No, unfortunately you can not. courses at the University of Padua start at the end of September/beginning of October. Enrolment for the 2nd semester is not available.",
                    "To be eligible for admission to a Bachelor’s degree or a single-cycle degree, you need a minimum 12-year school record. If you have been awarded a final diploma after a shorter programme of study (i.e. less than 12 years), in order to become eligible you must: have attended at least 1 (in case of 11 years of schooling) or 2 (in case of 10 years of schooling) years of academic studies and passed all the scheduled exams. OR have obtained a post-secondary qualification  in a non-University higher education institution, certifying the additional one or two years of education. OR have successfully completed one or two year preparatory courses (Foundation courses) to compensate for the missing years of schooling.",
                    "you must take additional courses to become eligible for admission to a Bachelor's degree program. If you have been awarded a final diploma after a shorter programme of study (i.e. less than 12 years), in order to become eligible you must: have attended at least 1 (in case of 11 years of schooling) or 2 (in case of 10 years of schooling) years of academic studies and passed all the scheduled exams. OR have obtained a post-secondary qualification  in a non-University higher education institution, certifying the additional one or two years of education. OR have successfully completed one or two year preparatory courses (Foundation courses) to compensate for the missing years of schooling. Or as an alternative, you must: have attended an additional one (in case of 12 years of schooling) or two (in case of 11 years of schooling) years of academic studies in another country and/or different foreign system and passed all the scheduled exams. OR have obtained an official Italian or foreign post-secondary qualification in a non-University higher education institution with a minimum duration of one or two years. OR have successfully completed Foundation courses held from an Italian University. These additional certificates only allow enrolment to the first year and cannot be further evaluated for shortening of your degree programme",
                    "You can check a country-based list of qualifications in order to verify whether their entry qualification is suitable to access your degree programme of interest.",
                    "yes, you can. If you are you interested in attending single course units and take the corresponding exams without having to be enrolled in any degree course, visit the dedicated page.",
                    "If your bachelor's degree was issued by two or more universities (e.g. the awarding and the teaching institutions are not the same and pertain to different national education systems), each institution must be accredited according to the national educational accreditation system of the country where it is located..",
                    "To be eligible for admission to a Master’s degree, you need a Bachelor’s degree or equivalent foreign qualification with specific curricular requirements obtained from a higher education institution accredited according to the national educational accreditation system of the country where it is located.",
                    "sure,For academic related information you can contact the programme coordinator/secretariat tel. +39 0498273131, or send an email to urp@unipd.it "]

In [24]:
!pip install -U deepeval

Collecting deepeval
  Downloading deepeval-0.21.46-py3-none-any.whl (245 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/245.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/245.6 kB[0m [31m2.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.6/245.6 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf==4.25.1 (from deepeval)
  Downloading protobuf-4.25.1-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk (from deepeval)
  Downloading sentry_sdk-2.3.1-py2.py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.0/289.0 kB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytest-repeat (from deepeval)
  Downloading pytest_repeat-0.9.3-py3

In [25]:
import os
from getpass import getpass

In [27]:
# Prompt for the OpenAI API key without echoing it; surrounding whitespace from
# a copy/paste is dropped so it cannot corrupt the key.
OPENAI_API_KEY = getpass("Enter your openai API token: ").strip()

# A blank entry keeps whatever key is already in the environment (e.g. when the
# cell is re-run) instead of overwriting it with an empty string.
if not OPENAI_API_KEY:
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "").strip()

# Fail here rather than storing an empty key.
# NOTE(review): presumably an empty key would otherwise only surface later as
# an authentication error from the gpt-4o based evaluation — confirm.
if not OPENAI_API_KEY:
    raise ValueError("No OpenAI API key provided")

# Export the key through the OPENAI_API_KEY environment variable
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

Enter your openai API token: ··········


In [28]:
# Context passage for each question, one entry per question, in question order.
# When the test cases are built, these are paired by position with the model
# outputs and with expected_answers, so the order of the entries matters.
# NOTE(review): these passages appear to be written out by hand rather than
# captured from the chain's retriever — confirm they match what the RAG
# pipeline actually retrieved for each question.
retrieval_contexts = [
    "The University of Padua welcomes applications from international students. You are considered an “international student” if you hold a non-Italian academic diploma, degree or equivalent.",
    "There are two types of degree courses; ENGLISH-taught degree courses: check application procedures and the course factsheets for detailed information. In order to submit your application (except for the Single-cycle degree programme in Medicine and Surgery), please go to: apply.unipd.it ITALIAN-taught degree courses: applications dates vary depending on the course - check application procedures",
    "Courses at the University of Padua start at the end of September/beginning of October. Enrolment for the 2nd semester is not available.",
    "To be eligible for admission to a Bachelor’s degree or a single-cycle degree, you need a minimum 12-year school record. If you have been awarded a final diploma after a shorter programme of study (i.e. less than 12 years), in order to become eligible you must: have attended at least 1 (in case of 11 years of schooling) or 2 (in case of 10 years of schooling) years of academic studies and passed all the scheduled exams. OR have obtained a post-secondary qualification  in a non-University higher education institution, certifying the additional one or two years of education. OR have successfully completed one or two year preparatory courses (Foundation courses) to compensate for the missing years of schooling.",
    "To be eligible for admission to a Bachelor’s degree or a single-cycle degree, you need a minimum 12-year school record. If you have been awarded a final diploma after a shorter programme of study (i.e. less than 12 years), in order to become eligible you must: have attended at least 1 (in case of 11 years of schooling) or 2 (in case of 10 years of schooling) years of academic studies and passed all the scheduled exams. OR have obtained a post-secondary qualification  in a non-University higher education institution, certifying the additional one or two years of education. OR have successfully completed one or two year preparatory courses (Foundation courses) to compensate for the missing years of schooling. As an alternative to this certification, you must: have attended an additional one (in case of 12 years of schooling) or two (in case of 11 years of schooling) years of academic studies in another country and/or different foreign system and passed all the scheduled exams. OR have obtained an official Italian or foreign post-secondary qualification in a non-University higher education institution with a minimum duration of one or two years. OR have successfully completed Foundation courses held from an Italian University. These additional certificates only allow enrolment to the first year and cannot be further evaluated for shortening of your degree programme",
    "Foreign entry qualifications suitable for admission; Applicants can consult a country-based list of qualifications in order to verify whether their entry qualification is suitable to access their degree programme of interest..",
    "Apply for a single course unit ; If you are you interested in attending single course units and take the corresponding exams without having to be enrolled in any degree course, visit the dedicated page.",
    "Should your study title be issued by two or more universities (e.g. the awarding and the teaching institutions are not the same and pertain to different national education systems), each institution must be accredited according to the national educational accreditation system of the country where it is located.",
    "To be eligible for admission to a Master’s degree, you need a Bachelor’s degree or equivalent foreign qualification with specific curricular requirements obtained from a higher education institution accredited according to the national educational accreditation system of the country where it is located.",
    "Global Engagement Office - Recruitment Unit Palazzo Anselmi Lungargine del Piovego 1 - 35131 Padova Get in touch with the Global Engagement Office For academic related information, please contact the programme coordinator/secretariat tel. +39 0498273131 or send an email to  urp@unipd.it "
]


**Llama3 70b RAG Evaluation:**

In [29]:
from deepeval import evaluate
from deepeval.metrics.ragas import RagasMetric
from deepeval.test_case import LLMTestCase

In [30]:
# Answers collected from the Llama3 70b chain, one per question
actual_outputs = llama_70b_outputs

# Reference answers, in the same question order
expected_outputs = expected_answers

# zip() stops at the shortest input, so a duplicated append (for example from
# re-running an answer cell) or a missing entry would silently pair answers
# with the wrong reference and context. Fail loudly instead.
if not (len(actual_outputs) == len(expected_outputs) == len(retrieval_contexts)):
    raise ValueError(
        "Mismatched evaluation inputs: "
        f"{len(actual_outputs)} actual outputs, "
        f"{len(expected_outputs)} expected outputs, "
        f"{len(retrieval_contexts)} retrieval contexts"
    )

# One RAGAS metric, judged by gpt-4o, with a pass threshold of 0.6
metric = RagasMetric(threshold=0.6, model="gpt-4o")

# Build one test case per question.
# NOTE(review): `input` is only a "Question N" placeholder, not the question
# text; input-dependent scores such as Answer Relevancy are presumably judged
# against that placeholder — consider passing the real questions.
test_cases = [
    LLMTestCase(
        input=f"Question {idx}",
        actual_output=actual_output,
        expected_output=expected_output,
        retrieval_context=[retrieval_context],  # Wrap the string in a list
    )
    for idx, (actual_output, expected_output, retrieval_context) in enumerate(
        zip(actual_outputs, expected_outputs, retrieval_contexts), start=1
    )
]

# Evaluate test cases
metrics = evaluate(test_cases, [metric])


Evaluating test cases...


Output()

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Metrics Summary

  - ✅ RAGAS (score: 0.8569645219502147, threshold: 0.6, strict: False, evaluation model: gpt-4o, reason: None, error: None)
      - Contextual Precision (ragas) (score: 0.9999999999)
      - Contextual Recall (ragas) (score: 0.6666666666666666)
      - Faithfulness (ragas) (score: 1.0)
      - Answer Relevancy (ragas) (score: 0.7611914212341923)

For test case:

  - input: Question 1
  - actual output: Yes, the University of Padua welcomes applications from international students, which includes non-Italian students who hold a non-Italian academic diploma, degree or equivalent.
  - expected output: yes, you can. The University of Padua welcomes applications from international students. You are considered an “international student” if you hold a non-Italian academic diploma, degree or equivalent.
  - context: None
  - retrieval context: ['The University of Padua welcomes applications from international students. You are considered an “international student” if you hol



In [97]:
from statistics import mean

# Keep the evaluation output under a more descriptive name
test_results = metrics

# Only one metric was evaluated, so the RagasMetric sits at index 0 of each result
ragas_metrics = [test_result.metrics[0] for test_result in test_results]
score_breakdowns = [ragas_metric.score_breakdown for ragas_metric in ragas_metrics]

# Per-question scores for each RAGAS component, plus the combined score
contextual_precision_scores = [breakdown['Contextual Precision (ragas)'] for breakdown in score_breakdowns]
contextual_recall_scores = [breakdown['Contextual Recall (ragas)'] for breakdown in score_breakdowns]
faithfulness_scores = [breakdown['Faithfulness (ragas)'] for breakdown in score_breakdowns]
answer_relevancy_scores = [breakdown['Answer Relevancy (ragas)'] for breakdown in score_breakdowns]
overall_scores = [ragas_metric.score for ragas_metric in ragas_metrics]

# Average every score across all questions
avg_contextual_precision = mean(contextual_precision_scores)
avg_contextual_recall = mean(contextual_recall_scores)
avg_faithfulness = mean(faithfulness_scores)
avg_answer_relevancy = mean(answer_relevancy_scores)
avg_overall_score = mean(overall_scores)

# Report the averages, one per line
print(
    "Llama3 70b results:",
    f"Contextual Precision (average score: {avg_contextual_precision})",
    f"Contextual Recall (average score: {avg_contextual_recall})",
    f"Faithfulness (average score: {avg_faithfulness})",
    f"Answer Relevancy (average score: {avg_answer_relevancy})",
    f"RAGAS (average overall score: {avg_overall_score})",
    sep="\n",
)

Llama3 70b results:
Contextual Precision (average score: 0.9999999999)
Contextual Recall (average score: 0.8333333333333334)
Faithfulness (average score: 0.6116666666666667)
Answer Relevancy (average score: 0.5973696628999718)
RAGAS (average overall score: 0.760592415699993)


**Llama3 8b RAG Evaluation:**

In [98]:
# Answers collected from the Llama3 8b chain, one per question
actual_outputs = llama_8b_outputs

# Reference answers, in the same question order
expected_outputs = expected_answers

# zip() stops at the shortest input, so a duplicated append (for example from
# re-running an answer cell) or a missing entry would silently pair answers
# with the wrong reference and context. Fail loudly instead.
if not (len(actual_outputs) == len(expected_outputs) == len(retrieval_contexts)):
    raise ValueError(
        "Mismatched evaluation inputs: "
        f"{len(actual_outputs)} actual outputs, "
        f"{len(expected_outputs)} expected outputs, "
        f"{len(retrieval_contexts)} retrieval contexts"
    )

# One RAGAS metric, judged by gpt-4o, with a pass threshold of 0.6
metric = RagasMetric(threshold=0.6, model="gpt-4o")

# Build one test case per question.
# NOTE(review): `input` is only a "Question N" placeholder, not the question
# text; input-dependent scores such as Answer Relevancy are presumably judged
# against that placeholder — consider passing the real questions.
test_cases_8b = [
    LLMTestCase(
        input=f"Question {idx}",
        actual_output=actual_output,
        expected_output=expected_output,
        retrieval_context=[retrieval_context],  # Wrap the string in a list
    )
    for idx, (actual_output, expected_output, retrieval_context) in enumerate(
        zip(actual_outputs, expected_outputs, retrieval_contexts), start=1
    )
]

# Evaluate test cases
metrics = evaluate(test_cases_8b, [metric])


Output()

Evaluating test cases...
Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Output()



Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]



Metrics Summary

  - ✅ RAGAS (score: 0.8067423658071304, threshold: 0.6, strict: False, evaluation model: gpt-4o, reason: None, error: None)
      - Contextual Precision (ragas) (score: 0.9999999999)
      - Contextual Recall (ragas) (score: 0.6666666666666666)
      - Faithfulness (ragas) (score: 0.8)
      - Answer Relevancy (ragas) (score: 0.760302796661855)

For test case:

  - input: Question 1
  - actual output: According to the provided context, yes, the University of Padua welcomes applications from international students. You are considered an "international student" if you hold a non-Italian academic diploma, degree or equivalent.
  - expected output: yes, you can. The University of Padua welcomes applications from international students. You are considered an “international student” if you hold a non-Italian academic diploma, degree or equivalent.
  - context: None
  - retrieval context: ['The University of Padua welcomes applications from international students. You are c



In [99]:
# Keep the evaluation output under a more descriptive name
test_results = metrics

# Only one metric was evaluated, so the RagasMetric sits at index 0 of each result
ragas_metrics = [test_result.metrics[0] for test_result in test_results]
score_breakdowns = [ragas_metric.score_breakdown for ragas_metric in ragas_metrics]

# Per-question scores for each RAGAS component, plus the combined score
contextual_precision_scores = [breakdown['Contextual Precision (ragas)'] for breakdown in score_breakdowns]
contextual_recall_scores = [breakdown['Contextual Recall (ragas)'] for breakdown in score_breakdowns]
faithfulness_scores = [breakdown['Faithfulness (ragas)'] for breakdown in score_breakdowns]
answer_relevancy_scores = [breakdown['Answer Relevancy (ragas)'] for breakdown in score_breakdowns]
overall_scores = [ragas_metric.score for ragas_metric in ragas_metrics]

# Average every score across all questions
avg_contextual_precision = mean(contextual_precision_scores)
avg_contextual_recall = mean(contextual_recall_scores)
avg_faithfulness = mean(faithfulness_scores)
avg_answer_relevancy = mean(answer_relevancy_scores)
avg_overall_score = mean(overall_scores)

# Report the averages, one per line
print(
    "Llama3 8b results:",
    f"Contextual Precision (average score: {avg_contextual_precision})",
    f"Contextual Recall (average score: {avg_contextual_recall})",
    f"Faithfulness (average score: {avg_faithfulness})",
    f"Answer Relevancy (average score: {avg_answer_relevancy})",
    f"RAGAS (average overall score: {avg_overall_score})",
    sep="\n",
)

Llama3 8b results:
Contextual Precision (average score: 0.9999999999)
Contextual Recall (average score: 0.8333333333333334)
Faithfulness (average score: 0.6810101010101011)
Answer Relevancy (average score: 0.678396193234063)
RAGAS (average overall score: 0.7981849068693743)
