RAG Pipeline


In [None]:
# !pip install assemblyai openai pygame nltk yt-dlp transformers opencv-python-headless scenedetect chromadb torch pytesseract pydub numpy pandas matplotlib seaborn sentence-transformers tqdm edge-tts
!sudo apt-get update
!sudo apt-get install -y tesseract-ocr

from yt_dlp import YoutubeDL
import os
import argparse
import subprocess
import cv2
from scenedetect import detect, ContentDetector
from scenedetect.video_manager import VideoManager
from scenedetect.scene_manager import SceneManager
from scenedetect.stats_manager import StatsManager
from transformers import AutoModel, AutoTokenizer
import torch
import chromadb
from chromadb.config import Settings
import os
import json
import torch
import pytesseract
import re
from datetime import timedelta
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import assemblyai as aai
from pydub import AudioSegment
import openai
import numpy as np
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sentence_transformers
from tqdm.autonotebook import tqdm, trange

In [None]:
# Credentials are taken from the environment so real keys never have to be
# written into (or committed with) the notebook. The original placeholders are
# kept as the fallback value so the cell still runs unchanged when the
# variables are not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR-API-KEY")
aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "YOUR-API-KEY")

In [None]:
def convert_to_seconds(timestamp):
    """Convert an ``H:MM:SS`` timestamp string into seconds.

    The seconds field may carry a fractional part (e.g. ``"0:00:45.500000"``,
    which is what ``str(timedelta)`` produces for non-whole seconds); hours and
    minutes must be integers.

    Args:
        timestamp: String with exactly three ``:``-separated fields.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If the string does not have three fields or a field is
            not numeric.
    """
    hours, minutes, seconds = timestamp.strip().split(":")
    return timedelta(
        hours=int(hours), minutes=int(minutes), seconds=float(seconds)
    ).total_seconds()

def generate_summary(text):
    """Ask the chat model for a one-line summary of ``text``.

    Args:
        text: The text snippet to summarize.

    Returns:
        The stripped summary string, or ``None`` if the request fails for any
        reason (the error is printed).
    """
    system_prompt = """
        Your job is to analyze a text snippet, and analyze the text content, summarizing it in one line. Highlight the main ideas, key terms, and concepts being discussed.
        Without any additional text, return this summary.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    try:
        # openai>=1.0 removed `openai.ChatCompletion`; the module-level
        # `openai.chat.completions` client replaces it and still honours
        # `openai.api_key`. The legacy call is kept for older installs.
        if hasattr(openai, "chat"):
            create_completion = openai.chat.completions.create
        else:
            create_completion = openai.ChatCompletion.create
        response = create_completion(
            model="gpt-4",  # Replace with your authorized model, e.g., gpt-3.5-turbo
            messages=messages,
            max_tokens=100,  # Adjust as needed
        )
        summary = response.choices[0].message.content
        return summary.strip()
    except Exception as e:
        print(f"Error generating summary: {e}")
        return None

In [None]:
def add_summaries_to_json(json_file):
    """Attach a ``"summary"`` field to every entry of a transcription file.

    The file is read, each entry's utterance texts are joined with spaces and
    passed to ``generate_summary``, and the file is rewritten in place.

    Args:
        json_file: Path to a JSON list whose entries hold a ``"transcription"``
            list of dicts with a ``"text"`` key.
    """
    with open(json_file, 'r') as source:
        segments = json.load(source)

    for segment in segments:
        spoken_parts = [part["text"] for part in segment["transcription"]]
        segment["summary"] = generate_summary(" ".join(spoken_parts))

    with open(json_file, 'w') as target:
        json.dump(segments, target, indent=4)
    print("Summaries have been added and the file is updated.")

def transcribe_audio_in_chunks(audio):
    """Transcribe ``audio`` in fixed-size chunks and save the result as JSON.

    Each chunk is exported to a temporary WAV file under ``audio_chunks/``,
    sent to AssemblyAI, and removed again. Successful chunks are written to
    ``transcriptions.json`` as a list of ``{start_time, end_time,
    transcription}`` entries; failed chunks are reported and skipped.

    Args:
        audio: Audio object supporting ``len()`` and slicing in milliseconds
            and an ``export`` method (the pipeline passes a pydub
            ``AudioSegment``).
    """
    CHUNK_SIZE = 45  # seconds per chunk
    MAX_ATTEMPTS = 3  # bounded retries; the old loop retried forever on errors
    chunk_ms = CHUNK_SIZE * 1000
    transcriptions = []
    chunks_dir = "audio_chunks"
    os.makedirs(chunks_dir, exist_ok=True)

    # The configuration is identical for every chunk, so build it once.
    config = aai.TranscriptionConfig(
        speech_model=aai.SpeechModel.best,
        summarization=True,
        sentiment_analysis=True,
        entity_detection=True,
        speaker_labels=True,
        filter_profanity=True,
        language_detection=True
    )
    transcriber = aai.Transcriber(config=config)

    # Stepping by chunk length avoids an empty trailing chunk when the audio
    # length is an exact multiple of the chunk size.
    for i, start_time in enumerate(range(0, len(audio), chunk_ms)):
        # end_time is deliberately not clamped to the audio length so the
        # stored timestamps stay whole seconds, as before.
        end_time = start_time + chunk_ms
        chunk = audio[start_time:end_time]
        chunk_filename = os.path.join(chunks_dir, f"chunk_{i}.wav")
        chunk.export(chunk_filename, format="wav")
        try:
            for _ in range(MAX_ATTEMPTS):
                transcript = transcriber.transcribe(chunk_filename)
                if transcript.status == aai.TranscriptStatus.completed:
                    break
        finally:
            # Always remove the temporary file, even if the request raised.
            os.remove(chunk_filename)

        if transcript.status != aai.TranscriptStatus.completed:
            print(f"Error in chunk {i}: {transcript.error}")
            continue

        transcriptions.append({
            "start_time": str(timedelta(milliseconds=start_time)),
            "end_time": str(timedelta(milliseconds=end_time)),
            "transcription": [  # Store each speaker's utterance separately
                {"speaker": utterance.speaker, "text": utterance.text}
                for utterance in (transcript.utterances or [])
            ]
        })

    with open('transcriptions.json', 'w') as json_file:
        json.dump(transcriptions, json_file, indent=4)
    print("Transcription completed and saved to 'transcriptions.json'.")

In [None]:
def perform_ocr(image_dir):
    """Run OCR on every ``.png`` in ``image_dir`` and save the text as JSON.

    Timestamps are looked up in ``<image_dir>/timestamps.json`` (file name ->
    timestamp); images without an entry get ``"00:00:00"``. Results are written
    to ``ocr_results_with_timestamps_newest.json`` in the working directory.

    Args:
        image_dir: Directory containing the scene images.

    Returns:
        None. The results are only persisted to the JSON file.
    """

    def load_timestamps(json_path):
        """Load the file-name -> timestamp mapping, or ``{}`` on any error."""
        try:
            with open(json_path, 'r') as f:
                return json.load(f)
        except Exception as e:
            print(f"Error loading timestamps from {json_path}: {e}")
            return {}

    def perform_ocr_with_preprocessing(image_dir, output_json_path, timestamps, max_text_length=500, language='eng'):
        """OCR each PNG after grayscale + inverted binary thresholding."""
        ocr_results = []
        # Sorted so the output order is deterministic; os.listdir order is
        # arbitrary and differed between runs and file systems.
        for image_file in sorted(os.listdir(image_dir)):
            if not image_file.endswith(".png"):
                continue
            image_path = os.path.join(image_dir, image_file)
            timestamp = timestamps.get(image_file, "00:00:00")
            try:
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals failure by returning None, not by raising.
                    raise ValueError(f"could not read image at {image_path}")
                gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
                _, thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY_INV)
                text = pytesseract.image_to_string(thresh, lang=language).strip()
                if len(text) > max_text_length:
                    text = text[:max_text_length] + "..."

                ocr_results.append({
                    "timestamp": timestamp,
                    "file_name": image_file,
                    "text": text
                })
            except Exception as e:
                # Best effort: one bad frame must not abort the whole batch.
                print(f"Error processing {image_file}: {e}")
        try:
            with open(output_json_path, "w", encoding="utf-8") as json_file:
                json.dump(ocr_results, json_file, ensure_ascii=False, indent=4)
            print(f"OCR results saved to {output_json_path}")
        except Exception as json_error:
            print(f"Error saving JSON file: {json_error}")
        return ocr_results

    timestamps_json_path = os.path.join(image_dir, 'timestamps.json')
    output_json_path = 'ocr_results_with_timestamps_newest.json'
    max_text_length = 50000
    timestamps = load_timestamps(timestamps_json_path)
    perform_ocr_with_preprocessing(image_dir, output_json_path, timestamps, max_text_length, language='eng')
    return None

In [None]:
def scene_detection(video_path):
    """Detect scene cuts in a video and save the first frame of each scene.

    Frames are written to ``scenes/scene_NNN.png`` and a mapping of file name
    to ``HH:MM:SS`` start time is written to ``scenes/timestamps.json``.

    Args:
        video_path: Path of the video to analyse. A falsy value falls back to
            ``"downloaded_video.mp4"``, the file this function previously
            always used regardless of the argument.
    """
    def format_timedelta(seconds):
        """Format a number of seconds as zero-padded ``HH:MM:SS``."""
        hours, remainder = divmod(int(seconds), 3600)
        minutes, seconds = divmod(remainder, 60)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

    # The argument used to be overwritten unconditionally with this name.
    video_path = video_path or "downloaded_video.mp4"
    output_dir = "scenes"
    os.makedirs(output_dir, exist_ok=True)

    video_manager = VideoManager([video_path])
    stats_manager = StatsManager()
    scene_manager = SceneManager(stats_manager)
    scene_manager.add_detector(ContentDetector())
    try:
        video_manager.set_downscale_factor()
        video_manager.start()
        scene_manager.detect_scenes(frame_source=video_manager)
        scenes = scene_manager.get_scene_list()
    finally:
        # Release the decoder even when detection fails.
        video_manager.release()

    cap = cv2.VideoCapture(video_path)
    timestamps = {}
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        for i, scene in enumerate(scenes):
            scene_start_frame = scene[0].get_frames()
            cap.set(cv2.CAP_PROP_POS_FRAMES, scene_start_frame)
            ret, frame = cap.read()
            if ret:
                scene_filename = f"scene_{i:03d}.png"
                cv2.imwrite(os.path.join(output_dir, scene_filename), frame)
                # OpenCV reports 0 fps when it cannot read the rate; fall back
                # to the timecode's own seconds instead of dividing by zero.
                timestamp = scene_start_frame / fps if fps > 0 else scene[0].get_seconds()
                timestamps[scene_filename] = format_timedelta(timestamp)
    finally:
        cap.release()

    with open(os.path.join(output_dir, "timestamps.json"), "w") as f:
        json.dump(timestamps, f, indent=4)
    print(f"Scene detection complete. Selected scenes saved in '{output_dir}' folder.")
    print(f"Timestamps saved in '{output_dir}/timestamps.json'.")

In [None]:
def download_video(url: str) -> str:
    """Download ``url`` with yt-dlp and return the local file path.

    The video is requested in the ``best[ext=mp4]`` format and always saved as
    ``downloaded_video.mp4`` in the working directory.
    """
    video_path = 'downloaded_video.mp4'
    downloader_options = {
        'format': 'best[ext=mp4]',
        'outtmpl': 'downloaded_video.mp4',
    }

    with YoutubeDL(downloader_options) as downloader:
        downloader.download([url])

    print(f"Video downloaded successfully: {video_path}")
    return video_path

def extract_audio(video_path):
    """Extract the audio track of ``video_path`` to ``extracted_audio.wav``.

    The audio is written as 16-bit PCM, 44.1 kHz, stereo using ffmpeg.

    Args:
        video_path: Path of the input video file.

    Returns:
        The path of the written WAV file, or ``None`` if ffmpeg is missing or
        exits with an error (the error is printed).
    """
    audio_path = 'extracted_audio.wav'
    command = [
        'ffmpeg',
        '-y',  # overwrite a WAV left by a previous run instead of aborting
        '-i', video_path,
        '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2',
        audio_path,
    ]
    try:
        # subprocess.call never raises CalledProcessError, so failures used to
        # go unnoticed; run(check=True) raises on a non-zero exit status.
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing ffmpeg executable (FileNotFoundError).
        print(f"Error during audio extraction: {e}")
        return None
    return audio_path

In [None]:
# Ask for the video URL interactively; surrounding whitespace is stripped.
# The value is later passed to run().
video_url = input("Please enter the YouTube video URL: ").strip()

Please enter the YouTube video URL: https://youtu.be/5sLYAQS9sWQ?feature=shared


In [None]:
def run(video_url):
    """Build the retrieval index for one video.

    Steps:
      1. If the transcript or scene timestamps are missing, download the
         video and run audio extraction, scene detection, OCR and
         transcription. Existing artifacts are reused so a re-run does not
         repeat the expensive steps.
      2. Add a summary to every transcript segment.
      3. Attach to each segment the scene images whose timestamp falls inside
         the segment's time range.
      4. Embed each segment summary with CLIP and with BERT and store both in
         Chroma collections under ``chromadb/``.

    Args:
        video_url: URL of the video; only used when the artifacts have to be
            (re)generated.

    Raises:
        RuntimeError: If audio extraction fails while generating artifacts.
    """
    transcription_path = 'transcriptions.json'
    timestamps_path = 'scenes/timestamps.json'

    # These steps used to be commented out, so a fresh kernel failed on the
    # missing files. Run them only when their outputs are not there yet.
    if not (os.path.exists(transcription_path) and os.path.exists(timestamps_path)):
        video_path = download_video(video_url)
        audio_path = extract_audio(video_path)
        if audio_path is None:
            raise RuntimeError("Audio extraction failed; cannot build the transcript.")
        scene_detection(video_path)
        perform_ocr('scenes')
        audio = AudioSegment.from_wav(audio_path)
        transcribe_audio_in_chunks(audio)

    add_summaries_to_json(transcription_path)

    with open(timestamps_path, 'r') as f:
        timestamps_images = json.load(f)

    timestamps_in_seconds = {image: convert_to_seconds(time) for image, time in timestamps_images.items()}

    with open(transcription_path, 'r') as f:
        transcription_data = json.load(f)

    for segment in transcription_data:
        start_time_sec = convert_to_seconds(segment["start_time"])
        end_time_sec = convert_to_seconds(segment["end_time"])
        segment["image_names"] = [
            image for image, time_sec in timestamps_in_seconds.items()
            if start_time_sec <= time_sec <= end_time_sec
        ]

    with open(transcription_path, 'w') as f:
        json.dump(transcription_data, f, indent=4)

    client_capstone = chromadb.PersistentClient(path="chromadb")

    def fresh_collection(name):
        """Return an empty collection called ``name``, reusing an existing one."""
        # create_collection raises when the collection already exists, which
        # made every re-run fail. Reuse it and clear old entries instead.
        collection = client_capstone.get_or_create_collection(name=name)
        stale_ids = collection.get()["ids"]
        if stale_ids:
            collection.delete(ids=stale_ids)
        return collection

    collection_transcriptions_clip = fresh_collection("Capstone_Kiwi_clip")
    collection_transcriptions_bert = fresh_collection("Capstone_Kiwi_bert")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model_name = "openai/clip-vit-base-patch32"
    clip_model = CLIPModel.from_pretrained(model_name).to(device)
    # Load the processor from the same checkpoint as the model (it used to
    # come from the patch16 checkpoint).
    processor = CLIPProcessor.from_pretrained(model_name)

    bert_model_name = "bert-base-uncased"
    bert_model = AutoModel.from_pretrained(bert_model_name).to(device)
    bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)

    def embed_text_bert(text):
        """Return the BERT [CLS] embedding of ``text`` as a numpy vector."""
        inputs = bert_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            outputs = bert_model(**inputs)
            text_embedding = outputs.last_hidden_state[:, 0, :]
        return text_embedding.squeeze().cpu().numpy()

    def embed_text(text):
        """Return the CLIP text embedding of ``text`` as a numpy vector."""
        # truncation=True keeps the input within CLIP's text context length;
        # longer summaries would otherwise fail inside the model.
        text_inputs = processor(text=[text], return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            text_embeddings = clip_model.get_text_features(**text_inputs)
        return text_embeddings.squeeze().cpu().numpy()

    def text_to_embed(item):
        """Use the summary, or the raw transcript if summarization failed."""
        summary = item.get('summary')
        if summary:
            return summary
        return " ".join(utterance["text"] for utterance in item.get("transcription", []))

    for i, item in enumerate(transcription_data):
        text = text_to_embed(item)
        collection_transcriptions_clip.add(
            documents=[str(item)],
            embeddings=[embed_text(text).tolist()],
            metadatas=None,
            ids=[str(i)]
        )
        collection_transcriptions_bert.add(
            documents=[str(item)],
            embeddings=[embed_text_bert(text).tolist()],
            metadatas=None,
            ids=[str(i)]
        )

    print("Data successfully added to collection_transcriptions.")
    print('Now we can initiate our audio LLM pipeline.')


# Build the transcript summaries and the Chroma collections for the URL entered above.
run(video_url)

Initiating the Chat Process (Audio/Text Mode)

In [None]:
import assemblyai as aai
from openai import OpenAI
import time
import os
import pygame
import threading
import atexit
import asyncio
import edge_tts
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")
from yt_dlp import YoutubeDL
import os
import argparse
import subprocess
from transformers import AutoModel, AutoTokenizer
import cv2
from scenedetect import detect, ContentDetector
from scenedetect.video_manager import VideoManager
from scenedetect.scene_manager import SceneManager
from scenedetect.stats_manager import StatsManager
import chromadb
from chromadb.config import Settings
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
import json
import torch
import pytesseract
import re
from datetime import timedelta
from transformers import CLIPProcessor, CLIPModel, CLIPTokenizer
import assemblyai as aai
from pydub import AudioSegment
from openai import OpenAI
import numpy as np
import ast
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sentence_transformers
from tqdm.autonotebook import tqdm, trange


In [None]:
class AI_Assistant:
    """Interactive voice/text tutor that answers questions about a video.

    Questions are embedded with CLIP, the closest transcript segments are
    fetched from the Chroma collections built by the indexing step, and the
    question plus context is sent to an OpenAI chat model. Answers are either
    printed (text mode) or spoken with edge-tts and pygame (audio mode).
    """

    # Link that used to be hardcoded into the answer prompt; kept as default.
    DEFAULT_VIDEO_URL = "https://youtu.be/zizonToFXDs?si=AEz6grjxNfZ_ZT1x"
    CLIP_MODEL_NAME = "openai/clip-vit-base-patch32"
    BERT_MODEL_NAME = "bert-base-uncased"

    def __init__(self, video_url=None):
        """Load models, open the Chroma collections and set the system prompt.

        Args:
            video_url: URL of the video being discussed, used for the
                "Relevant section" link in answers. Defaults to
                ``DEFAULT_VIDEO_URL``.
        """
        # Keys come from the environment; the placeholders are the fallback.
        aai.settings.api_key = os.environ.get("ASSEMBLYAI_API_KEY", "<Your-API-Key>")
        self.openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<Your-API-Key>"))
        self.video_url = video_url or self.DEFAULT_VIDEO_URL
        self.transcriber = None
        self.full_transcript = []
        nltk.download('stopwords')
        nltk.download('punkt_tab')
        self.stop_words = set(stopwords.words("english"))
        self.client_capstone = chromadb.PersistentClient(path="chromadb")
        self.collection_transcriptions_clip = self.client_capstone.get_collection(name="Capstone_Kiwi_clip")
        self.collection_transcriptions_bert = self.client_capstone.get_collection(name="Capstone_Kiwi_bert")
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.clip_model = CLIPModel.from_pretrained(self.CLIP_MODEL_NAME).to(self.device)
        self.clip_tokenizer = CLIPTokenizer.from_pretrained(self.CLIP_MODEL_NAME)
        # Processor now comes from the same checkpoint as the model.
        self.processor = CLIPProcessor.from_pretrained(self.CLIP_MODEL_NAME)
        # The model must live on the same device as the inputs built in
        # embed_text_bert, otherwise the forward pass fails on CUDA.
        self.bert_model = AutoModel.from_pretrained(self.BERT_MODEL_NAME).to(self.device)
        self.bert_tokenizer = AutoTokenizer.from_pretrained(self.BERT_MODEL_NAME)
        self.initialize_system_prompt()
        atexit.register(self.cleanup)

    def embed_text(self, text):
        """Return the CLIP text embedding of ``text`` as a numpy vector."""
        # truncation=True keeps long questions within CLIP's text context length.
        text_inputs = self.processor(
            text=[text], return_tensors="pt", padding=True, truncation=True
        ).to(self.device)
        with torch.no_grad():
            text_embeddings = self.clip_model.get_text_features(**text_inputs)
        return text_embeddings.squeeze().cpu().numpy()

    def embed_text_bert(self, text):
        """Return the BERT [CLS] embedding of ``text`` as a numpy vector."""
        inputs = self.bert_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(self.device)
        with torch.no_grad():
            outputs = self.bert_model(**inputs)
            text_embedding = outputs.last_hidden_state[:, 0, :]
        return text_embedding.squeeze().cpu().numpy()

    def get_relevant_context(self, query_text):
        """Return the three closest stored segments formatted as one string."""
        merged_info = ""
        query_embedding = self.embed_text(query_text)
        # Pass a list holding one plain-float vector: one query, n_results hits.
        results = self.collection_transcriptions_clip.query(
            query_embeddings=[query_embedding.tolist()], n_results=3
        )
        data_string = results['documents'][0]
        for i, item in enumerate(data_string):
            merged_info += f"Context {i+1}: \n\n"
            merged_info += f"\n\n{item}\n\n"
        return merged_info

    def initialize_system_prompt(self):
        """Append the tutor system prompt to ``self.full_transcript``."""
        system_prompt = {
            "role": "system",
            "content": (
                "You are an AI teacher designed to assist students in understanding video content. "
                "When a student asks a question, analyze the provided context from the video transcription carefully "
                "and respond in a clear, engaging, and student-friendly manner. Match the language style and tone of the video "
                "to maintain a cohesive learning experience. Your answers should be concise if the query requires a direct response, "
                "limited to 2-3 sentences. For more complex or detailed questions, provide thorough explanations in 5-6 sentences, "
                "incorporating examples, quotes, or key points from the video transcription where relevant. "
                "Always aim to cite the exact timestamps (start and end) from the video to guide students to specific sections for additional clarity. "
                "Use phrases like 'as explained in the video,' 'as mentioned around,' or 'as demonstrated in the example at' to cite the timestamps and "
                "to help connect the response to the video content directly. Encourage students to revisit the video sections for a deeper understanding. "
                "Ensure your responses are tailored to the context of the video and focus on promoting an interactive and supportive learning environment. "
                "Without any additional text, return this answer."
            ),
        }
        self.full_transcript.append(system_prompt)

    def preprocess_question(self, question):
        """Lower-case and tokenize ``question``, dropping English stop words."""
        tokens = word_tokenize(question.lower())
        filtered_tokens = [word for word in tokens if word not in self.stop_words]
        return ' '.join(filtered_tokens)

    def start_transcription(self):
        """Open a realtime transcription session fed from the microphone."""
        self.transcriber = aai.RealtimeTranscriber(
            sample_rate=16000,
            on_data=self.on_data,
            on_error=self.on_error,
            on_open=self.on_open,
            on_close=self.on_close,
            end_utterance_silence_threshold=1000
        )
        self.transcriber.connect()
        microphone_stream = aai.extras.MicrophoneStream(sample_rate=16000)
        self.transcriber.stream(microphone_stream)

    def stop_transcription(self):
        """Close the realtime session if one is open."""
        if self.transcriber:
            self.transcriber.close()
            self.transcriber = None

    # Annotations are quoted so they are not evaluated when the class is defined.
    def on_open(self, session_opened: "aai.RealtimeSessionOpened"):
        """Session-opened callback: tell the user to start speaking."""
        print("Please speak now...")
        return

    def on_data(self, transcript: "aai.RealtimeTranscript"):
        """Data callback: answer only once a final transcript with text arrives."""
        if not transcript.text:
            return
        if isinstance(transcript, aai.RealtimeFinalTranscript):
            self.generate_ai_response(transcript)

    def on_error(self, error: "aai.RealtimeError"):
        """Error callback: report the error."""
        print("An error occurred:", error)
        return

    def on_close(self):
        """Session-closed callback: nothing to do."""
        return

    def generate_summary(self):
        """Print a two-paragraph summary built from the stored segment summaries.

        Reads ``transcriptions.json``, joins the per-segment summaries and asks
        the chat model to condense them, then returns to the menu. Any error is
        printed rather than raised.
        """
        try:
            with open('transcriptions.json', 'r') as file:
                transcriptions_data = json.load(file)
            summary_texts = []
            for entry in transcriptions_data:
                summary_texts.append(entry.get('summary', 'No summary available'))
            aggregated_summary = " ".join(summary_texts)

            prompt = (
                "Please summarize the following information into two clear and concise paragraphs. The information is from a video that the user is watching, and learning from. "
                "The summary should highlight the key concepts, include important details, and provide any relevant examples to help the user understand. "
                "Make sure the content is easy to follow and captures the main points. \n\n"
                f"{aggregated_summary}"
            )
            messages_to_send = [{"role": "user", "content": prompt}]
            response = self.openai_client.chat.completions.create(
                model="gpt-4o-mini",
                messages=messages_to_send,
                max_tokens=10000
            )

            ai_response = response.choices[0].message.content
            print("\nAI Summary Response:\n", ai_response)
            self.prompt_user()
        except Exception as e:
            print(f"An error occurred: {e}")

    def _timestamp_link(self):
        """Return the video URL with a ``t=XXX`` placeholder query parameter."""
        separator = "&" if "?" in self.video_url else "?"
        return f"{self.video_url}{separator}t=XXX"

    def generate_ai_response(self, transcript, mode="audio"):
        """Answer the question in ``transcript.text`` using retrieved context.

        Args:
            transcript: Any object with a ``text`` attribute holding the question.
            mode: ``"audio"`` to speak the answer, anything else to print it.
        """
        self.stop_transcription()
        user_text = transcript.text
        processed_question = self.preprocess_question(user_text)
        relevant_context = self.get_relevant_context(processed_question)
        # Every fragment ends with a space so sentences do not run together.
        final_prompt = (
            f"This is the user question:\n{user_text}\n\n"
            f"These are the relevant contexts in order of relevancy:\n{relevant_context}\n\n"
            "Use the provided context to craft a well-structured, engaging, and listener-friendly response to the question. "
            "Ensure your answer is accurate and uses examples, references, or quotes from the context to provide clarity and depth. "
            "If possible, highlight specific sections or details from the context, using timestamps or references to guide the listener back to the source material. "
            "Always include timestamps from the provided context if they are available, and mention them explicitly in your response as it would help the student learn better. "
            "For example, include phrases like 'as mentioned in the context,' 'as explained in the section around,' or 'as demonstrated in the example provided.' "
            "If the context does not fully address the question, rely on your knowledge to give a concise, efficient answer that remains accurate and informative. "
            "After completing the response, provide the YouTube link for the relevant timestamp as a separate line in the following format: "
            f"'Relevant section: {self._timestamp_link()}' where 'XXX' is the most relevant timestamp in seconds. "
            "Prioritize clarity and engagement in your response, tailoring it to ensure it fits the tone and style expected by the audience. "
            "If the question is something which is completely out of context, and has no relevance to the main ideas of the video, in that case "
            "don't give any timestamps and the links from the video as it would not make sense. Just answer the question in a general way, and ask the user to ask questions about the video "
            "in a more direct way. "
            "Return only the answer and the link with no additional commentary or text."
        )
        self.full_transcript.append({"role": "user", "content": user_text})
        # Only the system prompt and the current question are sent; the stored
        # history is kept for the end-of-session printout.
        messages_to_send = [
            self.full_transcript[0],
            {"role": "user", "content": final_prompt},
        ]
        response = self.openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages_to_send,
            max_tokens=10000
        )
        ai_response = response.choices[0].message.content
        self.full_transcript.append({"role": "assistant", "content": ai_response})
        if mode == "audio":
            asyncio.run(self.generate_audio(ai_response))
        else:
            print(f"\nAI Teacher: {ai_response}")
            self.prompt_user()

    async def generate_audio(self, text):
        """Print ``text``, speak it, then return to the menu."""
        print(f"\nAI Teacher: {text}")
        communicate = edge_tts.Communicate(text, "en-AU-NatashaNeural", rate="+20%")
        await communicate.save("response.mp3")
        pygame.mixer.init()
        pygame.mixer.music.load("response.mp3")
        pygame.mixer.music.play()
        clock = pygame.time.Clock()
        while pygame.mixer.music.get_busy():
            clock.tick(10)
        # Release the file before deleting it; some platforms refuse to remove
        # a file that is still loaded. `unload` is absent on older pygame.
        if hasattr(pygame.mixer.music, "unload"):
            pygame.mixer.music.unload()
        os.remove("response.mp3")
        self.prompt_user()

    def prompt_user(self):
        """Show the menu once and dispatch on the user's choice."""
        try:
            user_input = input(
                "\nEnter 1 to ask a question (voice mode), 2 to ask a question in text-mode, 3 to get a summary of the conversation/video, or 0 to finish the conversation: ")
            if user_input == "1":
                self.start_transcription()
            elif user_input == "2":
                question = input("\nPlease type your question: ")
                # Minimal stand-in exposing the `.text` attribute that
                # generate_ai_response reads.
                transcript = type("Transcript", (object,), {"text": question})()
                self.generate_ai_response(transcript, mode="text")
            elif user_input == "3":
                print("\nGenerating summary of the conversation...\n")
                self.generate_summary()
            elif user_input == "0":
                print("\nConversation Transcript: \n\n")
                self.full_transcript.pop(0)
                for entry in self.full_transcript:
                    print(f"{entry['role'].capitalize()}: {entry['content']}\n\n")
                print("\nThank you for the conversation!\n")
                self.cleanup()
                # Hard exit as before, but with a success status for a normal finish.
                os._exit(0)
            else:
                print("Invalid input. Please enter 1, 2, 3, or 0.")
                self.prompt_user()
        except KeyboardInterrupt:
            self.cleanup()
            print("\nProgram interrupted. Exiting.")
            os._exit(1)

    def cleanup(self):
        """Release the realtime session.

        Registered with ``atexit``, so it must not terminate the process
        itself. It used to call ``os._exit(1)``, which turned every normal
        interpreter exit into a failure status and made the code after each
        ``cleanup()`` call unreachable. Callers that want to exit do so
        explicitly.
        """
        self.stop_transcription()

# Create the assistant first, then speak the greeting; generate_audio hands
# control to the interactive menu once playback has finished.
ai_assistant = AI_Assistant()
greeting = "Hey! How are you doing, How can I help you?"
asyncio.run(ai_assistant.generate_audio(greeting))

In [None]:
!pip install langchain-openai
!pip install ragas

Evaluation

In [None]:
# Hand-collected evaluation samples: the question, the reference summary, the
# three retrieved context summaries, and the assistant's answer.
# Fixed versus the earlier version: the typo "mul3ti-task" in the fourth
# reference, and a stray "', 'image_names'" fragment that had been pasted into
# the first retrieved context of the second sample.
data = [
    {
        "user_input": "How can LLMs be compared to the analogy of training a dog through commands?",
        "reference": "Large language models are general-purpose AI designed through pre-training and fine-tuning processes, analogous to training a dog with basic commands followed by specialized training for specific tasks.",
        "retrieved_contexts": [
            "Large language models are general-purpose AI designed through pre-training and fine-tuning processes, analogous to training a dog with basic commands followed by specialized training for specific tasks.",
            "The text discusses the advantages of using large language models (LLMs) for dialog applications, emphasizing their ability to handle extensive data without requiring expertise or training examples, focusing instead on effective prompt design.",
            "Large language models are pre-trained on extensive datasets for general purposes and then fine-tuned on smaller datasets for specific tasks, allowing versatile applications and minimal training requirements.",
        ],
        "response": "Large language models (LLMs) can be compared to training a dog using commands because both processes involve a foundational stage followed by specialized training. As explained in the video, training a dog initially involves teaching basic commands like sit and stay, which equip the dog to interact well in everyday life. Similarly, LLMs undergo pre-training on large datasets to grasp general language functionalities before being fine-tuned for specific tasks, akin to how a service dog might receive additional specialized training for particular roles (0:00:45 - 0:01:30). This process ensures that LLMs are versatile, much like a well-trained dog that is capable of performing various tasks with minimal further instruction.",
    },
    {
        "user_input": "How are large language models (LLMs) related to deep learning?",
        "reference": "The text introduces a course on Large Language Models (LLMs) conducted by John Ewald at Google Cloud, covering definitions, use cases, prompt tuning, and Google's Genai tools, while linking LLMs to deep learning and generative AI.",
        "retrieved_contexts": [
            "Large language models are pre-trained on extensive datasets for general purposes and then fine-tuned on smaller datasets for specific tasks, allowing versatile applications and minimal training requirements.",
            "Large language models are versatile AI systems designed to address various language tasks and can be specialized for specific industries by utilizing extensive training data and high parameter counts.",
            "The text discusses the distinction between parameters and hyperparameters in machine learning, emphasizing that models trained on common human language tasks can serve as foundational tools for others, particularly due to resource constraints in developing large language models.",
        ],
        "response": "Large language models (LLMs) are deeply connected to deep learning as they are fundamentally based on neural network architectures that learn from extensive datasets. As explained in the video, LLMs utilize a large amount of training data—often up to petabytes—and can have billions of parameters which equip them to tackle various language tasks like translation and text classification (Context 1, 0:03:00 - 0:03:45). The deep learning framework enables these models to pre-train on general language tasks and then be fine-tuned for specific applications, maximizing their versatility and efficiency.",
    },
    {
        "user_input": "What makes large language models versatile?",
        "reference": "Large language models are versatile AI systems designed to address various language tasks and can be specialized for specific industries by utilizing extensive training data and high parameter counts.",
        "retrieved_contexts": [
            "Large language models are pre-trained on extensive datasets for general purposes and then fine-tuned on smaller datasets for specific tasks, allowing versatile applications and minimal training requirements.",
            "Prompt design is crucial for all models, while prompt engineering is specific to high-performance systems, with three types of large language models (generic, instruction tuned, dialogue tuned) that require different prompting approaches.",
            "Large language models are versatile AI systems designed to address various language tasks and can be specialized for specific industries by utilizing extensive training data and high parameter counts.",
        ],
        "response": "Large language models are versatile because they are initially pre-trained on massive datasets for general use and then fine-tuned on smaller, task-specific datasets. This allows a single model to perform a wide range of tasks such as language translation, text classification, and question answering with minimal additional training, as highlighted around 0:03:00 to 0:03:45. Their ability to adapt to both general and specific tasks makes them immensely valuable across various industries.",
    },
    {
        "user_input": "What system does Palm use for multi-task training?",
        "reference": "Palm is a 540 billion parameter dense decoder-only transformer model, utilizing Google's Pathways system for efficient multi-task training and improved understanding through distributed computation.",
        "retrieved_contexts": [
            "Large language models excel in few-shot and zero-shot learning scenarios, demonstrating increasing performance with more data and parameters, exemplified by Google's 540 billion parameter Palm model.",
            "Palm is a 540 billion parameter dense decoder-only transformer model, utilizing Google's Pathways system for efficient multi-task training and improved understanding through distributed computation.",
            "The text describes the integration of Palm API with Maker Suite, which provides tools for model training, deployment, and monitoring of machine learning models through a user-friendly interface.",
        ],
        "response": "Palm uses Google's Pathways system for multi-task training, as mentioned around 0:04:30. This system allows Palm, a 540 billion parameter dense decoder-only transformer model, to efficiently train across multiple TPU V4 pods and manage various tasks simultaneously, improving its understanding of the world. The Pathways architecture enhances the model's ability to learn new tasks quickly and orchestrates distributed computation",
    },
    {
        "user_input": "What features make Google’s Palm API user-friendly for developers?",
        "reference": "The text describes the integration of Palm API with Maker Suite, which provides tools for model training, deployment, and monitoring of machine learning models through a user-friendly interface.",
        "retrieved_contexts": [
            "Palm is a 540 billion parameter dense decoder-only transformer model, utilizing Google's Pathways system for efficient multi-task training and improved understanding through distributed computation.",
            "The text describes the integration of Palm API with Maker Suite, which provides tools for model training, deployment, and monitoring of machine learning models through a user-friendly interface.",
            "Large language models excel in few-shot and zero-shot learning scenarios, demonstrating increasing performance with more data and parameters, exemplified by Google's 540 billion parameter Palm model.",
        ],
        "response": "Google’s Palm API is user-friendly for developers primarily due to its integration with Maker Suite, as mentioned around 0:15:00 to 0:15:45. This integration provides a graphical user interface and various tools, including model training, deployment, and monitoring. The model training tool allows developers to train models using different algorithms, while the deployment tool offers various options to put models into production. Additionally, the monitoring tool provides performance insights through a user-friendly dashboard, making the entire process straightforward and efficient.",
    },
]


# --- RAG evaluation with ragas ------------------------------------------------
# Scores the hand-built `data` samples (each with user_input / reference /
# retrieved_contexts / response) using three ragas metrics. OpenAI models act
# as the judge LLM and as the embedding backend.
import getpass
import os

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas import EvaluationDataset, evaluate
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, FactualCorrectness, SemanticSimilarity

# Do not hardcode the key in the notebook: reuse the key already exported in
# the environment, and only prompt (without echoing) when none is set. This
# also avoids clobbering a valid key with a placeholder string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

eval_dataset = EvaluationDataset.from_list(data)

# Wrap the LangChain clients so ragas metrics can call them.
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4"))
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

metrics = [
    # LLMContextRecall(llm=evaluator_llm),  # disabled; import it from ragas.metrics before enabling
    FactualCorrectness(mode="precision", llm=evaluator_llm),
    Faithfulness(llm=evaluator_llm),
    SemanticSimilarity(embeddings=evaluator_embeddings),
]

results = evaluate(dataset=eval_dataset, metrics=metrics)

# One row per evaluated sample, one column per metric.
df = results.to_pandas()
print(df)

Evaluating:   0%|          | 0/3 [00:00<?, ?it/s]

                                          user_input  \
0  What features make Google’s Palm API user-frie...   

                                  retrieved_contexts  \
0  [Palm is a 540 billion parameter dense decoder...   

                                            response  \
0  Google’s Palm API is user-friendly for develop...   

                                           reference  factual_correctness  \
0  The text describes the integration of Palm API...                 0.18   

   faithfulness  semantic_similarity  
0      0.181818             0.944585  


In [None]:
# Preview the first five rows of the per-sample metric scores.
# NOTE(review): the saved output below carries an "Unnamed: 0" column and five
# rows, while the evaluation cell above printed a single row -- this output may
# come from a different run or a re-loaded CSV; confirm on a fresh kernel.
df.head(n=5)

Unnamed: 0,user_input,retrieved_contexts,response,reference,factual_correctness,faithfulness,semantic_similarity
0,How can LLMs be compared to the analogy of tra...,[Large language models are general-purpose AI ...,Large language models (LLMs) can be compared t...,Large language models are general-purpose AI d...,0.33,0.555556,0.926641
1,How are large language models (LLMs) related t...,[Large language models are pre-trained on exte...,Large language models (LLMs) are deeply connec...,The text introduces a course on Large Language...,0.09,0.181818,0.900435
2,What makes large language models versatile?,[Large language models are pre-trained on exte...,Large language models are versatile because th...,Large language models are versatile AI systems...,0.43,0.555556,0.941025
3,What system does Palm use for multi-task train...,[Large language models excel in few-shot and z...,Palm uses Google's Pathways system for multi-t...,Palm is a 540 billion parameter dense decoder-...,0.57,0.5,0.952131
4,What features make Google’s Palm API user-frie...,[Palm is a 540 billion parameter dense decoder...,Google’s Palm API is user-friendly for develop...,Palm is a 540 billion parameter dense decoder-...,0.0,0.181818,0.847082
