# Import dependencies and environment variables

In [1]:
import os
import time
from dotenv import load_dotenv

import yt_dlp
from moviepy.editor import VideoFileClip
import whisper
from pyannote.audio import Pipeline
import torch
import ffmpeg

from langchain_openai import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document

from langchain.chains import (
    create_retrieval_chain,
    RetrievalQA,
)

from langgraph.checkpoint import MemorySaver
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage

from langchain.tools.retriever import create_retriever_tool
from langchain_core.tools import tool

from langgraph.prebuilt import create_react_agent

import ipywidgets as widgets
from IPython.display import display

from langchain.agents import Tool
from langsmith.evaluation import evaluate
from langchain import hub
from langsmith import Client
from langchain.schema import HumanMessage
import json

In [2]:
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
ELEVEN_API_KEY = os.getenv('ELEVEN_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable not set")
elif not LANGCHAIN_API_KEY:
    raise ValueError("LANGCHAIN_API_KEY environment variable not set")

In [3]:
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Ironhack_Project3"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"


# Video-processing functions

In [4]:
# General functions
def download_youtube_video(url):
    ydl_opts = {
        'format': 'mp4',
        'outtmpl': 'downloads/video.%(ext)s',
        'verbose': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            
            metadata = {
                "title": info_dict.get('title', 'video'),
                "id": info_dict.get('id'),
                "duration": info_dict.get('duration'),
                "upload_date": info_dict.get('upload_date'),
                "uploader": info_dict.get('uploader'),
                "uploader_id": info_dict.get('uploader_id'),
                "view_count": info_dict.get('view_count'),
                "like_count": info_dict.get('like_count'),
                "dislike_count": info_dict.get('dislike_count'),
                "average_rating": info_dict.get('average_rating'),
                "age_limit": info_dict.get('age_limit'),
                "categories": ", ".join(info_dict.get('categories', [])),
                "tags": ", ".join(info_dict.get('tags', [])),
                "ext": info_dict.get('ext'),
                "thumbnail": info_dict.get('thumbnail'),
                "description": info_dict.get('description'),
                "channel": info_dict.get('channel'),
                "channel_id": info_dict.get('channel_id'),
                "is_live": info_dict.get('is_live'),
                "release_date": info_dict.get('release_date'),
                "availability": info_dict.get('availability')
            }
            
            for key, value in metadata.items():
                if value is None:
                    metadata[key] = "Empty"
            
            print(f"Video title: {metadata['title']}")

            video_ext = metadata['ext']
            initial_path = os.path.abspath(f'downloads/video.{video_ext}')
            if not os.path.isfile(initial_path):
                raise FileNotFoundError(f"Downloaded video file not found: {initial_path}")

            counter = 1
            final_path = os.path.abspath(f'downloads/video_{counter}.{video_ext}')
            while os.path.isfile(final_path):
                counter += 1
                final_path = os.path.abspath(f'downloads/video_{counter}.{video_ext}')

            os.rename(initial_path, final_path)
            print(f"Downloaded video saved to: {final_path}")

            return final_path, metadata['title'], metadata
    except Exception as e:
        print(f"Error downloading video: {e}")
        return None, None, None

def extract_audio(video_path):
    try:
        video = VideoFileClip(video_path)
        audio_path = video_path.replace('.mp4', '.wav')
        audio_path = os.path.abspath(audio_path)
        print(f"Extracting audio to: {audio_path}")
        video.audio.write_audiofile(audio_path)
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Extracted audio file not found: {audio_path}")
        return audio_path
    except Exception as e:
        print(f"Error extracting audio: {e}")
        return None


In [5]:
# create_transcription_from_video functions
def transcribe_audio(audio_path):
    try:
        print(f"Transcribing audio from: {audio_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        
        model = whisper.load_model("base")
        result = model.transcribe(audio_path)
        print(f"Transcription completed.")
        return result['text']
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return ""

def combine_metadata_and_transcription(metadata, transcription):
    combined_text = "Metadata:\n"
    for key, value in metadata.items():
        combined_text += f"{key}: {value}\n"
    combined_text += "\nTranscription:\n" + transcription
    return combined_text

def create_transcription_from_video(url):
    video_path, title, metadata = download_youtube_video(url)
    if not video_path:
        return None
    
    audio_path = extract_audio(video_path)
    if not audio_path:
        return None
    
    transcription = transcribe_audio(audio_path)
    
    combined_text = combine_metadata_and_transcription(metadata, transcription)
    
    document = Document(page_content=combined_text, metadata=metadata)
    
    # Clean up files
    try:
        os.remove(video_path)
        os.remove(audio_path)
    except Exception as e:
        print(f"Error deleting files: {e}")
    
    return document

In [6]:
# create_diarization_from_video functions
def transcribe_audio_with_timestamps(audio_path):
    try:
        print(f"Transcribing audio from: {audio_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        
        model = whisper.load_model("base")
        result = model.transcribe(audio_path, word_timestamps=True)
        print(f"Transcription completed.")
        return result
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return {}
        
def get_words_timestamps(result_transcription):
    words = {}
    word_counter = 0
    for segment in result_transcription["segments"]:
        for word in segment.get("words", []):
            words[f"word_{word_counter}"] = {
                "text": word["word"],
                "start": word["start"],
                "end": word["end"],
            }
            word_counter += 1
    return words

def words_per_segment(res_transcription, res_diarization, add_buffer=False, fixed_margin=0.5, gap_scale_factor=0.3):
    def calculate_dynamic_buffer(idx, segments):
        if idx == 0 or idx == len(segments) - 1:
            return fixed_margin
        previous_end = segments[idx - 1].end
        current_start = segments[idx].start
        return (current_start - previous_end) * gap_scale_factor

    res_trans_dia = {}
    segments = list(res_diarization.itersegments())

    words = get_words_timestamps(res_transcription)

    for idx, (segment, _, speaker) in enumerate(res_diarization.itertracks(yield_label=True)):
        buffer_time = calculate_dynamic_buffer(idx, segments) if add_buffer else 0

        adjusted_start = max(0, segment.start - buffer_time) if idx != 0 else 0
        adjusted_end = segment.end + buffer_time if idx != len(segments) - 1 else segment.end

        segment_words = []
        for _, word in words.items():
            if word["start"] >= adjusted_start and word["end"] <= adjusted_end:
                segment_words.append(word["text"])
            if word["start"] >= adjusted_end:
                break

        res_trans_dia[f"segment_{idx}"] = {
            "speaker": speaker,
            "text": " ".join(segment_words),
            "start": adjusted_start,
            "end": adjusted_end,
        }
    return res_trans_dia

def combine_metadata_transcription_diarization(metadata, transcription, diarization):
    combined_text = "Metadata:\n"
    for key, value in metadata.items():
        combined_text += f"{key}: {value}\n"
    combined_text += "\nDiarization:\n"
    for segment in diarization.values():
        combined_text += f"Speaker {segment['speaker']} [{segment['start']:.2f} - {segment['end']:.2f}]: {segment['text']}\n"
    return combined_text

def create_diarization_from_video(url):
    video_path, title, metadata = download_youtube_video(url)
    if not video_path:
        return None
    
    audio_path = extract_audio(video_path)
    if not audio_path:
        return None
    
    transcription_result = transcribe_audio_with_timestamps(audio_path)
    if not transcription_result:
        return None
    
    # Initialize Pyannote pipeline for speaker diarization
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=os.getenv("HF_TOKEN"))
    
    # Check if CUDA is available and use it
    if torch.cuda.is_available():
        pipeline.to(torch.device("cuda"))
        print("Using CUDA for Pyannote diarization")
    
    diarization_result = pipeline(audio_path)
    
    # Process diarization and transcription results
    final_result = words_per_segment(transcription_result, diarization_result)
    
    combined_text = combine_metadata_transcription_diarization(metadata, transcription_result, final_result)
    
    document = Document(page_content=combined_text, metadata=metadata)
    
    # Clean up files
    try:
        os.remove(video_path)
        os.remove(audio_path)
    except Exception as e:
        print(f"Error deleting files: {e}")
    
    return document


# Processing video

In [7]:
# Example transcription usage with Warhammer video
url = "https://www.youtube.com/watch?v=N3rdQazLZPA"
document = create_transcription_from_video(url)
if document:
    print("Document created successfully:")
    print(document)
else:
    print("Failed to create document")

[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out UTF-8 (No VT), error UTF-8 (No VT), screen UTF-8 (No VT)
[debug] yt-dlp version stable@2024.07.09 from yt-dlp/yt-dlp [7ead7332a] (pip) API
[debug] params: {'format': 'mp4', 'outtmpl': 'downloads/video.%(ext)s', 'verbose': True, 'compat_opts': set(), 'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate'}}
[debug] Python 3.9.19 (CPython AMD64 64bit) - Windows-10-10.0.19045-SP0 (OpenSSL 3.3.1 4 Jun 2024)
[debug] exe versions: ffmpeg 7.0.1 (setts), ffprobe 7.0.1
[debug] Optional libraries: Cryptodome-3.20.0, brotli-1.0.9, certifi-2024.07.04, mutagen-1.47.0, requests-2.32.3, sqlite3-3.45.3, urllib3-1.26.19, websockets-12.0
[debug] Proxy map: {}
[debug] Request Handlers: urllib, requests, websock

[youtube] Extracting URL: https://www.youtube.com/watch?v=N3rdQazLZPA
[youtube] N3rdQazLZPA: Downloading webpage
[youtube] N3rdQazLZPA: Downloading ios player API JSON


[debug] Loading youtube-nsig.d60b0ef9 from cache
[debug] [youtube] Decrypted nsig Zd0ydtIKeLOA8oOf => BzqFtkbsUjr6Fg
[debug] Loading youtube-nsig.d60b0ef9 from cache
[debug] [youtube] Decrypted nsig _ZO0p_Rqc3R14sAC => 1xDOLi_QO-BfpA


[youtube] N3rdQazLZPA: Downloading m3u8 information


[debug] Sort order given by extractor: quality, res, fps, hdr:12, source, vcodec:vp9.2, channels, acodec, lang, proto
[debug] Formats sorted by: hasvid, ie_pref, quality, res, fps, hdr:12(7), source, vcodec:vp9.2(10), channels, acodec, lang, proto, size, br, asr, vext, aext, hasaud, id


[info] N3rdQazLZPA: Downloading 1 format(s): 18


[debug] Invoking http downloader on "https://rr1---sn-h5nhv8pa-h5qs.googlevideo.com/videoplayback?expire=1721429584&ei=8JmaZrScMf_Gp-oPlJK26Aw&ip=66.81.167.174&id=o-AKGGZ5_kkhWgp0f0B0WUwdl2gZ-zbLm2qBF8emxtnSoI&itag=18&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&mh=2w&mm=31%2C29&mn=sn-h5nhv8pa-h5qs%2Csn-h5q7knee&ms=au%2Crdu&mv=m&mvi=1&pl=21&pcm2=no&initcwndbps=1055000&bui=AXc671L-EdQDwdjahKNLLqDrQstgB49670ZVHTFIYqbGFvDRgSvoet6gUoD2U5QkElcVGNVdfhX53BKw&spc=NO7bAUlA8qIWTn_IUKW24WBkVxjrUWnJLiePyjMW2SGuZ5RC4sbIG3DuUppx07Y&vprv=1&svpuc=1&mime=video%2Fmp4&ns=QoERRQeLcz3Q6tFRZlrWyjIQ&rqh=1&gir=yes&clen=41267042&ratebypass=yes&dur=2003.928&lmt=1719559448634554&mt=1721407734&fvip=1&c=WEB&sefc=1&txp=5438434&n=BzqFtkbsUjr6Fg&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cpcm2%2Cbui%2Cspc%2Cvprv%2Csvpuc%2Cmime%2Cns%2Crqh%2Cgir%2Cclen%2Cratebypass%2Cdur%2Clmt&sig=AJfQdSswRQIgPl7p7U7ft9wEGa6PWrk_ibVDunwBohrGbat_EPzDkYECIQCD9ssCa1VFnj5xRtuDcSB1gsZQFABnv1B3pOKzLth9mw%3D%3D&ls

[download] Destination: downloads\video.mp4
[download] 100% of   39.36MiB in 00:00:03 at 12.06MiB/s    
Video title: Every Unreleased Codex in 10th Edition and What Models They Should Get
Downloaded video saved to: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.mp4
Extracting audio to: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav
MoviePy - Writing audio in C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav


                                                                        

MoviePy - Done.
Transcribing audio from: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav
Transcription completed.
Document created successfully:
page_content='Metadata:
title: Every Unreleased Codex in 10th Edition and What Models They Should Get
id: N3rdQazLZPA
duration: 2004
upload_date: 20240627
uploader: Auspex Tactics
uploader_id: @auspextactics
view_count: 66292
like_count: 1984
dislike_count: Empty
average_rating: Empty
age_limit: 0
categories: Gaming
tags: warhammer 40k, 10th edition, codex, miniature, releases, predictions, warhammer, 40K, news, rumours, leaks, reveal, warhammer community, games workshop, review, preview, prices, release date, warhammer 40000, space marines, GW
ext: mp4
thumbnail: https://i.ytimg.com/vi_webp/N3rdQazLZPA/maxresdefault.webp
description: Let's talk miniature releases for the Warhammer 40K armies, what models are they likely to receive, and what do 

In [8]:
# Example diarization usage with meme rpg video
url = "https://www.youtube.com/watch?v=vr_FPbAHids"
document = create_diarization_from_video(url)
if document:
    print("Document created successfully:")
    print(document)
else:
    print("Failed to create document")

[debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out UTF-8 (No VT), error UTF-8 (No VT), screen UTF-8 (No VT)
[debug] yt-dlp version stable@2024.07.09 from yt-dlp/yt-dlp [7ead7332a] (pip) API
[debug] params: {'format': 'mp4', 'outtmpl': 'downloads/video.%(ext)s', 'verbose': True, 'compat_opts': set(), 'http_headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', 'Sec-Fetch-Mode': 'navigate'}}
[debug] Python 3.9.19 (CPython AMD64 64bit) - Windows-10-10.0.19045-SP0 (OpenSSL 3.3.1 4 Jun 2024)
[debug] exe versions: ffmpeg 7.0.1 (setts), ffprobe 7.0.1
[debug] Optional libraries: Cryptodome-3.20.0, brotli-1.0.9, certifi-2024.07.04, mutagen-1.47.0, requests-2.32.3, sqlite3-3.45.3, urllib3-1.26.19, websockets-12.0
[debug] Proxy map: {}
[debug] Request Handlers: urllib, requests, websock

[youtube] Extracting URL: https://www.youtube.com/watch?v=vr_FPbAHids
[youtube] vr_FPbAHids: Downloading webpage
[youtube] vr_FPbAHids: Downloading ios player API JSON


[debug] Loading youtube-nsig.d60b0ef9 from cache
[debug] [youtube] Decrypted nsig a9DvKvf6-01kOs-6 => 1D5QvNukY_j8gQ
[debug] Loading youtube-nsig.d60b0ef9 from cache
[debug] [youtube] Decrypted nsig HmsLKANqMlhkzjW8 => wRTKMaJKIUA42A


[youtube] vr_FPbAHids: Downloading m3u8 information


[debug] Sort order given by extractor: quality, res, fps, hdr:12, source, vcodec:vp9.2, channels, acodec, lang, proto
[debug] Formats sorted by: hasvid, ie_pref, quality, res, fps, hdr:12(7), source, vcodec:vp9.2(10), channels, acodec, lang, proto, size, br, asr, vext, aext, hasaud, id


[info] vr_FPbAHids: Downloading 1 format(s): 18


[debug] Invoking http downloader on "https://rr5---sn-h5nhv8pa-h5qe.googlevideo.com/videoplayback?expire=1721429648&ei=MJqaZqisKsfcmLAP88aT8As&ip=66.81.167.174&id=o-ALca83-dKDGOYrRjgUDB1r2pXuKUijs82DnnONWRMlBT&itag=18&source=youtube&requiressl=yes&xpc=EgVo2aDSNQ%3D%3D&mh=mw&mm=31%2C29&mn=sn-h5nhv8pa-h5qe%2Csn-h5qzen7l&ms=au%2Crdu&mv=m&mvi=5&pcm2cms=yes&pl=21&initcwndbps=1153750&bui=AXc671LVMl5xQ4S_HBJRa5NZT4IoCptCVYt0TPY8rjOD8tnujohKb0HUMcGU2BigWHEaJvE9fRkQKt8L&spc=NO7bAXRv6SVTsMfZfsB16ZaDWQxmvC3UacVggFtEXUrfgyKWC6BrL1tXVbLPeAE&vprv=1&svpuc=1&mime=video%2Fmp4&ns=nfUcqVLbbkgnUUKh512wB5UQ&rqh=1&cnr=14&ratebypass=yes&dur=154.505&lmt=1699376532057842&mt=1721407734&fvip=5&c=WEB&sefc=1&txp=5538434&n=1D5QvNukY_j8gQ&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cxpc%2Cbui%2Cspc%2Cvprv%2Csvpuc%2Cmime%2Cns%2Crqh%2Ccnr%2Cratebypass%2Cdur%2Clmt&sig=AJfQdSswRQIhAMhuZqN_zpEkXg_I43QQFGM7hrP3TuTw_F1Rln4XlY7SAiAUmGlhUIifrGDuZaqV9agHH6o0OCbekwXgix-BLs8Nug%3D%3D&lsparams=mh%2Cmm%2Cmn%2Cms%2

[download] Destination: downloads\video.mp4
[download] 100% of    8.60MiB in 00:00:00 at 12.70MiB/s    
Video title: When the healer thinks they're DPS
Downloaded video saved to: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.mp4
Extracting audio to: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav
MoviePy - Writing audio in C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav


                                                                      

MoviePy - Done.
Transcribing audio from: C:\Users\Pedro\Documents\Ironhack\NinthWeek\Project_3\project-3-business-case-multimodal-ai-chatbot-for-yt-video-qa\notebooks\downloads\video_1.wav
Transcription completed.
Using CUDA for Pyannote diarization
Document created successfully:
page_content='Metadata:
title: When the healer thinks they're DPS
id: vr_FPbAHids
duration: 155
upload_date: 20220803
uploader: Viva La Dirt League
uploader_id: @VivaLaDirtLeague
view_count: 1964202
like_count: 99906
dislike_count: Empty
average_rating: Empty
age_limit: 0
categories: Gaming
tags: NPC Man, PUBG Logic, pubg real life, pubg, pubg logic supercut, pubg movie, pubg parody, Viva La Dirt League, vldl, epic npc man, pubg mobile, pubg skits, viva la dirt league pubg, pubg funny, pubg live action, vldl pubg, gaming, online gaming, gamer, playtech, bored, game logic
ext: mp4
thumbnail: https://i.ytimg.com/vi/vr_FPbAHids/maxresdefault.jpg
description: Plz healers... stop acting like you're DPS

SUPPORT US 

# Agents

In [9]:
# Add document to the vector database

try:
    vectorstore = Chroma.from_documents(documents=[document], embedding=OpenAIEmbeddings())
except Exception as e:
    print(f"Error creating vectorstore: {e}")
    exit()

retriever = vectorstore.as_retriever()


In [10]:
llm = ChatOpenAI(model="gpt-4o")

In [11]:
# retrieval qa chain
#from langchain.chains import SimpleChain

try:
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever
    )

except Exception as e:
    print(f"Error initializing QA chains: {e}")
    exit()

In [14]:
# First we initialize the model we want to use.
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o", temperature=0)


In [15]:
from langgraph.checkpoint import MemorySaver

memory = MemorySaver()

In [16]:
#simplified tools, use this for now
from langchain_core.tools import tool
tools = [
    Tool(
        name='video_transcript_retriever',
        func=qa.run,
        description=(
            'Searches and returns excerpts from the transcript of the user uploaded video.'
        )
    ),
]

prompt = '''You are a chatbot that answers questions and perform tasks about a video that the user provides. If lacking context, assume the user is always talking about the video.
    You must always identify the language the user is utilizing in their last message and answer in that language, unless the user tells you otherwise. 
    If the user asks for a transcription, provide it even if it's long by iteratively chunking the request. Consider that the transcription might not be perfect everytime.
    Use more steps when needed in order to get the right answer.
    Consider if you need to use your tools. '''

In [17]:
from langgraph.prebuilt import create_react_agent
agent = create_react_agent(model, tools=tools, messages_modifier=prompt, checkpointer=memory)
config = {"configurable": {"thread_id": "5"}}

In [18]:
def print_stream(stream):
    for s in stream:
        message = s["messages"][-1]
        if isinstance(message, tuple):
            print(message)
        else:
            message.pretty_print()

In [19]:
query = "What is the healer's punchline?"
inputs = {"messages": [("user", query)]}
print_stream(agent.stream(inputs, config=config, stream_mode="values"))


What is the healer's punchline?
Tool Calls:
  video_transcript_retriever (call_4FVnz6nr7jrJJTCV70O3zLjN)
 Call ID: call_4FVnz6nr7jrJJTCV70O3zLjN
  Args:
    __arg1: healer's punchline


Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


Name: video_transcript_retriever

"How about you just not get hit by the boss so much?"

The healer's punchline is: "How about you just not get hit by the boss so much?"


In [19]:
config = {"configurable": {"thread_id": "2"}}

In [20]:
query = "Quién subió el vídeo?"
inputs = {"messages": [("user", query)]}
agent.invoke(inputs, config=config, stream_mode="values")

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


{'messages': [HumanMessage(content='Quién subió el vídeo?', id='cc992821-d6de-4ed9-a707-c02ebe8a36c1'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_HdLZXxq7EIkiClh6bFrXuZVG', 'function': {'arguments': '{"__arg1":"Quién subió el vídeo?"}', 'name': 'video_transcript_retriever'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 204, 'total_tokens': 229}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_c4e5b6fa31', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-c10f1474-bf3e-43c3-a776-167ff0b26f3e-0', tool_calls=[{'name': 'video_transcript_retriever', 'args': {'__arg1': 'Quién subió el vídeo?'}, 'id': 'call_HdLZXxq7EIkiClh6bFrXuZVG', 'type': 'tool_call'}], usage_metadata={'input_tokens': 204, 'output_tokens': 25, 'total_tokens': 229}),
  ToolMessage(content='El vídeo fue subido por Auspex Tactics.', name='video_transcript_retriever', id='a58ceebd-537e-4378-a791-8e3ee8a85602', tool_call_id=

# Evaluation

We are going to build 2 evaluation sets: for Q&A and for summarization, 2 of the main tasks the model is going to execute. They have to be oriented for RAG

In order to get them right, we will use LangSmith https://docs.smith.langchain.com/tutorials/Developers/rag#evaluating-intermediate-steps and DeepEval https://docs.confident-ai.com/docs/guides-rag-evaluation

We first need to create our datasets

In [22]:
config = {"configurable": {"thread_id": "3"}}

In [27]:
### Dataset name
dataset_name = "Video_Test"

client = Client()

# Define dataset: these are your test cases
dataset = client.create_dataset(dataset_name)

LangSmithConflictError: Conflict for /datasets. HTTPError('409 Client Error: Conflict for url: https://api.smith.langchain.com/datasets', '{"detail":"Dataset with this name already exists."}')

In [None]:
client.create_examples(
    inputs=[
        {"input_question": "Hello"},
        {"input_question": "What is the video about?"},
        {"input_question": "What was the last miniature that was released for the Thousand Sons?"},
        {"input_question": "What is the most likely miniature to be released for Imperial Agents?"},
        {"input_question": "What are the Space Wolves units that are less in need of an update?"},
        {"input_question": "What can we expect the Black Templars to get if we are very optimistic?"},
        {"input_question": "What is a common practice from Games Workshop when releasing a new codex?"},
    ],
    outputs=[
        {"output_answer": "Hello, how can I help you?."},
        {"output_answer": "The video is about all the Warhammer 40K factions that are still missing a Codex in 10th edition and what novelties will the codexes bring when they are released, focusing on new possible miniatures."},
        {"output_answer": "The Infernal Master."},
        {"output_answer": "Inquisitor Coteaz."},
        {"output_answer": "Wulfen and Thunderwolves."},
        {"output_answer": "Some themed Terminators."},
        {"output_answer": "They release at least one miniature, usually one or two characters and often releasing Battleforces."},
    ],
    dataset_id=dataset.id,
)

In [31]:
def extract_final_answer(messages):
    """
    Extracts the content of the last AIMessage from the messages.
    
    Args:
    messages (list): List of message dictionaries containing messages from human, AI, and tools.
    
    Returns:
    str: The content of the last AIMessage.
    """
    # Iterate over the messages in reverse order to find the last AIMessage
    for message in reversed(messages):
        # Check if the message is an instance of AIMessage
        if isinstance(message, AIMessage):
            return message.content
    return ''


def predict_rag_answer(example: dict):
    #Use this for answer evaluation
    query = example["input_question"]
    # Format inputs properly
    inputs = {"messages": [{"role": "user", "content": query}]}
    answer = agent.invoke(inputs, config=config, stream_mode="values")
    
    if 'messages' in answer:
        response = extract_final_answer(answer['messages'])
    else:
        response = "No valid response found."
    
    return {"answer": response}

In [33]:
#Implement this if there is time, for evaluating correctly the retrieved documents and hallucinations.

"""
#Implement this if there is time, for evaluating correctly the retrieved documents and hallucinations.
def predict_rag_answer_with_context(example: dict):
    #Use this for answer evaluation
    query = example["input_question"]
    # Format inputs properly
    inputs = {"messages": [{"role": "user", "content": query}]}
    answer = agent.invoke(inputs, config=config, stream_mode="values")
    
    if 'messages' in answer:
        response = extract_final_answer(answer['messages'])
    else:
        response = "No valid response found."
    
    return {"answer": response}
"""

'\n#Implement this if there is time, for evaluating correctly the retrieved documents and hallucinations.\ndef predict_rag_answer_with_context(example: dict):\n    #Use this for answer evaluation\n    query = example["input_question"]\n    # Format inputs properly\n    inputs = {"messages": [{"role": "user", "content": query}]}\n    answer = agent.invoke(inputs, config=config, stream_mode="values")\n    \n    if \'messages\' in answer:\n        response = extract_final_answer(answer[\'messages\'])\n    else:\n        response = "No valid response found."\n    \n    return {"answer": response}\n'

### Response vs reference answer

In [77]:
# Grade prompt
grade_prompt_answer_accuracy = hub.pull("langchain-ai/rag-answer-vs-reference")

def answer_evaluator(run, example) -> dict:
    """
    A simple evaluator for RAG answer accuracy
    """
    # Access example correctly
    input_question = example.inputs["input_question"]
    reference = example.outputs["output_answer"]
    prediction = run.outputs["answer"]

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_answer_accuracy | llm

    # Run evaluator
    score = answer_grader.invoke({
        "question": input_question,
        "correct_answer": reference,
        "student_answer": prediction
    })
    score = score["Score"]

    return {"key": "answer_v_reference_score", "score": score}

### Response vs input

In [78]:
# Grade prompt
grade_prompt_answer_helpfulness = prompt = hub.pull("langchain-ai/rag-answer-helpfulness")

def answer_helpfulness_evaluator(run, example) -> dict:
    """
    A simple evaluator for RAG answer helpfulness
    """

    # Get question, ground truth answer, RAG chain answer
    input_question = example.inputs["input_question"]
    prediction = run.outputs["answer"]

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_answer_helpfulness | llm

    # Run evaluator
    score = answer_grader.invoke({"question": input_question,
                                  "student_answer": prediction})
    score = score["Score"]

    return {"key": "answer_helpfulness_score", "score": score}

### Response vs retrieved docs

In [79]:
# Prompt
grade_prompt_hallucinations = hub.pull("langchain-ai/rag-answer-hallucination")

def answer_hallucination_evaluator(run, example) -> dict:
    """
    A simple evaluator for generation hallucination
    """

    # RAG inputs
    input_question = example.inputs["input_question"]
    contexts = run.outputs.get("contexts", [])

    # RAG answer
    prediction = run.outputs.get("answer", "No valid response found.")

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_hallucinations | llm

    # Get score
    score = answer_grader.invoke({"documents": contexts,
                                  "student_answer": prediction})
    score = score["Score"]

    return {"key": "answer_hallucination", "score": score}


### Retrieved docs vs input

In [80]:
# Grade prompt
grade_prompt_doc_relevance = hub.pull("langchain-ai/rag-document-relevance")

def docs_relevance_evaluator(run, example) -> dict:
    """
    A simple evaluator for document relevance
    """

    # RAG inputs
    input_question = example.inputs["input_question"]
    contexts = run.outputs.get("contexts", [])

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_doc_relevance | llm

    # Get score
    score = answer_grader.invoke({"question":input_question,
                                  "documents":contexts})
    score = score["Score"]

    return {"key": "document_relevance", "score": score}

## Run evaluators

In [81]:
experiment_results = evaluate(
    predict_rag_answer,
    data=dataset_name,
    evaluators=[
        answer_evaluator,
        answer_helpfulness_evaluator,
        answer_hallucination_evaluator,
        docs_relevance_evaluator
    ],
    experiment_prefix="Full_final_test",
    metadata={"version": "Video_final_test, ChatMistralAI"},
)

View the evaluation results for experiment: 'Full_final_test-fa05f3ce' at:
https://smith.langchain.com/o/7a212ba4-5b5a-5574-8675-8c191876cf10/datasets/6c5bcb88-b355-4d61-a3f2-751d1d1fddcd/compare?selectedSessions=90b5d39b-c3fb-4218-9fa8-b70b93b082c1




0it [00:00, ?it/s]

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
