In [1]:
import os
import time
from dotenv import load_dotenv

import yt_dlp
from moviepy.editor import VideoFileClip
import whisper
from pyannote.audio import Pipeline
import torch
import ffmpeg
from PIL import Image
import base64
from io import BytesIO 

from langchain_openai import ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document

from langchain.chains import create_retrieval_chain, create_history_aware_retriever, RetrievalQA
from langgraph.checkpoint import MemorySaver
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, ToolMessage

from langchain.tools.retriever import create_retriever_tool
from langchain.agents import Tool

from langgraph.prebuilt import create_react_agent

import ipywidgets as widgets
from IPython.display import display
import cv2
from transformers import pipeline

from pytube import YouTube
import requests
from openai import OpenAI


In [2]:
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LANGCHAIN_API_KEY = os.getenv('LANGCHAIN_API_KEY')
ELEVEN_API_KEY = os.getenv('ELEVEN_API_KEY')
HF_TOKEN = os.getenv('HF_TOKEN')

if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable not set")
elif not LANGCHAIN_API_KEY:
    raise ValueError("LANGCHAIN_API_KEY environment variable not set")

In [3]:
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Ironhack_Project3"
os.environ["LANGCHAIN_ENDPOINT"]="https://api.smith.langchain.com"

# Set up directories
os.makedirs('uploads', exist_ok=True)
os.makedirs('downloads', exist_ok=True)

In [4]:
# Function to handle file uploads
# Function to handle file uploads
# Helper function to encode image in base64
# Helper function to encode image in base64
def encode_image(image):
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

# Function to handle file uploads
def handle_file_upload():
    uploader = widgets.FileUpload(accept='video/*', multiple=False)
    display(uploader)
    return uploader

# Function to save uploaded file
def save_uploaded_file(file):
    start_time = time.time()
    if file.value:
        for file_info in file.value:
            filename = file_info['name']
            content = file_info['content']
            print(f"Filename: {filename}")
            file_path = os.path.join('uploads', filename)
            with open(file_path, 'wb') as f:
                f.write(content)
            end_time = time.time()
            duration = end_time - start_time
            print(f"File {filename} uploaded successfully to {file_path} in {duration:.2f} seconds")
            return file_path, filename, duration
    else:
        print("No file uploaded.")
        return None, None, 0

# Function to extract metadata using ffmpeg
def extract_metadata_ffmpeg(video_path):
    try:
        probe = ffmpeg.probe(video_path)
        video_info = next(stream for stream in probe['streams'] if stream['codec_type'] == 'video')
        metadata = {
            "duration": float(video_info['duration']),
            "width": int(video_info['width']),
            "height": int(video_info['height']),
            "codec_name": video_info['codec_name'],
            "bit_rate": int(video_info['bit_rate'])
        }
        print(f"Extracted metadata: {metadata}")
        return metadata
    except Exception as e:
        print(f"Error extracting metadata with ffmpeg: {e}")
        return None

# Function to download video from YouTube
def download_youtube_video(url):
    start_time = time.time()
    ydl_opts = {
        'format': 'mp4',
        'outtmpl': 'downloads/video.%(ext)s',
        'verbose': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
            
            metadata = {
                "title": info_dict.get('title', 'video'),
                "id": info_dict.get('id'),
                "duration": info_dict.get('duration'),
                "upload_date": info_dict.get('upload_date'),
                "uploader": info_dict.get('uploader'),
                "uploader_id": info_dict.get('uploader_id'),
                "view_count": info_dict.get('view_count'),
                "like_count": info_dict.get('like_count'),
                "dislike_count": info_dict.get('dislike_count'),
                "average_rating": info_dict.get('average_rating'),
                "age_limit": info_dict.get('age_limit'),
                "categories": ", ".join(info_dict.get('categories', [])),
                "tags": ", ".join(info_dict.get('tags', [])),
                "ext": info_dict.get('ext'),
                "thumbnail": info_dict.get('thumbnail'),
                "description": info_dict.get('description'),
                "channel": info_dict.get('channel'),
                "channel_id": info_dict.get('channel_id'),
                "is_live": info_dict.get('is_live'),
                "release_date": info_dict.get('release_date'),
                "availability": info_dict.get('availability')
            }
            
            for key, value in metadata.items():
                if value is None:
                    metadata[key] = "Empty"
            
            print(f"Video title: {metadata['title']}")

            video_ext = metadata['ext']
            initial_path = os.path.abspath(f'downloads/video.{video_ext}')
            if not os.path.isfile(initial_path):
                raise FileNotFoundError(f"Downloaded video file not found: {initial_path}")

            counter = 1
            final_path = os.path.abspath(f'downloads/video_{counter}.{video_ext}')
            while os.path.isfile(final_path):
                counter += 1
                final_path = os.path.abspath(f'downloads/video_{counter}.{video_ext}')

            os.rename(initial_path, final_path)
            print(f"Downloaded video saved to: {final_path}")

            end_time = time.time()
            duration = end_time - start_time
            print(f"Time taken for downloading video: {duration:.2f} seconds")
            
            return final_path, metadata, duration
    except Exception as e:
        print(f"Error downloading video: {e}")
        return None, None, 0

# Function to extract audio from video
def extract_audio(video_path):
    start_time = time.time()
    try:
        video = VideoFileClip(video_path)
        audio_path = video_path.replace('.mp4', '.wav')
        audio_path = os.path.abspath(audio_path)
        print(f"Extracting audio to: {audio_path}")
        video.audio.write_audiofile(audio_path)
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Extracted audio file not found: {audio_path}")
        end_time = time.time()
        duration = end_time - start_time
        print(f"Audio extracted in {duration:.2f} seconds")
        return audio_path, duration
    except Exception as e:
        print(f"Error extracting audio: {e}")
        return None, 0

# Function to select Whisper model based on duration and mode
def select_whisper_model(duration, mode='Fast'):
    if mode == "Accurate":
        if duration > 3600:  # More than 1 hour
            model_name = "tiny"
        elif duration > 1800:  # More than 30 minutes
            model_name = "base"
        elif duration > 600:  # More than 10 minutes
            model_name = "small"
        else:  # 10 minutes or less
            model_name = "medium"
    else:  # Fast mode
        if duration > 1800:  # More than 30 minutes
            model_name = "tiny"
        elif duration > 600:  # More than 10 minutes
            model_name = "base"
        else:  # 10 minutes or less
            model_name = "small"
    
    print(f"Selected Whisper model: {model_name}")
    return model_name

# Function to transcribe audio
def transcribe_audio(audio_path, duration, mode='Fast'):
    start_time = time.time()
    try:
        print(f"Transcribing audio from: {audio_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        model_name = select_whisper_model(duration, mode)
        model = whisper.load_model(model_name)
        
        # Check if CUDA is available and use it
        if torch.cuda.is_available():
            model = model.to("cuda")
            print("Using CUDA for Whisper transcription")
        
        result = model.transcribe(audio_path)
        end_time = time.time()
        transcription_duration = end_time - start_time
        print(f"Transcription completed in {transcription_duration:.2f} seconds.")
        return result['text'], transcription_duration
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return "", 0

# Function to transcribe audio with timestamps
def transcribe_audio_with_timestamps(audio_path, duration, mode='Fast'):
    start_time = time.time()
    try:
        print(f"Transcribing audio from: {audio_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        model_name = select_whisper_model(duration, mode)
        model = whisper.load_model(model_name)
        
        # Check if CUDA is available and use it
        if torch.cuda.is_available():
            model = model.to("cuda")
            print("Using CUDA for Whisper transcription")
        
        result = model.transcribe(audio_path, word_timestamps=True)
        end_time = time.time()
        transcription_duration = end_time - start_time
        print(f"Transcription with timestamps completed in {transcription_duration:.2f} seconds.")
        return result, transcription_duration
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        return {}, 0

# Function to combine metadata and transcription
def combine_metadata_and_transcription(metadata, transcription):
    combined_text = "Metadata:\n"
    for key, value in metadata.items():
        combined_text += f"{key}: {value}\n"
    combined_text += "\nTranscription:\n" + transcription
    return combined_text

# Function to combine metadata, transcription, and diarization
def combine_metadata_transcription_diarization(metadata, transcription, diarization):
    combined_text = "Metadata:\n"
    for key, value in metadata.items():
        combined_text += f"{key}: {value}\n"
    combined_text += "\nDiarization:\n"
    for segment in diarization.itertracks(yield_label=True):
        speaker = segment[2]
        start_time = segment[0].start
        end_time = segment[0].end
        combined_text += f"Speaker {speaker} [{start_time:.2f} - {end_time:.2f}]: {transcription['segments'][0]['text']}\n"
    return combined_text

# Function to perform diarization
def perform_diarization(audio_path, duration, mode='Fast'):
    start_time = time.time()
    try:
        pipeline = pipeline("speaker-diarization")
        
        # Check if CUDA is available and use it
        if torch.cuda.is_available():
            pipeline.to(torch.device("cuda"))
            print("Using CUDA for Pyannote diarization")
        
        diarization_result = pipeline(audio_path)
        end_time = time.time()
        diarization_duration = end_time - start_time
        print(f"Diarization completed in {diarization_duration:.2f} seconds.")
        return diarization_result, diarization_duration
    except Exception as e:
        print(f"Error during diarization: {e}")
        return None, 0

# Function to extract frames from video
def extract_frames(video_path, num_frames=10):
    video_capture = cv2.VideoCapture(video_path)
    total_frames = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = total_frames // num_frames
    
    frames = []
    for i in range(0, total_frames, frame_interval):
        video_capture.set(cv2.CAP_PROP_POS_FRAMES, i)
        ret, frame = video_capture.read()
        if ret:
            timestamp = video_capture.get(cv2.CAP_PROP_POS_MSEC) / 1000
            frames.append((frame, timestamp))
    
    video_capture.release()
    return frames

# Function to get text summaries from frames using GPT-4o-mini
def summarize_frames(frames, api_key):
    summaries = []
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    for frame, timestamp in frames:
        image = Image.fromarray(frame)
        base64_image = encode_image(image)
        payload = {
            "model": "gpt-4o-mini",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What’s in this image?"},
                        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
                    ]
                }
            ],
            "max_tokens": 300
        }
        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        response_json = response.json()
        if 'choices' in response_json:
            summary = response_json["choices"][0]["message"]["content"]
            summaries.append((summary, timestamp))
        else:
            print(f"Error in API response: {response_json}")
            summaries.append((f"Error: {response_json.get('error', 'Unknown error')}", timestamp))
    return summaries

# Main function to process video
def process_video(source_type, source, mode='Fast', process_type='Transcription', include_cv=False, api_key=None):
    timings = {}
    total_start_time = time.time()
    
    if source_type == 'upload':
        video_path, filename, upload_duration = save_uploaded_file(source)
        metadata = extract_metadata_ffmpeg(video_path)
        timings['upload'] = upload_duration
    elif source_type == 'youtube':
        video_path, metadata, download_duration = download_youtube_video(source)
        timings['download'] = download_duration
    else:
        print("Invalid source type.")
        return None, None
    
    if not video_path or not metadata:
        print("Failed to get video or metadata.")
        return None, None
    
    audio_path, extract_duration = extract_audio(video_path)
    if not audio_path:
        print("Failed to extract audio.")
        return None, None
    timings['extract_audio'] = extract_duration
    
    transcription_result, transcribe_duration = None, 0
    diarization_result, diarization_duration = None, 0
    if process_type == 'Transcription':
        transcription_result, transcribe_duration = transcribe_audio(audio_path, metadata['duration'], mode)
        if not transcription_result:
            print("Failed to transcribe audio.")
            return None, None
        timings['transcribe_audio'] = transcribe_duration
    elif process_type == 'Diarization':
        transcription_result, transcribe_duration = transcribe_audio_with_timestamps(audio_path, metadata['duration'], mode)
        if not transcription_result:
            print("Failed to transcribe audio with timestamps.")
            return None, None
        timings['transcribe_audio_with_timestamps'] = transcribe_duration
        
        diarization_result, diarization_duration = perform_diarization(audio_path, metadata['duration'], mode)
        if not diarization_result:
            print("Failed to perform diarization.")
            return None, None
        timings['diarization'] = diarization_duration
    
    visual_summaries = []
    if include_cv and api_key:
        frames = extract_frames(video_path)
        visual_summaries = summarize_frames(frames, api_key)
    
    combined_text = ""
    if process_type == 'Transcription':
        combined_text = combine_metadata_and_transcription(metadata, transcription_result)
    elif process_type == 'Diarization':
        combined_text = combine_metadata_transcription_diarization(metadata, transcription_result, diarization_result)
    
    total_end_time = time.time()
    timings['total_processing'] = total_end_time - total_start_time
    
    print("Timings:", timings)
    
    document = Document(page_content=combined_text, metadata=metadata)
    
    visual_document = None
    if include_cv and visual_summaries:
        visual_combined_text = "\n".join([f"Time [{timestamp:.2f}]: {summary}" for summary, timestamp in visual_summaries])
        visual_document = Document(page_content=visual_combined_text, metadata=metadata)
    
        # Debugging: Print visual document contents
        print("Visual Document Content:", visual_document.page_content)
    
    # Clean up files
    try:
        os.remove(video_path)
        os.remove(audio_path)
    except Exception as e:
        print(f"Error deleting files: {e}")
    
    return document, visual_document

# Function to create vectorstore
def create_vectorstore(document):
    try:
        vectorstore = Chroma.from_documents(documents=[document], embedding=OpenAIEmbeddings())
        retriever = vectorstore.as_retriever()
        return retriever
    except Exception as e:
        print(f"Error creating vectorstore: {e}")
        return None

# Function to create visual vectorstore
def create_visual_vectorstore(visual_document):
    try:
        visual_vectorstore = Chroma.from_documents(documents=[visual_document], embedding=OpenAIEmbeddings())
        visual_retriever = visual_vectorstore.as_retriever()
        # Debugging: Validate vectorstore contents
        print("Visual Vectorstore Contents:", visual_vectorstore.similarity_search("What is in this image?", k=1))
        return visual_retriever
    except Exception as e:
        print(f"Error creating visual vectorstore: {e}")
        return None


In [5]:
# https://www.youtube.com/watch?v=vr_FPbAHids
process_button.on_click(on_process_button_clicked)
display(url_input, uploaded_file_widget, mode_selector, process_type_selector, process_button)

NameError: name 'process_button' is not defined

# Chatbot

In [7]:
class VideoChatbot:
    def __init__(self):
        self.retriever = None
        self.visual_retriever = None
        self.qa_chain = None
        self.visual_qa_chain = None
        self.memory = MemorySaver()
        self.model = ChatOpenAI(model="gpt-4o", temperature=0)
        self.prompt = '''You are a chatbot that answers questions and performs tasks about a video that the user provides. 
                         Never ask the user to provide a video without first checking if there is one already.
                         If lacking context, assume the user is always talking about the video.
                         First, consider which tools you need to use, if any.
                         When retrieving information, consider that the transcription might not be perfect every time.
                         Then, if relevant, try to identify speakers by their names or usernames, using their dialogue and considering the available metadata.
                         Then use more steps when needed in order to get the right answer. 
                         Finally, you must always identify the language the user is utilizing in their last message and answer in that language, unless the user tells you otherwise.
                      '''
        self.agent = None

    def initialize_qa_chain(self):
        llm = ChatOpenAI(model="gpt-4o")
        try:
            if self.retriever is not None:
                qa = RetrievalQA.from_chain_type(
                    llm=llm,
                    chain_type="stuff",
                    retriever=self.retriever
                )
                self.qa_chain = qa
                print("QA chain for transcript initialized successfully.")
            else:
                print("Transcript retriever not available.")
            
            if self.visual_retriever is not None:
                visual_qa = RetrievalQA.from_chain_type(
                    llm=llm,
                    chain_type="stuff",
                    retriever=self.visual_retriever
                )
                self.visual_qa_chain = visual_qa
                print("QA chain for visual summaries initialized successfully.")
            else:
                print("Visual retriever not available.")
        except Exception as e:
            print(f"Error initializing QA chains: {e}")
            self.qa_chain = None
            self.visual_qa_chain = None

    def create_agent(self):
        tools = []
        if self.qa_chain is not None:
            tools.append(
                Tool(
                    name='video_transcript_retriever',
                    func=self.qa_chain.run,
                    description=(
                        'Searches and returns excerpts from the transcript of the user uploaded video.'
                    )
                )
            )
        if self.visual_qa_chain is not None:
            tools.append(
                Tool(
                    name='video_visual_retriever',
                    func=self.visual_qa_chain.run,
                    description=(
                        'Searches and returns visual context from the visual summaries of the user uploaded video.'
                    )
                )
            )
        if tools:
            self.agent = create_react_agent(self.model, tools=tools, messages_modifier=self.prompt, checkpointer=self.memory)
            print("Agent created successfully.")
        else:
            print("No tools available for the agent.")

    def process_query(self, query):
        if not self.agent:
            print("Agent not initialized.")
            return

        inputs = {"messages": [("user", query)]}
        config = {"configurable": {"thread_id": "2"}}
        stream = self.agent.stream(inputs, config=config, stream_mode="values")
        for s in stream:
            message = s["messages"][-1]
            if isinstance(message, tuple):
                print(message)
            else:
                message.pretty_print()

# Create instance of the chatbot
chatbot = VideoChatbot()

# Example usage of the chatbot after processing the video
def on_process_button_clicked(b):
    if url_input.value:
        source_type = 'youtube'
        source = url_input.value
    elif uploaded_file_widget.value:
        source_type = 'upload'
        source = uploaded_file_widget
    else:
        print("Please provide a YouTube URL or upload a file.")
        return
    
    api_key = OPENAI_API_KEY  # Replace with your OpenAI API key
    document, visual_document = process_video(source_type, source, mode_selector.value, process_type_selector.value, include_cv_selector.value, api_key)
    if document:
        try:
            chatbot.retriever = create_vectorstore(document)
            if include_cv_selector.value and visual_document:
                chatbot.visual_retriever = create_visual_vectorstore(visual_document)
        except Exception as e:
            print(f"Error creating vectorstore: {e}")
            return
    
        if chatbot.retriever and (not include_cv_selector.value or chatbot.visual_retriever):
            print("Vectorstores created and retrievers initialized.")
            chatbot.initialize_qa_chain()
            chatbot.create_agent()
            
            # Test visual retriever independently
            if chatbot.visual_retriever:
                test_query = "What is in this image?"
                test_result = chatbot.visual_retriever.get_relevant_documents(test_query)
                print("Test Visual Retriever Result:", test_result)
            
            example_query()  # Run example query after agent creation
        else:
            print("Failed to create vectorstores.")
    else:
        print("Failed to process video.")

# Display widgets and process button
uploaded_file_widget = handle_file_upload()
url_input = widgets.Text(description="YouTube URL:")
mode_selector = widgets.Dropdown(options=["Fast", "Accurate"], description="Mode:")
process_type_selector = widgets.Dropdown(options=["Transcription", "Diarization"], description="Process Type:")
include_cv_selector = widgets.Checkbox(description="Include Computer Vision", value=False)
process_button = widgets.Button(description="Process Video")
process_button.on_click(on_process_button_clicked)
display(url_input, uploaded_file_widget, mode_selector, process_type_selector, include_cv_selector, process_button)

# Example query to the chatbot
def example_query():
    if chatbot.agent:
        query = "De qué va el vídeo?"
        chatbot.process_query(query)
    else:
        print("Agent not initialized.")

FileUpload(value=(), accept='video/*', description='Upload')

Text(value='', description='YouTube URL:')

FileUpload(value=(), accept='video/*', description='Upload')

Dropdown(description='Mode:', options=('Fast', 'Accurate'), value='Fast')

Dropdown(description='Process Type:', options=('Transcription', 'Diarization'), value='Transcription')

Checkbox(value=False, description='Include Computer Vision')

Button(description='Process Video', style=ButtonStyle())

In [8]:
query = "Qué aspecto tienen los personajes?"
chatbot.process_query(query)


Qué aspecto tienen los personajes?
Tool Calls:
  video_visual_retriever (call_AgkRrQDMmGpcLbfT4niSr9oE)
 Call ID: call_AgkRrQDMmGpcLbfT4niSr9oE
  Args:
    __arg1: personajes
Name: video_visual_retriever

Los personajes en el video "When the healer thinks they're DPS" de Viva La Dirt League son cuatro:

1. **Britt**: Es uno de los tanques del grupo, cuya tarea principal es absorber el daño del enemigo.
2. **Rowan**: Es el otro tanque del grupo, quien también se encarga de recibir el daño del enemigo.
3. **Alan**: Es el arquero del grupo, encargado de atacar al enemigo desde la distancia.
4. **Adam**: Es el sanador del grupo, quien debería enfocarse en curar a los compañeros de equipo, pero a veces se distrae intentando hacer daño.

Están involucrados en una discusión sobre la importancia de que Adam se concentre en sanar en lugar de intentar hacer daño.

Los personajes en el video "When the healer thinks they're DPS" de Viva La Dirt League son:

1. **Britt**: Es uno de los tanques del

# Evaluation

In [None]:
config = {"configurable": {"thread_id": "3"}}

In [None]:
from langchain import hub

In [None]:
### Dataset name
dataset_name = "Video_Test"
from langsmith import Client

client = Client()

# Define dataset: these are your test cases
dataset = client.create_dataset(dataset_name)

In [None]:
client.create_examples(
    inputs=[
        {"input_question": "Hello"},
        {"input_question": "What is the video about?"},
        {"input_question": "What was the last miniature that was released for the Thousand Sons?"},
        {"input_question": "What is the most likely miniature to be released for Imperial Agents?"},
        {"input_question": "What are the Space Wolves units that are less in need of an update?"},
        {"input_question": "What can we expect the Black Templars to get if we are very optimistic?"},
        {"input_question": "What is a common practice from Games Workshop when releasing a new codex?"},
    ],
    outputs=[
        {"output_answer": "Hello, how can I help you?."},
        {"output_answer": "The video is about all the Warhammer 40K factions that are still missing a Codex in 10th edition and what novelties will the codexes bring when they are released, focusing on new possible miniatures."},
        {"output_answer": "The Infernal Master."},
        {"output_answer": "Inquisitor Coteaz."},
        {"output_answer": "Wulfen and Thunderwolves."},
        {"output_answer": "Some themed Terminators."},
        {"output_answer": "They release at least one miniature, usually one or two characters and often releasing Battleforces."},
    ],
    dataset_id=dataset.id,
)

In [None]:
from langchain.schema import HumanMessage
import json


def extract_final_answer(messages):
    """
    Extracts the content of the last AIMessage from the messages.
    
    Args:
    messages (list): List of message dictionaries containing messages from human, AI, and tools.
    
    Returns:
    str: The content of the last AIMessage.
    """
    # Iterate over the messages in reverse order to find the last AIMessage
    for message in reversed(messages):
        # Check if the message is an instance of AIMessage
        if isinstance(message, AIMessage):
            return message.content
    return ''


def predict_rag_answer(example: dict):
    #Use this for answer evaluation
    query = example["input_question"]
    # Format inputs properly
    inputs = {"messages": [{"role": "user", "content": query}]}
    answer = agent.invoke(inputs, config=config, stream_mode="values")
    
    if 'messages' in answer:
        response = extract_final_answer(answer['messages'])
    else:
        response = "No valid response found."
    
    return {"answer": response}



In [None]:
#Implement this if there is time, for evaluating correctly the retrieved documents and hallucinations.

"""
#Implement this if there is time, for evaluating correctly the retrieved documents and hallucinations.
def predict_rag_answer_with_context(example: dict):
    #Use this for answer evaluation
    query = example["input_question"]
    # Format inputs properly
    inputs = {"messages": [{"role": "user", "content": query}]}
    answer = agent.invoke(inputs, config=config, stream_mode="values")
    
    if 'messages' in answer:
        response = extract_final_answer(answer['messages'])
    else:
        response = "No valid response found."
    
    return {"answer": response}
"""

### Response vs reference answer

In [None]:
from langchain import hub
from langchain_openai import ChatOpenAI

# Grade prompt
grade_prompt_answer_accuracy = hub.pull("langchain-ai/rag-answer-vs-reference")

def answer_evaluator(run, example) -> dict:
    """
    A simple evaluator for RAG answer accuracy
    """
    # Access example correctly
    input_question = example.inputs["input_question"]
    reference = example.outputs["output_answer"]
    prediction = run.outputs["answer"]

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_answer_accuracy | llm

    # Run evaluator
    score = answer_grader.invoke({
        "question": input_question,
        "correct_answer": reference,
        "student_answer": prediction
    })
    score = score["Score"]

    return {"key": "answer_v_reference_score", "score": score}

### Response vs input

In [None]:
# Grade prompt
grade_prompt_answer_helpfulness = prompt = hub.pull("langchain-ai/rag-answer-helpfulness")

def answer_helpfulness_evaluator(run, example) -> dict:
    """
    A simple evaluator for RAG answer helpfulness
    """

    # Get question, ground truth answer, RAG chain answer
    input_question = example.inputs["input_question"]
    prediction = run.outputs["answer"]

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_answer_helpfulness | llm

    # Run evaluator
    score = answer_grader.invoke({"question": input_question,
                                  "student_answer": prediction})
    score = score["Score"]

    return {"key": "answer_helpfulness_score", "score": score}

### Response vs retrieved docs

In [None]:
# Prompt
grade_prompt_hallucinations = hub.pull("langchain-ai/rag-answer-hallucination")

def answer_hallucination_evaluator(run, example) -> dict:
    """
    A simple evaluator for generation hallucination
    """

    # RAG inputs
    input_question = example.inputs["input_question"]
    contexts = run.outputs.get("contexts", [])

    # RAG answer
    prediction = run.outputs.get("answer", "No valid response found.")

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_hallucinations | llm

    # Get score
    score = answer_grader.invoke({"documents": contexts,
                                  "student_answer": prediction})
    score = score["Score"]

    return {"key": "answer_hallucination", "score": score}


### Retrieved docs vs input

In [None]:
# Grade prompt
grade_prompt_doc_relevance = hub.pull("langchain-ai/rag-document-relevance")

def docs_relevance_evaluator(run, example) -> dict:
    """
    A simple evaluator for document relevance
    """

    # RAG inputs
    input_question = example.inputs["input_question"]
    contexts = run.outputs.get("contexts", [])

    # LLM grader
    llm = ChatOpenAI(model="gpt-4-turbo", temperature=0)

    # Structured prompt
    answer_grader = grade_prompt_doc_relevance | llm

    # Get score
    score = answer_grader.invoke({"question":input_question,
                                  "documents":contexts})
    score = score["Score"]

    return {"key": "document_relevance", "score": score}

## Run evaluators

In [None]:
from langsmith.evaluation import evaluate

experiment_results = evaluate(
    predict_rag_answer,
    data=dataset_name,
    evaluators=[
        answer_evaluator,
        answer_helpfulness_evaluator,
        answer_hallucination_evaluator,
        docs_relevance_evaluator
    ],
    experiment_prefix="Full_final_test",
    metadata={"version": "Video_final_test, ChatMistralAI"},
)

# Used package versions for requirements.txt

In [12]:
import pkg_resources

# List of packages you want to check
packages = [
    'Flask', 'yt-dlp', 'moviepy', 'whisper', 'pyannote.audio', 'torch',
    'ffmpeg-python', 'python-dotenv', 'langchain-openai', 'langchain',
    'langgraph', 'ipywidgets', 'IPython', 'langchain-community', 'openai-whisper', 'chromadb'
]

# Get the installed version of each package
installed_packages = {pkg.key: pkg.version for pkg in pkg_resources.working_set if pkg.key in [p.lower() for p in packages]}

# Print the package versions
for pkg, version in installed_packages.items():
    print(f"{pkg}=={version}")


chromadb==0.5.3
ffmpeg-python==0.2.0
flask==3.0.3
ipython==8.15.0
ipywidgets==8.1.3
langchain==0.2.7
langchain-community==0.2.6
langchain-openai==0.1.16
langgraph==0.1.8
moviepy==1.0.3
openai-whisper==20231117
pyannote.audio==3.3.1
python-dotenv==1.0.1
torch==2.3.1
whisper==1.1.10
yt-dlp==2024.7.9


In [7]:
import torch
import gc

# Function to free GPU memory
def free_gpu_memory():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()

# Example usage
free_gpu_memory()
