In [1]:
!pip install langchain





In [2]:
!pip install youtube-transcript-api





In [3]:
!pip install chromadb





In [4]:
!pip install langchain-openai





In [5]:
!pip install langchain-community





In [6]:
!pip install gradio





In [7]:
!pip install openai





In [8]:
!pip install whisper





In [9]:
!pip install tiktoken





In [10]:
from youtube_transcript_api import YouTubeTranscriptApi

def get_video_transcript(video_id):
    """
    Fetch transcript for a given YouTube video ID.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join([item['text'] for item in transcript if 'text' in item])
        return transcript_text
    except Exception as e:
        print(f"Error retrieving transcript for video {video_id}: {e}")
        return None

# Example usage with a list of video IDs
video_ids = ['2u4ItZerRac', 'I2zF1I60hPg', '8xqSF-uHCUs', 'LtmS-c1pChY']
transcripts = {}

# Fetch and print the transcript for each video
for video_id in video_ids:
    transcript = get_video_transcript(video_id)
    if transcript:
        transcripts[video_id] = transcript
    else:
        print(f"No transcript available for video {video_id}")

# Display the fetched transcripts
for video_id, transcript in transcripts.items():
    print(f"Transcript for {video_id}:")
    print(transcript[:500])  # Display the first 500 characters of each transcript for preview

Transcript for 2u4ItZerRac:
[Music] the pros and cons of living in Germany today we're going to talk about the pros and cons of living in Germany Germany is an amazing country with a strong economy lots of culture great infrastructure and friendly people but that doesn't mean it's the right place for everyone in this video we'll look at all aspects of life in Germany so you can decide if it's the right move for you so let's get started Pros living in Germany is a great experience it offers many advantages both long and sho
Transcript for I2zF1I60hPg:
should you move to Germany that's an easy question to a hard answer but I'm going to try my [Music] best hi my name is Fernando I have been living in Munich Germany for eight years now and yeah I have a lot of experience I say and I'm very happy to kind of like give you some ideas and some stories to will probably help you make a better decision if you should move to Germany or not the first thing of course is that there are many reasons w

In [11]:
import chromadb
from sentence_transformers import SentenceTransformer

# Initialize ChromaDB
chroma_client = chromadb.Client()

# Create a collection in ChromaDB
collection = chroma_client.create_collection("youtube_video_transcripts2")

# Initialize the SentenceTransformer model for embedding text
sentence_model = SentenceTransformer('all-mpnet-base-v2')

def embed_text(text):
    """
    Convert text into embeddings using SentenceTransformer.
    """
    return sentence_model.encode([text])[0]

def upsert_to_chromadb(video_id, transcript_text):
    """
    Store video transcript embeddings in ChromaDB.
    """
    embedding = embed_text(transcript_text)
    collection.upsert(
        ids=[video_id],  # Adding the 'ids' argument with the video_id
        embeddings=[embedding],
        metadatas=[{"video_id": video_id, "transcript": transcript_text}]
    )

# Upsert all transcripts into ChromaDB
for video_id, transcript in transcripts.items():
    upsert_to_chromadb(video_id, transcript)
    print(f"Stored embedding for {video_id}")

  from .autonotebook import tqdm as notebook_tqdm


Stored embedding for 2u4ItZerRac
Stored embedding for I2zF1I60hPg
Stored embedding for 8xqSF-uHCUs
Stored embedding for LtmS-c1pChY


In [12]:
pip install python-dotenv

Note: you may need to restart the kernel to use updated packages.




In [14]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Get the OpenAI API key from the environment variable
openai_api_key = os.getenv("OPENAI_API_KEY")

# Set the OpenAI API key as an environment variable
os.environ["OPENAI_API_KEY"] = openai_api_key


# Now you can use the OpenAI API key in your script
from youtube_transcript_api import YouTubeTranscriptApi
import chromadb
from sentence_transformers import SentenceTransformer
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
import openai

# Define the LangChain Prompt Template for QA
QA_TEMPLATE = """
Use the following context from a YouTube video transcript to answer the question:

Context: {context}

Question: {question}

Answer:
"""
qa_prompt = PromptTemplate(input_variables=["context", "question"], template=QA_TEMPLATE)

# Initialize the LLM (using GPT-4 mini model)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Initialize ChromaDB
chroma_client = chromadb.Client()

# Create a collection in ChromaDB, if it doesn't already exist
collection = chroma_client.create_collection("youtube_video_transcripts2", get_or_create=True)

# Initialize the SentenceTransformer model for embedding text
sentence_model = SentenceTransformer('all-mpnet-base-v2')

def embed_text(text):
    """
    Convert text into embeddings using SentenceTransformer.
    """
    return sentence_model.encode([text])[0]

def upsert_to_chromadb(video_id, transcript_text):
    """
    Store video transcript embeddings in ChromaDB.
    """
    embedding = embed_text(transcript_text)
    collection.upsert(
        ids=[video_id],
        embeddings=[embedding],
        metadatas=[{"video_id": video_id, "transcript": transcript_text}]
    )

def get_video_transcript(video_id):
    """
    Fetch transcript for a given YouTube video ID.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join([item['text'] for item in transcript if 'text' in item])
        return transcript_text
    except Exception as e:
        print(f"Error retrieving transcript for video {video_id}: {e}")
        return None

def answer_question(question, video_id):
    """
    Answer a question based on the transcript of the selected video.
    """
    # Retrieve relevant video context using similarity search from ChromaDB
    question_embedding = embed_text(question)
    results = collection.query(
        query_embeddings=[question_embedding],
        n_results=1
    )

    # Print the results to understand its structure
    print("Results:", results)

    # Get the transcript text from the results
    context_text = results['metadatas'][0][0]['transcript']

    # Create a QA chain with the OpenAI model and the context
    qa_chain = LLMChain(llm=llm, prompt=qa_prompt)
    answer = qa_chain.run(context=context_text, question=question)

    return answer

# Example usage with a list of video IDs
video_ids = ['2u4ItZerRac', 'I2zF1I60hPg', '8xqSF-uHCUs', 'LtmS-c1pChY']
transcripts = {}

# Fetch and store the transcript for each video
for video_id in video_ids:
    transcript = get_video_transcript(video_id)
    if transcript:
        transcripts[video_id] = transcript
        upsert_to_chromadb(video_id, transcript)
        print(f"Stored embedding for {video_id}")
    else:
        print(f"No transcript available for video {video_id}")

# Example query
question = "What is the main topic of the video?"
video_id = "2u4ItZerRac"
answer = answer_question(question, video_id)
print("Answer:", answer)

Stored embedding for 2u4ItZerRac
Stored embedding for I2zF1I60hPg
Stored embedding for 8xqSF-uHCUs
Stored embedding for LtmS-c1pChY
Results: {'ids': [['2u4ItZerRac']], 'embeddings': None, 'documents': [[None]], 'uris': None, 'data': None, 'metadatas': [[{'transcript': "[Music] the pros and cons of living in Germany today we're going to talk about the pros and cons of living in Germany Germany is an amazing country with a strong economy lots of culture great infrastructure and friendly people but that doesn't mean it's the right place for everyone in this video we'll look at all aspects of life in Germany so you can decide if it's the right move for you so let's get started Pros living in Germany is a great experience it offers many advantages both long and shortterm that make it an attractive destination for those considering making the move here are some of the pros of living in Germany one free education for more than 25 years Germany has offered tuition-free higher education at publ

In [None]:
import whisper

# Initialize the Whisper model
whisper_model = whisper.load_model("base")

def transcribe_audio(audio_path):
    """
    Transcribe audio input into text using Whisper.
    """
    result = whisper_model.transcribe(audio_path)
    return result['text']

# Example usage with a local audio file
audio_path = 'path_to_audio_file.wav'
transcribed_text = transcribe_audio(audio_path)
print(f"Transcribed Text: {transcribed_text}")

In [16]:
import gradio as gr

def chatbot_interface(question, video_title, audio_file=None):
    """
    Handle user queries and answer questions based on selected video.
    """
    video_choices = {
        'Video 1': '2u4ItZerRac',
        'Video 2': 'I2zF1I60hPg',
        'Video 3': '8xqSF-uHCUs',
        'Video 4': 'LtmS-c1pChY'
    }

    video_id = video_choices.get(video_title)

    if audio_file:
        # Transcribe audio if provided
        question = transcribe_audio(audio_file)

    if video_id:
        return answer_question(question, video_id)
    else:
        return "Please select a valid video."

# Create Gradio interface
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=[
        gr.Textbox(label="Enter your question"),
        gr.Dropdown(label="Select Video", choices=['Video 1', 'Video 2', 'Video 3', 'Video 4'])
        #gr.Audio(label="Upload Audio (optional)", type="file", optional=True)
    ],
    outputs="text"
)

# Launch the Gradio app
iface.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://f1315bbc6e4f9a38b7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [17]:
# evaluation and fine tuning
# Define a set of test queries and expected answers
test_cases = [
    {
        "question": "What are the benefits of immigration to Germany?",
        "expected_answer": "Immigration brings cultural diversity, fills labor shortages, and contributes to economic growth."
    },
    {
        "question": "What are the challenges faced by immigrants in Germany?",
        "expected_answer": "Immigrants may face language barriers, integration difficulties, and potential discrimination."
    },
    # Add more test cases as needed
]

def evaluate_model(test_cases):
    """
    Evaluate the model with a set of test cases.
    """
    correct_answers = 0
    for test in test_cases:
        answer = answer_question(test['question'], '2u4ItZerRac,')  # Assuming using one video for testing
        print(f"Question: {test['question']}")
        print(f"Expected: {test['expected_answer']}")
        print(f"Received: {answer}")
        if test['expected_answer'].lower() in answer.lower():
            correct_answers += 1
        print()
    
    accuracy = correct_answers / len(test_cases)
    print(f"Model accuracy: {accuracy * 100}%")

# Run the evaluation
evaluate_model(test_cases)

Results: {'ids': [['2u4ItZerRac']], 'embeddings': None, 'documents': [[None]], 'uris': None, 'data': None, 'metadatas': [[{'transcript': "[Music] the pros and cons of living in Germany today we're going to talk about the pros and cons of living in Germany Germany is an amazing country with a strong economy lots of culture great infrastructure and friendly people but that doesn't mean it's the right place for everyone in this video we'll look at all aspects of life in Germany so you can decide if it's the right move for you so let's get started Pros living in Germany is a great experience it offers many advantages both long and shortterm that make it an attractive destination for those considering making the move here are some of the pros of living in Germany one free education for more than 25 years Germany has offered tuition-free higher education at public universities with no charge for most courses and little no cost for administrative fees this means that students coming from all 

Results: {'ids': [['2u4ItZerRac']], 'embeddings': None, 'documents': [[None]], 'uris': None, 'data': None, 'metadatas': [[{'transcript': "[Music] the pros and cons of living in Germany today we're going to talk about the pros and cons of living in Germany Germany is an amazing country with a strong economy lots of culture great infrastructure and friendly people but that doesn't mean it's the right place for everyone in this video we'll look at all aspects of life in Germany so you can decide if it's the right move for you so let's get started Pros living in Germany is a great experience it offers many advantages both long and shortterm that make it an attractive destination for those considering making the move here are some of the pros of living in Germany one free education for more than 25 years Germany has offered tuition-free higher education at public universities with no charge for most courses and little no cost for administrative fees this means that students coming from all 