In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from IPython.display import  clear_output
import time
import PyPDF2
from pathlib import Path
from tqdm.auto import tqdm
from typing import Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter

device = 'cuda' if torch.cuda.is_available() else 'cpu'

DEFAULT_MODEL = "meta-llama/Llama-3.2-3B-Instruct"


model = AutoModelForCausalLM.from_pretrained(
    DEFAULT_MODEL,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    device_map=device,
)

tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL, use_safetensors=True)
tokenizer.pad_token_id = tokenizer.eos_token_id
model.generation_config.pad_token_id = 128001

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [2]:
# Helper function

def llm_generate(messages, max_tokens=256,do_sample=False):
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # print(prompt)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            do_sample=do_sample,
            max_new_tokens=max_tokens,
        )

    processed_text = tokenizer.decode(output[0][len(inputs.input_ids[0])+3:], skip_special_tokens=True)

    return processed_text

# Load the RAW Transcription 

In [None]:
with open("results/transcription.txt", "r", encoding="utf-8") as f:
    transcript_txt = f.read()

# Write the title for the video

In [None]:
SYSTEM_PROMPT = ''' 
Generate a list of possible video titles based on the provided video topic. Each title should be concise (maximum 60 characters) and start with the most impactful keyword. The titles should be engaging, culturally relevant, and optimized for high click-through rates.
'''

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": f'''TRANSCRIPT TEXT: {transcript_txt}'''},
]

video_title_ideas = llm_generate(messages,max_tokens=256)
print(video_title_ideas)

with open('results/video_title_ideas.txt', "w", encoding="utf-8") as file:
    file.write(video_title_ideas)



Here are some possible video title options based on the provided transcript:

1. "How to Train a Small Language Model using PyTorch"
2. "Building a Simple Language Model from Scratch with PyTorch"
3. "Understanding Language Models: A Simplified Explanation"
4. "Small Language Model Tutorial: From Basics to Implementation"
5. "PyTorch Language Model Tutorial: Training a Small Language Model"
6. "Language Model 101: How to Create a Small Language Model with PyTorch"
7. "Simplifying Language Models: A Step-by-Step Guide with PyTorch"
8. "From Text to Tokens: Training a Small Language Model with PyTorch"
9. "The Basics of Language Models: A Simplified Explanation with PyTorch"
10. "Building a Small Language Model: A PyTorch Tutorial for Beginners"

Each title is concise, starts with the most impactful keyword, and is optimized for high click-through rates. They also provide a clear idea of what the video will cover, making it appealing to potential viewers.


# Write the Description for the video

In [None]:

SYSTEM_PROMPT = ''' 
Generate a compelling summary for the given video, incorporating all key topics and important keywords naturally. The summary should be engaging, concise, and clearly convey the video's main takeaways while maintaining a natural flow.
Do not include any additional text or numbering. DO NOT USE MARKDOWN.
'''

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": f'''TRANSCRIPT TEXT: {transcript_txt}'''},
]

video_description = llm_generate(messages,max_tokens=256,do_sample=True)

print(video_description)

with open('results/video_description.txt', "w", encoding="utf-8") as file:
    file.write(video_description)




In this video, we're exploring LangChain Agents and delving into what they're all about, how they work, and the benefits they bring to businesses. An agent is essentially a combination of a language model and one or more tools, allowing it to solve specific problems in a way that's similar to how a person would use tools like Excel or data tables. By combining a language model with tools, we empower it to tackle complex tasks, and LangChain is the framework that makes this possible.

We're starting with an example of building a chatbot for an e-commerce business, where the language model needs to access customer data, such as browsing history and purchase records, to provide a personalized experience. However, current language models lack this information, so we need to get them access to customer data through microservices and APIs. LangChain's connectors to APIs and computational resources are called tools, and when combined with a language model, we get an agent.

We're taking a P

# Identify the key topics

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import CrossEncoder
import numpy as np

def find_new_topics_from_transcript(transcript, chunk_size=500, chunk_overlap=20, similarity_threshold=-5):
    """
    Finds new topics in a podcast transcript using chunking and cross-encoder similarity.

    Args:
        transcript (str): The podcast transcript.
        chunk_size (int): The size of each text chunk.
        chunk_overlap (int): The overlap between text chunks.
        similarity_threshold (float): The threshold for similarity between chunks.

    Returns:
        list: A list of new topic chunks.
    """

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    chunks = text_splitter.split_text(transcript)

    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

    new_topics = [chunks[0]]  # Initialize with the first chunk
    reference_chunk = chunks[0]

    for chunk in chunks[1:]:
        pairs = [[reference_chunk, chunk]]
        scores = model.predict(pairs)
        similarity_score = scores[0]

        if similarity_score < similarity_threshold:
            new_topics.append(chunk)
            reference_chunk = chunk  # Update the reference chunk
        #else:
        #   print(f"Similarity {similarity_score:.4f} found between: \n{reference_chunk[:100]}...\nand \n{chunk[:100]}...")

    return new_topics


new_topics = find_new_topics_from_transcript(transcript_txt)

for i, topic in enumerate(new_topics):
    print(f"Topic {i+1}:\n{topic}\n{'-'*40}")

Topic 1:
In this video, we're going to have a closer look at LangChain Agents and understand what  Agents are all about.  First we're going to dive into what an agent is and understand how agents work under the  hood of LangChain.  Then we're going to have a look at what we can do with agents that we couldn't do before  and some of the future implications for businesses that are already investing in technology,  data and analytics.  And finally, I'm going to show you how to get started building your own
----------------------------------------
Topic 2:
knows about your products, which is better than before, but it's still not good enough. Because what does a chatbot really need to know in order to give the customers a good  customer experience? The chatbot needs to know stuff about the customer. If this chatbot is on  a webpage, it needs to know the context of the visit. And this could be information like,  is this a new potential customer or an existing customer? Or what is the browsi

# Look through the transcript for time stamps

In [None]:
from sentence_transformers import CrossEncoder
import re


def parse_srt(subtitle_file):
    """Parses the subtitle file and returns a list of (start_time, text) tuples."""
    subtitles = []
    pattern = re.compile(r"(\d+)\n(\d{2}:\d{2}:\d{2},\d{3}) --> \d{2}:\d{2}:\d{2},\d{3}\n(.+?)(?=\n\d+\n|\Z)", re.DOTALL)
    
    with open(subtitle_file, "r", encoding="utf-8") as file:
        content = file.read()
    
    matches = pattern.findall(content)
    for _, start_time, text in matches:
        cleaned_text = text.replace("\n", " ").strip()
        # Convert HH:MM:SS,mmm to MM:SS
        hours, minutes, seconds_ms = start_time.split(":")
        seconds, milliseconds = seconds_ms.split(",")
        start_time_MMSS = f"{minutes}:{seconds}"
        subtitles.append((start_time_MMSS, cleaned_text))
    
    return subtitles

def find_best_timestamps(subtitle_file, queries):
    """Finds the best subtitle match for each query and returns the start timestamps."""
    subtitles = parse_srt(subtitle_file)
    texts = [text for _, text in subtitles]
    start_times = [start for start, _ in subtitles]

    model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
    
    query_timestamps = {}
    for query in queries:
        scores = model.predict([[query, text] for text in texts])
        best_idx = scores.argmax()
        query_timestamps[start_times[best_idx]] = query
    
    return query_timestamps

# Example usage:
queries =  new_topics

subtitle_file = "results/subtitles.srt"  # Replace with your actual subtitle file path

timestamps = find_best_timestamps(subtitle_file, queries)
# Function to convert timestamp to minutes and seconds for sorting
def time_to_seconds(time_str):
    minutes, seconds = map(int, time_str.split(':'))
    return minutes * 60 + seconds

# Sort dictionary by timestamp
timestamps = dict(sorted(timestamps.items(), key=lambda x: time_to_seconds(x[0])))
print(timestamps)

{'00:00': "In this video, we're going to have a closer look at LangChain Agents and understand what  Agents are all about.  First we're going to dive into what an agent is and understand how agents work under the  hood of LangChain.  Then we're going to have a look at what we can do with agents that we couldn't do before  and some of the future implications for businesses that are already investing in technology,  data and analytics.  And finally, I'm going to show you how to get started building your own", '01:18': "knows about your products, which is better than before, but it's still not good enough. Because what does a chatbot really need to know in order to give the customers a good  customer experience? The chatbot needs to know stuff about the customer. If this chatbot is on  a webpage, it needs to know the context of the visit. And this could be information like,  is this a new potential customer or an existing customer? Or what is the browsing history of this visitor?  What pr

# Rephrase the chapter names using llm 

In [19]:
def rephrase_chapter(chapter_text):
    SYSTEM_PROMPT = '''rephrase this chapter name to make it look like a standalone video title (not too flashy keep it simple, do not use bloat words like mastering, unlocking, embarking etc).
    The sentence should be easily understandable and concise. 
    however the title should not be more than 60 characters
    Do not include any additional text or numbering.'''
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f'''CHAPTER TEXT: {chapter_text}'''}
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    # print(prompt)

    with torch.no_grad():
        output = model.generate(
            **inputs,
            do_sample=False,
            max_new_tokens=50,
        )

    processed_text = tokenizer.decode(output[0][len(inputs.input_ids[0])+3:], skip_special_tokens=True)
    return processed_text.strip()



In [20]:
chapter_timestamps=[]

for ts in timestamps:
    chapter_name = rephrase_chapter(timestamps[ts])
    chapter_timestamps.append(f'''{ts} - {chapter_name.strip('"').strip("'")}''')

print('\n'.join(chapter_timestamps))



00:00 - Understanding LangChain Agents
01:18 - Unlocking Customer Insights for Better Chatbot Experience
02:14 - Building Tools for Language Models
04:18 - 5 Key Channels for Business Communication
04:43 - ChatGPT: The Future of Language Understanding
06:21 - Shopify API Data Extraction with Python
07:59 - Defining a Shopify Agent with GBT4


# Youtube video downloader

In [38]:
import yt_dlp

def download_youtube_video(url, output_path="."):
    """
    Downloads a YouTube video using yt-dlp.

    Args:
        url (str): The YouTube video URL.
        output_path (str): The directory where the video will be saved.
    """
    ydl_opts = {
        'outtmpl': f'{output_path}/%(title)s.%(ext)s', # output template
        'format': 'bestvideo[height<=?1080]+bestaudio/best', # best video and audio, max 1080p
        'merge_output_format': 'mp4', # merge to mp4
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print("Download complete.")

    except Exception as e:
        print(f"An error occurred: {e}")



# Example usage:
video_url = "https://www.youtube.com/watch?v=Xi9Ui-9qcPw&t=170s" # Replace with your YouTube URL.
download_youtube_video(video_url) # Downloads to the current directory

# Example with a specific output path:
# download_youtube_video(video_url, output_path="/path/to/your/videos")

[youtube] Extracting URL: https://www.youtube.com/watch?v=Xi9Ui-9qcPw&t=170s
[youtube] Xi9Ui-9qcPw: Downloading webpage
[youtube] Xi9Ui-9qcPw: Downloading tv client config
[youtube] Xi9Ui-9qcPw: Downloading player 4fcd6e4a
[youtube] Xi9Ui-9qcPw: Downloading tv player API JSON
[youtube] Xi9Ui-9qcPw: Downloading ios player API JSON
[youtube] Xi9Ui-9qcPw: Downloading m3u8 information
[info] Xi9Ui-9qcPw: Downloading 1 format(s): 248+251
[download] Destination: LangChain Agents： Simply Explained!.f248.webm
[download] 100% of   12.45MiB in 00:00:00 at 43.77MiB/s    
[download] Destination: LangChain Agents： Simply Explained!.f251.webm
[download] 100% of    8.59MiB in 00:00:00 at 43.95MiB/s  
[Merger] Merging formats into "LangChain Agents： Simply Explained!.mp4"
Deleting original file LangChain Agents： Simply Explained!.f248.webm (pass -k to keep)
Deleting original file LangChain Agents： Simply Explained!.f251.webm (pass -k to keep)
Download complete.
