In [1]:
!pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib youtube-transcript-api

Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl.metadata (15 kB)
Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Installing collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.2


In [2]:
!pip install googletrans

Collecting googletrans
  Downloading googletrans-3.0.0.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans)
  Downloading hstspreload-2024.9.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==0.13.3->googletrans)
  Downloading httpcore-0.9.1-py3-none-any.whl.metadata (4.6 kB)
Collecting h11<0.10,>=0.8 (from httpcore==0.9.*->httpx==0.13.3->googletrans)
  Downloading h11-0.9.0-py2.py3-none-any.whl.metadata (8.1 kB)
Collectin

In [3]:
import os
import re

from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi as yta, NoTranscriptFound

In [4]:
# API key: read from the environment so the credential is never stored in the
# notebook or its version history. Export YOUTUBE_API_KEY before starting the
# kernel (or set it with `%env YOUTUBE_API_KEY=...` in a cell you do not save).
# NOTE: the key that used to be hardcoded in this cell has been exposed and
# should be revoked in the Google Cloud console.
API_KEY = os.environ.get('YOUTUBE_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set; export a YouTube Data API v3 key "
        "before running this notebook."
    )

# Client for the YouTube Data API v3, shared by the search helper below.
youtube = build('youtube', 'v3', developerKey=API_KEY)

In [5]:
def search_youtube(query, max_results=10):
    """Search YouTube for videos matching the query.

    Args:
        query: Free-text search string, sent to the API as ``q``.
        max_results: Sent to the API as ``maxResults`` (upper bound on the
            number of hits returned by this single request).

    Returns:
        A list of dicts with the keys ``'video_id'``, ``'title'`` and
        ``'description'``, in the order the API returned them. An empty list
        is returned when the request fails or yields no videos; the error is
        printed rather than raised.
    """
    try:
        response = youtube.search().list(
            q=query,
            part='id,snippet',
            # Restrict hits to videos. Without this, channel and playlist
            # results count against maxResults and are then thrown away below,
            # so fewer videos than requested come back.
            type='video',
            maxResults=max_results,
        ).execute()

        videos = []
        # .get() keeps a response without 'items' (or an item without a
        # snippet field) from being reported as a search error.
        for item in response.get('items', []):
            video_id = item.get('id', {}).get('videoId')
            if not video_id:
                continue
            snippet = item.get('snippet', {})
            videos.append({
                'video_id': video_id,
                'title': snippet.get('title', ''),
                'description': snippet.get('description', ''),
            })
        return videos
    except Exception as e:
        # Deliberate best-effort: callers treat "no results" and "search
        # failed" the same way, so report the problem and return nothing.
        print(f"Error searching YouTube: {e}")
        return []

def analyze_transcript(transcript, query):
    """Find the transcript entries whose text contains the query phrase.

    Matching is case-insensitive and treats any run of whitespace (spaces,
    tabs, line breaks) as a single space, so a phrase that is split by a line
    break inside one entry is still found.

    Args:
        transcript: Iterable of dicts, each with a ``'text'`` string and a
            ``'start'`` value (passed through unchanged as ``'time'``).
        query: Phrase to look for.

    Returns:
        A list of ``{'time': <entry 'start'>, 'text': <entry 'text'>}`` dicts,
        one per matching entry, in transcript order. The text is returned
        exactly as it appears in the entry. An empty or whitespace-only query
        matches nothing.
    """
    # Collapse whitespace runs instead of stripping, so a query that is
    # deliberately padded (e.g. " ai ") keeps its word-boundary meaning.
    needle = re.sub(r'\s+', ' ', query.lower())
    if not needle.strip():
        # `'' in text` is always True; a blank query must not flag every line.
        return []

    time_lapses = []
    for entry in transcript:
        text = entry['text']
        start_time = entry['start']
        haystack = re.sub(r'\s+', ' ', text.lower())
        if needle in haystack:
            time_lapses.append({
                'time': start_time,
                'text': text
            })

    return time_lapses

def format_time(seconds):
    """Render a duration given in seconds as an ``HH:MM:SS`` string.

    Each field is truncated to an integer and zero-padded to two digits;
    fractional seconds are dropped.
    """
    total_minutes, secs = divmod(seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return ":".join("{:02}".format(int(part)) for part in (hrs, mins, secs))

def get_transcript(video_id, languages=('en',)):
    """Retrieve the transcript for a given video ID.

    Languages are tried one at a time, in order, and the first transcript
    found is returned.

    Args:
        video_id: YouTube video ID.
        languages: Language codes to try, in priority order. A single code
            may be given as a plain string. Defaults to English only.

    Returns:
        Whatever ``yta.get_transcript`` returns for the first language that
        has a transcript, or ``None`` if none of the languages has one or the
        lookup fails. Failures are printed rather than raised.
    """
    if isinstance(languages, str):
        # Iterating a bare string would try each character as a language.
        languages = (languages,)

    for lang in languages:
        try:
            return yta.get_transcript(video_id, languages=[lang])
        except NoTranscriptFound:
            # Not fatal: fall through to the next candidate language.
            print(f"No transcript found for video {video_id} in {lang}")
        except Exception as e:
            # Any other failure is not treated as language-specific, so stop
            # instead of repeating the request for every remaining language.
            print(f"Error getting transcript for video {video_id}: {e}")
            break
    return None

def display_results(user_query, max_results=10):
    """Search YouTube for the query and print where it occurs in each video.

    For every search hit that has a transcript, the transcript entries that
    mention ``user_query`` are collected. Videos are then printed with the
    most mentions first, each followed by its matching timestamps and text.
    Videos without a transcript are reported on their own line and left out
    of the ranking. Nothing is returned.
    """
    ranked = []
    for hit in search_youtube(user_query, max_results):
        captions = get_transcript(hit['video_id'])
        if not captions:
            print(f"No transcript available for video {hit['video_id']}")
            continue

        mentions = analyze_transcript(captions, user_query)
        ranked.append({
            'title': hit['title'],
            'link': f"https://www.youtube.com/watch?v={hit['video_id']}",
            'description': hit['description'],
            'time_lapses': [
                {'time': format_time(m['time']), 'text': m['text']}
                for m in mentions
            ],
        })

    # Most mentions first; the sort is stable, so ties keep search order.
    ranked = sorted(ranked, key=lambda entry: len(entry['time_lapses']), reverse=True)

    for position, entry in enumerate(ranked, start=1):
        print(f"{position}. {entry['title']}")
        print(f"   Link: {entry['link']}")
        print(f"   Description: {entry['description']}")
        if not entry['time_lapses']:
            print("   No relevant time lapses found.")
        else:
            print("   Time Lapse          Text")
            for mention in entry['time_lapses']:
                print(f"   {mention['time']}            {mention['text']}")
        print("---------")


In [10]:
# Example: rank up to 10 search hits by how often their captions mention the phrase.
display_results(user_query="fine tuning", max_results=10)

1. Fine-tuning Large Language Models (LLMs) | w/ Example Code
   Link: https://www.youtube.com/watch?v=eC6Hd1hFvos
   Description: CXOs, VPs, & Directors... I offer custom AI workshops: https://www.shawhintalebi.com/ai-workshops This is the 5th video in a ...
   Time Lapse          Text
   00:00:33            what is model fine tuning the way I like
   00:01:06            the edges fine tuning is taking this raw
   00:03:19            to learn more about model fine tuning
   00:03:21            and how open AI did their fine tuning
   00:04:14            fine tuning you don't have to rely on
   00:05:00            differentiate fine tuning with
   00:07:14            was supervised fine tuning so
   00:09:01            approach to model fine tuning here I
   00:09:06            first choose your fine tuning task so
   00:09:55            number four the fine tuning the model
   00:18:27            examples for model fine tuning the next
   00:26:56            that we evaluated before 5