In [8]:
import os
import re
import chromadb
from pathlib import Path
import google.generativeai as genai
from chromadb.utils import embedding_functions
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

from utils import get_client, get_or_create_collection

In [10]:
def join_paths(*args):
  return os.path.join(*args)

In [15]:
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

client = get_client(client_type='persistent', path='../youtube_db')
genai.configure(api_key=GOOGLE_API_KEY)
gemini_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=GOOGLE_API_KEY)
genai_model = genai.GenerativeModel('gemini-1.5-flash')
collection = get_or_create_collection(client=client, name='youtube_notes', embedding_function=gemini_ef)

In [12]:
uris = [
  "https://www.youtube.com/watch?v=wRmOOWPTRBs",
  "https://www.youtube.com/watch?v=7EMa8hMHcXI",
  "https://www.youtube.com/watch?v=xWhfs1MYNfc",
  "https://www.youtube.com/watch?v=BrQLpAGlvko",
  "https://www.youtube.com/watch?v=ht_PoCwpUFQ",
  "https://www.youtube.com/watch?v=9x1y392RBfc",
  "https://www.youtube.com/watch?v=jR3TZv_2jJg",
  "https://www.youtube.com/watch?v=0e4xgZkLuNg"
]

video_dict = {}

for uri in uris:
  video_uri = re.split("=", uri)[1]
  video_dict.update({video_uri: uri})

video_dict

{'wRmOOWPTRBs': 'https://www.youtube.com/watch?v=wRmOOWPTRBs',
 '7EMa8hMHcXI': 'https://www.youtube.com/watch?v=7EMa8hMHcXI',
 'xWhfs1MYNfc': 'https://www.youtube.com/watch?v=xWhfs1MYNfc',
 'BrQLpAGlvko': 'https://www.youtube.com/watch?v=BrQLpAGlvko',
 'ht_PoCwpUFQ': 'https://www.youtube.com/watch?v=ht_PoCwpUFQ',
 '9x1y392RBfc': 'https://www.youtube.com/watch?v=9x1y392RBfc',
 'jR3TZv_2jJg': 'https://www.youtube.com/watch?v=jR3TZv_2jJg',
 '0e4xgZkLuNg': 'https://www.youtube.com/watch?v=0e4xgZkLuNg'}

In [26]:
notes_name = 'keynotes.txt'
output_folder_name = '../output'
for video_id, uri in video_dict.items():

  # Get transcript
  transcript = YouTubeTranscriptApi.get_transcript(video_id=video_id, languages=['en', 'en-US', 'en-GB'])
  transcript = TextFormatter().format_transcript(transcript)
  transcript_name = 'transcript.txt'

  # Get paths
  video_folder_path = join_paths(output_folder_name, video_id)
  transcript_path = join_paths(video_folder_path, transcript_name)
  notes_path = join_paths(video_folder_path, notes_name)

  # Create video folder
  Path(video_folder_path).mkdir(parents=True, exist_ok=True)

  # Write transcript
  with open(transcript_path, "w") as file:
    file.write(transcript)


  # Ask Gemini for the keynotes
  prompt = "Extract key notes from video transcript: "
  response = genai_model.generate_content(prompt + transcript, stream=False)


  # Write keynotes
  with open(notes_path, "w") as file:
    file.write(response.text)
  
  # Add to db
  collection.upsert(ids=[video_id], documents=[response.text])

In [29]:
# Query the results
query_text = "What are the outcomes of practicing boxing?"
n_results = 2

query_results = collection.query(
  query_texts=[query_text],
  n_results=n_results,
  include=['distances', 'documents', 'uris']
)

In [30]:
# Print the results
for i in range(len(query_results['ids'][0])):
  id       = query_results["ids"][0][i]
  document = query_results["documents"][0][i]

  print("************************************************************************")
  print(f"Video ID: {i+1}")
  print("************************************************************************")
  print(document, "\n")

************************************************************************
Video ID: 1
************************************************************************
Key notes from the video transcript about the effects of boxing on the body:

* **Significant Weight Loss:** Boxing is a full-body, high-intensity workout that burns hundreds of calories per hour, leading to significant weight loss.

* **Fitter and More Muscular Physique:**  It builds muscle throughout the entire body (chest, shoulders, back, arms, legs, and core), resulting in a leaner, more defined physique.  Improved muscle endurance is also a benefit.

* **Improved Heart Health:** Boxing is a form of high-intensity interval training (HIIT), linked to a lower risk of heart disease and increased cardiovascular fitness.  Studies show a correlation between boxing ranking and heart health markers.

* **Lowered Blood Pressure:**  HIIT boxing has been shown to significantly lower both systolic and diastolic blood pressure, reducing t

In [31]:

prompt = "Answer the following QUESTION using DOCUMENT as context."
prompt += f"QUESTION: {query_text}"
prompt += f"DOCUMENT: {query_results["documents"][0][0]}"

response = genai_model.generate_content(prompt, stream=False)
print(response.text)

Practicing boxing leads to significant weight loss, a fitter and more muscular physique, improved heart health (including lower blood pressure), and boosted whole body strength.  It also encourages positive lifestyle changes, such as healthier eating habits.

