In [4]:
# This cell sends a direct query to OpenAI.
# It shows that, without context, little is returned on some topics.

import openai
from dotenv import load_dotenv
import os

load_dotenv()

openai_api_key=os.getenv("OPENAI_API_KEY")
youtube_api_key=os.getenv("YOUTUBE_API_KEY")


# Build the OpenAI client (new client-based API interface) from the key loaded above
client = openai.OpenAI(api_key=openai_api_key)

# Question sent to the model with no supporting context
prompt = "What does Nathan Babcock say about quantum effects in biology?"

# Send a single-turn chat request through `chat.completions.create`
response = client.chat.completions.create(
    messages=[{"role": "user", "content": prompt}],
    model="gpt-4",  # Or "gpt-3.5-turbo"
    max_tokens=100,
    temperature=0.7,
)

# Show the text of the first returned choice
print("Response:", response.choices[0].message.content)


Response: I'm sorry, but I couldn't find any information on a person named Nathan Babcock discussing quantum effects in biology. It's possible that you may have misspelled the name or the person you're asking about isn't widely recognized in this field.


In [5]:
# Retrieve the transcript from a YouTube video. 
# Includes some exception handling

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import (
    TranscriptsDisabled,
    NoTranscriptFound,
    VideoUnavailable,
    CouldNotRetrieveTranscript,
)

# YouTube Video ID
# Identifier of the single video whose transcript is fetched in this cell.
video_id = "1igKzDiWc84"

# Function to get transcript safely
def get_transcript_safe(video_id):
    """Fetch and print the transcript of one YouTube video without raising.

    Every line of the transcript is printed as it is read. Any failure is
    reported with a printed message instead of an exception.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The transcript as returned by ``YouTubeTranscriptApi.get_transcript``
        on success, otherwise ``None``.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        print(f"\nTranscript for video ID {video_id}:\n")
        # Printing stays inside the try so a malformed entry is also reported
        # by the generic handler below rather than escaping.
        for segment in segments:
            print(segment['text'])
    except VideoUnavailable:
        print(f"❌ Video {video_id} is unavailable.")
    except TranscriptsDisabled:
        print(f"❌ Transcripts are disabled for video {video_id}.")
    except NoTranscriptFound:
        print(f"❌ No transcript found for video {video_id}.")
    except CouldNotRetrieveTranscript:
        print(f"❌ Could not retrieve transcript for video {video_id}.")
    except Exception as err:
        print(f"❌ An unexpected error occurred: {str(err)}")
    else:
        return segments
    return None

# Run the function
# The result is the fetched transcript, or None if retrieval failed.
transcript = get_transcript_safe(video_id)



Transcript for video ID 1igKzDiWc84:

This is physical principles of quantum
biology part one chapter 1 quantum
theory and the new
observables. For those of you who are
new to the subject, I've recently
released a monograph with my wife and
collaborator Brandy Babcock on the
physical principles of quantum biology.
The book contains 21 chapters and these
videos will provide lecture material
outlining the content of each chapter.
You can download the book for free from
the physics archive. That's
archive.org. Entry number
25503.11747. So go ahead, check it out.
The book begins in chapter 1 with an
overview of quantum mechanics starting
in the year 1900 when Lord Calvin
delivered a address to the Royal
Institution outlining two major problems
he saw in physics. He described these
metaphorically as clouds hanging over
the horizon of physics which he felt
obscured the beauty and clearness of the
dynamical theory so to speak. The first
cloud was that of the luminiferous
ether. uh Michaelels

In [None]:
# This block searches for information found in the transcripts
# of a set of YouTube videos

# STEP 1: Search YouTube using video names
#         Provide a search term which will be used to find a
#         list of matching YouTube videos
# STEP 2: Get and Chunk Transcripts
#         Fetch each video's transcript and split it into text chunks
# STEP 3: Create FAISS Index
#         Embed the chunks and save them in a FAISS vector index
# STEP 4: In a loop search for something within the transcripts
#         Query FAISS
#         The search is used to find blocks of text that are relevant
#         from the vector databases, to build context for OpenAI
# STEP 5: Query OpenAI
#         Perform a search using OpenAI and the context from the 
#         chunks retrieved from the Vector database

import googleapiclient.discovery
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from openai import OpenAI

# ==== CONFIGURATION ====

# Intended transcript chunk size in characters; get_transcript_chunks'
# own chunk_size default carries the same value (500).
CHUNK_SIZE = 500
# Default number of chunks retrieved per query (default for top_k in
# retrieve_relevant_chunks).
TOP_K = 5

# ==== SETUP ====

# openai_api_key and youtube_api_key are read from the environment in the
# first cell, so that cell must have been run before this one.
client = OpenAI(api_key=openai_api_key)
# YouTube Data API v3 client, used by search_youtube_videos.
youtube = googleapiclient.discovery.build("youtube", "v3", developerKey=youtube_api_key)
# Sentence-embedding model used both to index chunks and to embed queries.
model = SentenceTransformer("all-MiniLM-L6-v2")

# ==== STEP 1: Search YouTube ====
def search_youtube_videos(query, max_results=5):
    """Search YouTube for videos matching ``query``.

    Args:
        query: Free-text search term sent to the YouTube Data API.
        max_results: Maximum number of videos to request.

    Returns:
        A list of ``{"id": <video id>, "title": <title>}`` dicts, in the
        order returned by the API. Titles are HTML-unescaped, because the
        search endpoint returns entities such as ``&amp;`` in snippet titles.
    """
    import html  # local import: only needed here to decode escaped titles

    request = youtube.search().list(
        q=query, part="snippet", type="video", maxResults=max_results
    )
    response = request.execute()

    videos = []
    for item in response.get("items", []):
        video_id = item.get("id", {}).get("videoId")
        if not video_id:
            # Without a video ID there is nothing to fetch a transcript for.
            continue
        raw_title = item.get("snippet", {}).get("title", "")
        videos.append({"id": video_id, "title": html.unescape(raw_title)})
    return videos

# ==== STEP 2: Get and Chunk Transcripts ====
def get_transcript_chunks(video_id, chunk_size=500):
    """Fetch a video's English transcript and group it into text chunks.

    Consecutive transcript entries are joined with single spaces for as long
    as the joined text stays within ``chunk_size`` characters. A single entry
    longer than ``chunk_size`` is kept whole as its own chunk rather than
    being split.

    Args:
        video_id: The YouTube video ID.
        chunk_size: Maximum length, in characters, of a chunk built from
            several entries.

    Returns:
        A list of non-empty chunk strings. An empty list is returned when the
        transcript cannot be retrieved; this is deliberate best-effort
        behaviour so one bad video does not stop the whole run.
    """
    try:
        # Request only English transcript
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])

        chunks = []
        current = ""
        for entry in transcript:
            text = entry['text']
            # Measure the chunk as it would actually be stored, including the
            # joining space, so chunk_size is a real upper bound.
            candidate = f"{current} {text}" if current else text
            if len(candidate) <= chunk_size:
                current = candidate
            else:
                # Never flush an empty buffer: this happens when the very
                # first entry is already longer than chunk_size.
                if current.strip():
                    chunks.append(current.strip())
                current = text
        if current.strip():
            chunks.append(current.strip())

        return chunks

    except Exception as e:
        print(f"❌ Skipped video {video_id}: {e}")
        return []



# ==== STEP 3: Create FAISS Index ====
def build_faiss_index(chunks):
    """Embed text chunks and load them into a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of chunk strings.

    Returns:
        A tuple ``(index, embeddings)``: the populated ``faiss.IndexFlatL2``
        and the 2-D float32 array of embeddings, one row per chunk.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    if len(chunks) == 0:
        raise ValueError("build_faiss_index requires at least one chunk")

    # Encode all chunks in one call so the model can batch them, and hand
    # FAISS the C-contiguous float32 matrix it expects.
    embeddings = np.ascontiguousarray(model.encode(list(chunks)), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings

# ==== STEP 4: Query FAISS ====
def retrieve_relevant_chunks(query, index, chunks, top_k=TOP_K):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: The question text to embed and search for.
        index: FAISS index built over the embeddings of ``chunks``.
        chunks: The chunk strings, in the same order they were indexed.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings, nearest first. Fewer are returned when
        fewer chunks are available.
    """
    if not chunks or top_k <= 0:
        return []

    # Never ask for more neighbours than there are chunks.
    k = min(top_k, len(chunks))
    query_embedding = np.asarray(model.encode(query), dtype=np.float32).reshape(1, -1)
    _distances, indices = index.search(query_embedding, k)

    # FAISS pads missing results with -1; indexing chunks[-1] would silently
    # return the last chunk as a bogus hit, so drop out-of-range positions.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

# ==== STEP 5: Query OpenAI ====
def ask_openai(chunks, question=None):
    """Ask OpenAI to answer from the retrieved transcript chunks.

    Args:
        chunks: Transcript chunk strings to supply as context.
        question: Optional question the chunks were retrieved for. When
            given, it is included in the prompt so the model knows what to
            answer. When omitted, the original generic "analyze" prompt is
            sent.

    Returns:
        The text content of the first completion choice.
    """
    content = "\n\n".join(chunks)
    if question:
        prompt = (
            "Answer the question using the following transcript chunks.\n\n"
            f"Question: {question}\n\n"
            f"Transcript chunks:\n\n{content}"
        )
    else:
        prompt = f"Analyze and answer based on the following transcript chunks:\n\n{content}"
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=0.5,
        max_tokens=1000
    )
    return response.choices[0].message.content

# ==== MAIN PROGRAM ====
def run_pipeline():
    """Interactive pipeline: search videos, index transcripts, answer questions.

    Prompts for a search topic, collects transcript chunks from the matching
    videos, builds a FAISS index over them, then loops asking for questions
    until the user types 'exit'. For each question the most relevant chunks
    are printed and sent, together with the question, to OpenAI.
    """
    topic = input("🔍 Enter a topic to search YouTube videos: ")
    videos = search_youtube_videos(topic)
    
    print("\n🎬 Found Videos:")
    for i, v in enumerate(videos, 1):
        print(f"{i}. {v['title']} (ID: {v['id']})")

    all_chunks = []
    for v in videos:
        print(f"\n⏳ Processing: {v['title']}")
        # Use the configured chunk size rather than the function's default.
        chunks = get_transcript_chunks(v["id"], chunk_size=CHUNK_SIZE)
        if chunks:
            all_chunks.extend(chunks)

    if not all_chunks:
        print("❌ No usable transcripts found.")
        return

    print("\n📦 Building vector database...")
    index, _ = build_faiss_index(all_chunks)

    while True:
        query = input("\n❓ Ask a question about the topic (or type 'exit'): ").strip()
        if query.lower() == "exit":
            break
        if not query:
            # A blank line is not a question; prompt again without calling the API.
            continue
        results = retrieve_relevant_chunks(query, index, all_chunks)
        print("\n🔍 Top Relevant Chunks:")
        for i, chunk in enumerate(results, 1):
            print(f"{i}. {chunk}\n")

        # Send the question with the context so the answer addresses it.
        summary = ask_openai(results, question=query)
        print("\n🧠 OpenAI Summary:")
        print(summary)

# ==== Run it ====
# Starts the interactive pipeline when this code runs as the main module.
if __name__ == "__main__":
    run_pipeline()


🔍 Enter a topic to search YouTube videos:  Nathan Babcock Quantum Biology



🎬 Found Videos:
1. Quantum Theory &amp; The New Observables: Chapter 1 of Physical Principles of Quantum Biology (ID: 1igKzDiWc84)
2. Quantum Biology: From Photons to Physiology (ID: mf6lkIipjF0)
3. FULL LECTURE - Physical Foundations of Quantum Biology (ID: bRpTpSJFGCs)
4. A Template for Quantum Biology (ID: 9u7rIODg2YU)
5. Quantum Biology for Health and Medicine (Resonance Symposium, São Paulo, Brazil, 17 November 2023) (ID: PsbkocsfDyo)

⏳ Processing: Quantum Theory &amp; The New Observables: Chapter 1 of Physical Principles of Quantum Biology

⏳ Processing: Quantum Biology: From Photons to Physiology

⏳ Processing: FULL LECTURE - Physical Foundations of Quantum Biology

⏳ Processing: A Template for Quantum Biology

⏳ Processing: Quantum Biology for Health and Medicine (Resonance Symposium, São Paulo, Brazil, 17 November 2023)

📦 Building vector database...



❓ Ask a question about the topic (or type 'exit'):  What quantum effects are found in biology



🔍 Top Relevant Chunks:
1. biology it's worth noting that Quantum biology is the study of biological processes that cannot be predicted or adequately described using only the principles of classical physics Quantum effects in biology include things like photosynthesis electron tunneling during cellular respiration and many fundamental aspects of molecular structure Quantum biology research has important implications for models of health and medicine with a variety of applications that I'll discuss today and it is

2. effects, quantum effects in biology. Uh but my view is that even when this approach is used at its best, it's essentially ad hoc because we're imagining a classical hay stack so to speak and going looking for quantum needles in it. Uh but really if we really believe quantum mechanics, we should begin with the quantum mechanical postulates first and then we can explore the full range of possible quantum states and effects at their most microscopic level at the level of atom


❓ Ask a question about the topic (or type 'exit'):  What proof does the author give for these effects?



🔍 Top Relevant Chunks:
1. challenging ones uh the electron deoization is first treated uh with Vanderwal's forces and dispersion. Uh now these kind of uh true quantum mechanical effects sometimes can be treated with relatively simple parameterized models. uh they're very important for things like the pi stacking of aromatic molecules and of course they're significant in any solvent system. Now in DNA in particular the electrostatic repulsion between stacked base pairs is balanced by the attractive London forces these

2. mechanics. Uh his trick was to focus only on observables, things that could conclusively be measured and counted in numbers. He based the new theory entirely on relationships between measurable quantities. And the reason why was because at that time they were trying to figure out the orbit of electrons around the nucleus in an atom. Uh but they couldn't measure the electron traveling in a circular orbit. the way we can observe the planets orbiting around the sun and e