In [49]:
from youtube_transcript_api import YouTubeTranscriptApi,TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from  langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from dotenv import load_dotenv

load_dotenv()

True

In [44]:
video_id = '-HzgcbRXUK8'
try:
    ytt_api = YouTubeTranscriptApi()
    transcript_list = ytt_api.fetch(video_id,languages=['en'])
except TranscriptsDisabled:
    print('No transcript available for this video')
    
transcript_list



In [None]:
# combining all the text together

transcript=" ".join(transcript_chunk.text for transcript_chunk in transcript_list)
print(transcript)

- It's hard for us humans to make any kind of clean predictions about highly nonlinear, dynamical systems. But again, to your point, we might be very surprised
what classical learning systems might be able to do about even fluid. - Yes, exactly. I mean, fluid dynamics,
Navier-Stokes equations, these are traditionally thought of as very, very difficult intractable problems to do on classical systems. They take enormous amounts of compute, you know, weather prediction systems, you know, these kind of things all involve fluid dynamics calculations. But again, if you look
at something like Veo, our video generation model, it can model liquids quite
well, surprisingly well, and materials, specular lighting. I love the ones where, you know, there's people who generated videos where there's like clear liquids going through hydraulic presses, and then it's being squeezed out. I used to write physics
engines and graphics engines in my early days in gaming, and I know it's just so painstakingly 

In [None]:
# making documents of the text 

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)

In [47]:
chunks = splitter.create_documents([transcript])
chunks

[Document(metadata={}, page_content="- It's hard for us humans to make any kind of clean predictions about highly nonlinear, dynamical systems. But again, to your point, we might be very surprised\nwhat classical learning systems might be able to do about even fluid. - Yes, exactly. I mean, fluid dynamics,\nNavier-Stokes equations, these are traditionally thought of as very, very difficult intractable problems to do on classical systems. They take enormous amounts of compute, you know, weather prediction systems, you know, these kind of things all involve fluid dynamics calculations. But again, if you look\nat something like Veo, our video generation model, it can model liquids quite\nwell, surprisingly well, and materials, specular lighting. I love the ones where, you know, there's people who generated videos where there's like clear liquids going through hydraulic presses, and then it's being squeezed out. I used to write physics"),
 Document(metadata={}, page_content="engines and gr

In [48]:
chunks[100]

Document(metadata={}, page_content='and you led this effort. What did it take to go from let\'s say, quote, unquote, "losing" to quote, unquote, "winning"\nin a span of a year? - Yeah, well firstly, it\'s absolutely incredible\nteam that we have, you know, led by Koray\nand Jeff Dean and Oriol and the amazing team we have on Gemini, absolutely world class. So you can\'t do it\nwithout the best talent. And of course you have, you know, we have a lot of great compute as well. But then it\'s the research\nculture we\'ve created, right? And basically coming together, both different groups in Google, you know, there was Google\nBrain, a world-class team, and then the old DeepMind. And pulling together all the\nbest people and the best ideas and gathering around to make the absolute\ngreater system we could. And it has been hard, but we\'re all very competitive. And we, you know, love research. It\'s just so fun to do. And we, you know, it\'s')

In [50]:
# storing embeddings of the  transcript in the vector store

embedding_model = HuggingFaceEmbeddings(model='sentence-transformers/all-MiniLM-L6-v2')

vector_store = FAISS.from_documents(
    embedding=embedding_model,
    documents=chunks
    
)

vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x175388d3490>

In [51]:
vector_store.index_to_docstore_id

{0: '370d0d6f-aa40-4ae4-8490-c03eebb6c6a4',
 1: 'c0c1a221-7228-4041-9ae4-d1e4c5ed06c2',
 2: '9572c16b-a81b-4a96-8717-060ecb56c6e2',
 3: '2331a84b-54fb-479a-a56c-bd17456bf8f7',
 4: 'b2fa4cbc-a3e8-43ef-8c5f-d2f5711d9d42',
 5: 'ff428214-f1ae-4b4a-a482-1765052be2f8',
 6: '2edf6b7a-5b21-4b4f-a6bd-b3516cc9fd4d',
 7: 'd46dd25b-1b31-440c-b20b-50f1440ade9a',
 8: '425c9e00-478c-4233-b1ef-2c7f84e5c47e',
 9: '7700f625-480e-497b-b691-239302fa13f3',
 10: '9638955c-51f1-40c1-978a-92205edaaf19',
 11: '6989a718-5bfb-4e23-8db7-a68645629d10',
 12: '12535fbf-0a72-41b0-884a-0a8bf5912c72',
 13: 'e68d6aa4-7d10-4cc0-9e9c-97307c3453f5',
 14: '6f52fd46-0a18-4eb7-a22a-fb694a9df15b',
 15: 'c3dc8ae6-ef60-4e23-8578-58e3251e1b50',
 16: '6444567e-3ac0-41be-8ef5-c46d918261b3',
 17: '591f8b03-e621-42e6-8af4-1a3c9b149e4c',
 18: '4ef43475-224f-45d4-8fb5-b85582b0f101',
 19: '2d4fae5f-fe48-47f4-9d26-8f88618397df',
 20: '67486ceb-2674-4727-b985-c19fa202b0a7',
 21: 'bb8ca1a5-822d-415a-9c20-40a9a5f7c58f',
 22: '88deea79-710d-

In [56]:
print(vector_store.get_by_ids(['bace987a-82b6-4382-92cb-fb0dfa403fc1']))

[Document(id='bace987a-82b6-4382-92cb-fb0dfa403fc1', metadata={}, page_content="who occasionally get swept up and the chanting online crowds\nthat want to tear down others don't lose themselves in it too much. In the end, I still think there's more\ngood than bad in people, but we're all, each of us, a mixed bag. I know I am very much flawed. I speak awkwardly. I sometimes say stupid shit. I can get irrationally emotional. I can be too much of a\ndick when I should be kind. I can lose myself in a biased rabbit hole before I wake up to the bigger, more\naccurate picture of reality. I'm human and so are you, for better or for worse. And I do still believe we're in this whole\nbeautiful mess together. I love you all.")]


In [53]:
len(vector_store.index_to_docstore_id)

185