In [40]:
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from llama_index import Document, SimpleDirectoryReader
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain


In [37]:
# Source video whose content is loaded through LangChain's YoutubeLoader.
url = "https://www.youtube.com/watch?v=2eWuYf-aZE4&t=6s"

# Load the video into LangChain documents.
loader = YoutubeLoader.from_youtube_url(url)
raw = loader.load()

# Break the loaded documents into chunks of at most 1000 characters, with
# 100 characters of overlap between consecutive chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
documents = splitter.split_documents(raw)



In [38]:
# Build a FAISS vector store over the split chunks, embedding them with
# OpenAIEmbeddings.
db = FAISS.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
)

# Chat model used later to answer questions about the retrieved chunks.
model = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.8,
)

In [41]:
# Question asked about the video; used both for retrieval and in the prompt.
query = "what is stable diffusion?"

In [42]:
# Ask the FAISS store for the 4 chunks most similar to the query and merge
# their text into one context string for the prompt.
related_docs = db.similarity_search(query, k=4)
docs_page_content = " ".join(d.page_content for d in related_docs)

# Prompt with two placeholders: the user's question and the retrieved text.
prompt = PromptTemplate(
    input_variables=["question", "docs"],
    template="""You are a helpful Youtube assistant that can answer questions about
    videos bsaed on the video's transcript. 
    Answer the following questions: {question}
    By searching the following video transcript: {docs} 
    Only use the factual information from the transcript to answer the question.
    If you feel like you don't have enough information to answer the question. Please say 
    "I don't know".
    Your answers should be detailed.""",
)

# The chat model is bound to the name `model` when the FAISS index is built;
# no variable called `llm` exists in this notebook, so passing `llm=llm`
# raised NameError on a fresh kernel.
response = LLMChain(llm=model, prompt=prompt).predict(
    question=query,
    docs=docs_page_content,
)

# Strip newlines so the answer prints as a single line.
response = response.replace("\n", "")
print(response)


Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised APIConnectionError: Error communicating with OpenAI: ('Connection aborted.', ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)).


KeyboardInterrupt: 

In [6]:

# Read the contents of the local ./data directory into llama_index documents.
docs = SimpleDirectoryReader(input_dir="./data").load_data()

In [10]:
# Show the first loaded document as a quick sanity check of the load step.
print(docs[0])

Doc ID: 9922759d-b95d-422a-b507-76720c1bb44a
Text: hey everyone my name is Gustav sadistan I'm co-president at
Spotify I was asked by my colleagues to do a deep dive on AI for all
of you from Engineers to Executives of Spotify specifically on this
new type of generative AI and try to explain how these things actually
work how is it that we have services like Chachi BT where you can
create an ent...


In [12]:
from llama_index.node_parser import SimpleNodeParser

# Create a node parser with its default settings and use it to turn the
# loaded documents into nodes.
parser = SimpleNodeParser.from_defaults()
nodes = parser.get_nodes_from_documents(docs)

In [19]:
# Inspect one parsed node (index 17) to see what a node looks like.
# NOTE(review): raises IndexError if parsing yields fewer than 18 nodes.
print(nodes[17])

Node ID: f49a676f-e729-450c-a21d-bdddcde21b65
Text: cats here but this could be anything it could be a picture of an
airplane and the text saying an airplane flying and then it's going to
kind of learn What airplane like noise looks like or the removal of
airplane like noise and what you can do is you can actually take
something like a song right a song is an audio wave but it turns out
you can t...


In [20]:
from llama_index import VectorStoreIndex

# Build a vector index directly from the parsed nodes.
index = VectorStoreIndex(nodes=nodes)

In [21]:
# Create a query engine on top of the index, using the default settings.
# NOTE(review): this cell only creates the engine; any answer text shown
# under it in the notebook presumably came from a query call that is no
# longer in the cell - confirm and restore it if so.
query_engine = index.as_query_engine()


The key message of the author is that it is important for everyone, regardless of their background or profession, to understand the advancements in AI, particularly in the field of generative AI. The author believes that this is a significant development that will be talked about for years to come, similar to the splitting of the atom. The author also highlights the need to demystify complex vocabulary and concepts surrounding AI and emphasizes that while the practical implementation may be complicated, the theory behind it can be understood without extensive mathematical knowledge.


In [29]:
# Ask the query engine a question and print its answer.
# Note: this rebinds `response`, which an earlier cell used for the
# LLMChain answer string.
response = query_engine.query("give me an example of embedding")
print(response)

An example of embedding is when you take a piece of text, such as a sentence, and compress it into a vector representation. This vector representation captures the important dimensions or information of the text. The embedding process involves reducing the original text into a smaller set of numbers that still retain the essence of the text. This embedding can then be used for various purposes, such as text analysis, language modeling, or even compression algorithms.
