In [None]:
from pytube import Playlist
from langchain.document_loaders import YoutubeLoader
from langchain.llms import AzureOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

In [None]:
# Build the playlist URL from its ID and collect the URL of every video in it.
playlist_id = "PLj6h78yzYM2Mwt-aVXI6ItZX5s9izAp0F"
playlist_link = f"https://www.youtube.com/playlist?list={playlist_id}"
youtube_url_list = Playlist(playlist_link).video_urls

In [None]:
# Replace the playlist-derived list with a single video, so the cells below
# only process this one URL. Skip this cell to process the whole playlist.
youtube_url_list = [
    "https://www.youtube.com/watch?v=C_78DM8fG6E",
]

In [None]:
# Build a retrieval QA chain over the transcripts of the videos in
# `youtube_url_list`: load each transcript, split it into chunks, embed the
# chunks into a Chroma index, and wrap the index in a RetrievalQA chain.

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)

texts = []
for url in youtube_url_list:
    # Creating the loader and loading the transcript both happen inside the
    # try block, so a video that fails is reported and skipped instead of
    # falling through to a loader left over from an earlier iteration (or to
    # an undefined name on the first iteration).
    try:
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=False)
        video_documents = loader.load()
    except Exception as e:
        print(f"Skipping {url}: {e!r}")
        continue

    texts.extend(text_splitter.split_documents(video_documents))

# Every chunk is already capped at 2000 characters, so a further split with a
# larger chunk_size would not divide anything; a single pass is sufficient.

# Fail early with a clear message rather than building an index from nothing.
if not texts:
    raise RuntimeError(
        "No transcript could be loaded from youtube_url_list; nothing to index."
    )

# Select which embeddings we want to use. chunk_size=1 embeds one text per
# request (NOTE(review): presumably to respect an Azure OpenAI batch limit -
# confirm before raising it).
embeddings = OpenAIEmbeddings(chunk_size=1)

# Create the vector store to use as the index.
db = Chroma.from_documents(texts, embeddings)

# Expose this index through a retriever interface. Pass
# search_kwargs={"k": 1} as well to retrieve only the closest chunk.
retriever = db.as_retriever(search_type="similarity")

# Create a chain to answer questions; the retrieved chunks are returned with
# the answer because return_source_documents=True.
qa = RetrievalQA.from_chain_type(
    llm=AzureOpenAI(temperature=0, deployment_name="text-davinci-003"),
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Ask the retrieval QA chain one question and print the chain's full output.
query = "What are the implications of AI?"
result = qa(dict(query=query))
print(result)

In [None]:
from langchain.chains.conversation.memory import ConversationBufferMemory
from langchain import OpenAI, LLMChain, PromptTemplate

# Prompt for a conversational chain: {chat_history} is supplied by the memory
# object below (same key), {question} by each predict() call.
template = """You are a teacher in physics for High School student. Given the text of question, it is your job to write a answer that question with example.
{chat_history}
Human: {question}
AI:
"""
prompt_template = PromptTemplate(
    input_variables=["chat_history", "question"],
    template=template,
)
memory = ConversationBufferMemory(memory_key="chat_history")

# verbose=True makes the chain print each fully formatted prompt, which shows
# the accumulated chat history being injected.
llm_chain = LLMChain(
    llm=AzureOpenAI(temperature=0, deployment_name="text-davinci-003"),
    prompt=prompt_template,
    verbose=True,
    memory=memory,
)

# First turn: the answer itself is not shown, but the exchange is recorded in
# `memory`, so it becomes part of {chat_history} for the next question.
llm_chain.predict(question="What are the implications of AI?")

# Second turn: answered with the first exchange available as chat history.
result = llm_chain.predict(question="What is Joules?")
print(result)