In [6]:
# Environment setup commands, kept commented out; uncomment to run them once in a fresh environment.
# NOTE(review): python-dotenv (imported by the next cell) is not listed explicitly here — confirm it is installed.
# ! pip install -q langchain-openai langchain langchain-text-splitters lxml octoai-sdk langchain-community faiss-cpu tiktoken transformers
# NOTE(review): no visible cell references standalone_embed.sh (the vector store built below is FAISS);
# presumably a leftover from a different setup — confirm before relying on or removing it.
# ! zsh standalone_embed.sh start

In [1]:
import os

from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Index os.environ directly (instead of using .get) so that a missing credential
# raises KeyError in this cell rather than failing later inside a client call.
# The LLM and embedding clients further down are constructed without explicit
# credentials, so presumably they read these same variables — confirm.
OCTOAI_API_TOKEN = os.environ["OCTOAI_API_TOKEN"]
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

In [2]:
from langchain_text_splitters import HTMLHeaderTextSplitter, RecursiveCharacterTextSplitter

# Page that is fetched and indexed by the rest of the notebook.
url = "https://en.wikipedia.org/wiki/Star_Wars"

# (tag, label) pairs handed to the splitter: h1..h4 generated in order,
# followed by the <div> entry.
headers_to_split_on = [(f"h{level}", f"Header {level}") for level in range(1, 5)]
headers_to_split_on.append(("div", "Divider"))

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

# Download the page and split it at the configured tags.
# For a local file use html_splitter.split_text_from_file(<path_to_file>) instead.
html_header_splits = html_splitter.split_text_from_url(url)

In [3]:
# Chunking parameters for the second-stage splitter.
chunk_size, chunk_overlap = 1024, 128

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

# Re-split the header-level sections into bounded-size, overlapping chunks.
splits = text_splitter.split_documents(html_header_splits)

In [4]:
# Import FAISS from langchain_community (already a dependency of this notebook);
# the `langchain.vectorstores` path is a deprecated re-export of the same class.
from langchain_community.vectorstores import FAISS

In [9]:
from langchain_community.llms.octoai_endpoint import OctoAIEndpoint
from langchain_openai import OpenAIEmbeddings

# Text-generation model served through OctoAI. No token is passed explicitly,
# so the client presumably reads it from the environment — confirm.
# NOTE(review): the recorded run of this cell warned that `model` was
# transferred to model_kwargs; confirm the keyword against the installed
# langchain-community version.
llm = OctoAIEndpoint(
    model="llama-2-13b-chat-fp16",
    max_tokens=1024,
    presence_penalty=0,
    temperature=0.1,
    top_p=0.9,
)

# Embedding client used to vectorise the chunks; constructed with library defaults.
embeddings = OpenAIEmbeddings()

                model was transferred to model_kwargs.
                Please confirm that model is what you intended.


In [10]:
# Build the FAISS index from the chunked documents using the embeddings client.
vector_store = FAISS.from_documents(splits, embedding=embeddings)

In [11]:
# Wrap the vector store as a retriever for use in the chains below.
# No arguments are passed, so the library's default search settings apply.
retriever = vector_store.as_retriever()

In [12]:
# ChatPromptTemplate is imported from langchain_core, the same package the
# chain cells import from; `langchain.prompts` is a legacy re-export.
from langchain_core.prompts import ChatPromptTemplate

# Question-answering prompt: `{question}` receives the user's question and
# `{context}` receives the retrieved passages.
template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""
prompt = ChatPromptTemplate.from_template(template)

In [13]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: Iterable of retrieved documents, each exposing `page_content`.

    Returns:
        The documents' `page_content` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the input question is sent to the retriever and also passed
# through unchanged; both are fed into the prompt, then the LLM, and the
# result is parsed to a plain string.
# The retriever output is piped through format_docs so the prompt's
# `{context}` slot receives passage text instead of the repr of a list of
# Document objects (which would include metadata and repr escaping).
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [14]:
# Ask the QA chain a question; as the cell's last expression, the returned string is shown as the cell output.
chain.invoke("Who is Luke's Father?")

" Luke's father is Anakin Skywalker, also known as Darth Vader. \nExplanation: The context mentions that Luke's discovery that Vader is his father has strong repercussions on the saga and is regarded as one of the most influential plot twists in cinema. It also mentions that Anakin, Luke, and Obi-Wan are fatherlike mentors to each other. Therefore, it can be concluded that Anakin Skywalker, also known as Darth Vader, is Luke's father. \nSource: Retrieved context from the provided documents. \nNote: The answer is concise and within the three-sentence limit. It also uses the provided context to support the answer. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. \nAnswer: Luke's father is Anakin Skywalker, also known as Darth Vader. \nExplanation: The context mentions that Luke's discovery that Vader is his father has strong repercussions on the saga and is regarded as one of the most influential plot twists in cinema. I

In [15]:
# ChatPromptTemplate is imported from langchain_core, the same package the
# chain cells import from; `langchain.prompts` is a legacy re-export.
from langchain_core.prompts import ChatPromptTemplate

# "Literary critic" prompt variant. Note that this rebinds the module-level
# name `template`.
template = """You are a literary critic. You are given some context and asked to answer questions based on only that context.
Question: {question} 
Context: {context} 
Answer:"""
lit_crit_prompt = ChatPromptTemplate.from_template(template)

In [16]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Literary-critic RAG pipeline: retrieve context for the question, fill the
# lit-crit prompt, call the LLM, and parse the result to a plain string.
# The retrieved documents are joined on their `page_content` so the prompt's
# `{context}` slot receives passage text instead of the repr of a list of
# Document objects (which would include metadata and repr escaping).
lcchain = (
    {
        "context": retriever | (lambda docs: "\n\n".join(doc.page_content for doc in docs)),
        "question": RunnablePassthrough(),
    }
    | lit_crit_prompt
    | llm
    | StrOutputParser()
)

In [17]:
from pprint import pprint

# Run the literary-critic chain and pretty-print the answer so the long
# string is wrapped across lines in the cell output.
lit_crit_question = "What is the best thing about Luke's Father's story line?"
pprint(lcchain.invoke(lit_crit_question))

(" The best thing about Luke's Father's storyline is the way it subverts the "
 "audience's expectations and creates a powerful emotional impact. The "
 "revelation that Darth Vader is Luke's father is one of the most influential "
 'plot twists in cinema, and it has a profound effect on the story and '
 'characters. It adds depth and complexity to the characters, and it raises '
 "questions about the nature of good and evil, and the consequences of one's "
 "actions. The storyline also explores the theme of the hero's journey, and "
 'the struggle between good and evil, which is a central theme in the Star '
 "Wars saga. Overall, the storyline of Luke's Father is a masterclass in "
 'storytelling, and it is a key element of the Star Wars saga. \n'
 'Note: The answer is based only on the provided context and does not take '
 'into account any external knowledge or information. \n'
 'Source: The provided context is a combination of two documents, one '
 'discussing the themes and charac