In [18]:
from dotenv import load_dotenv
import os
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.runnables import RunnablePassthrough, RunnableParallel


In [19]:
# Create a LangChain handle for the Llama 3 model exposed by Ollama
ollama_llm = Ollama(model="llama3")

In [4]:
# Load environment variables via python-dotenv, then read the OpenAI key
# from the environment and build the GPT chat model with it
load_dotenv()
API_KEY = os.environ.get('OPENAI_API_KEY')
Model = 'gpt-3.5-turbo'
gpt_llm = ChatOpenAI(
    model=Model,
    api_key=API_KEY,
)

In [5]:

# Smoke test: query the chat model directly (the reply is a full AIMessage)
gpt_llm.invoke("what is a bot")

AIMessage(content='A bot, short for robot, is a software program that performs automated tasks on the internet. Bots can be programmed to perform a wide range of functions, such as answering questions, providing customer service, collecting data, and more. Bots can be used for both helpful and malicious purposes, depending on how they are programmed.', response_metadata={'token_usage': {'completion_tokens': 66, 'prompt_tokens': 11, 'total_tokens': 77}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-61009fc6-2fb3-4a68-9693-695307edd863-0', usage_metadata={'input_tokens': 11, 'output_tokens': 66, 'total_tokens': 77})

In [6]:
# Pipe the model into StrOutputParser so the chain returns only the reply
# text as a plain string instead of the whole AIMessage object
parser = StrOutputParser()
gpt_chain = gpt_llm | parser
gpt_chain.invoke("what is a bot")

'A bot, short for robot, is a computer program that performs automated tasks on the internet. Bots can be programmed to perform a wide range of functions, such as answering customer service inquiries, collecting data, or interacting with users on social media platforms. Some bots are designed to mimic human behavior, while others simply carry out simple tasks without any human-like interactions.'

In [7]:
# Read data.txt (decoded as UTF-8) into a list of Document objects
loader = TextLoader("data.txt", encoding="utf-8")
document = loader.load()

In [8]:
# Display the loaded Document list to inspect the raw text.
# NOTE(review): this renders everything that was loaded into the cell output;
# consider showing a short slice of page_content instead.
document

[Document(metadata={'source': 'data.txt'}, page_content="If you are the spouse or minor child of an H-1B visa holder, you are eligible to travel to the U.S. with your spouse or parent on an H-4 visa. If you want to pursue your education while you are in the U.S. at a university or college during your stay, you will have a few options, including studying on your H-4 visa or choosing to study on an F-1 visa. There are advantages and disadvantages to both options, making it important for you to understand the differences so that you can make the decision that makes the most sense for your situation and plans.\n\nWhat is an H-4 visa?\nAn H-4 visa is a dependent visa that is available to the spouses and minor unmarried children who are younger than 21 of H-1B visa holders. The H-1B visa is a nonimmigrant visa that is dual intent. It allows the visa holder to live and work for a sponsoring employer for up to three years and is renewable. If the H-1B visa holder renews the H-1B visa, he or sh

In [9]:
# Cut the loaded document into small overlapping chunks for embedding
# (chunk_size=200 with chunk_overlap=50), then peek at one chunk.
# NOTE(review): with chunks this small, short headings end up as chunks of
# their own - confirm that 200 is the intended size.
spliter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)
chunks = spliter.split_documents(document)
chunks[3]

Document(metadata={'source': 'data.txt'}, page_content='so that you can make the decision that makes the most sense for your situation and plans.')

In [10]:
# Embed every chunk with OpenAI embeddings and index the resulting vectors
# in a FAISS vector store, then expose that store as a retriever
vector_storage = FAISS.from_documents(
    chunks,
    OpenAIEmbeddings(),
)
retriever = vector_storage.as_retriever()

In [11]:
# Sanity check: fetch the chunks most similar to a sample question
# (four documents come back for this query)
retriever.invoke("What is an H-4 visa?")

[Document(metadata={'source': 'data.txt'}, page_content='What is an H-4 visa?'),
 Document(metadata={'source': 'data.txt'}, page_content='Applying for the H-4 visa'),
 Document(metadata={'source': 'data.txt'}, page_content='An H-4 visa is a dependent visa that is available to the spouses and minor unmarried children who are younger than 21 of H-1B visa holders. The H-1B visa is a nonimmigrant visa that is dual intent.'),
 Document(metadata={'source': 'data.txt'}, page_content='What is an F-1 visa?')]

In [12]:
# Prompt text used for every query: it sets the assistant's role and tells it
# to answer from the supplied context only. The {context} and {question}
# placeholders are filled in when the prompt is formatted.
template = """
You are AI-powered chatbot designed to provide
information and assistance for high school students
planning on going to college based on the context provided to you only.

Context:{context}
Question:{question}
"""


In [13]:
# Wrap the template string in a PromptTemplate, then render it once with
# placeholder values to check what the final prompt text looks like
prompt = PromptTemplate.from_template(template=template)
prompt.format(
    question="This is a question to answer",
    context=" Here is a context to use ",
)

'\nYou are AI-powered chatbot designed to provide\ninformation and assistance for high school students \nplanning on going to college based on the context provided to you only.\n\nContext: Here is a context to use  \nQuestion:This is a question to answer\n'

In [14]:
def _format_docs(docs):
    """Merge retrieved chunks into a single plain-text context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` string
            (the Document objects returned by the retriever).

    Returns:
        The chunk texts joined by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Build the two inputs the prompt needs from the user's question:
#   context  -> the question is sent to the retriever and the matching chunks
#               are merged into plain text; without the formatting step the
#               prompt would contain the repr of a list of Document objects,
#               metadata included
#   question -> RunnablePassthrough forwards the question unchanged
result = RunnableParallel(
    context=retriever | _format_docs,
    question=RunnablePassthrough(),
)

# Full RAG pipeline: assemble the inputs, fill the prompt template, call the
# chat model (gpt_llm), and let the parser reduce the reply to a plain string
chain = result | prompt | gpt_llm | parser

In [15]:
# Ask the RAG chain a definition-style question about the F-1 visa
chain.invoke("What is an F-1 visa")

'An F-1 visa is a type of visa program designed to allow eligible international students to study in the U.S. at approved colleges and universities. The schools must be certified by the Student Exchange Visitor Program (SEVP).'

In [16]:
# Ask the RAG chain an eligibility question (query text kept exactly as written)
chain.invoke("Is a student on F-1 visa eligible for in-state tuiton")

'No, typically students on an F-1 visa are not eligible for in-state tuition. In-state tuition is usually reserved for residents of the state or individuals with certain visa statuses, such as H-4 visa holders in some states.'

In [20]:
# Ask the RAG chain about the Running Start program.
# NOTE(review): confirm the answer is drawn from data.txt and not from the
# model's general knowledge - the prompt asks for context-only answers.
chain.invoke("What is Running Start")

'Running Start is a program in the United States that allows high school students to take college-level courses for both high school and college credit. This program can help students get a head start on their college education and potentially save money on tuition costs.'