In [1]:
from torch import cuda, bfloat16
import transformers
#Deci/DeciLM-6b-instruct
#meta-llama/Llama-2-13b-chat-hf
model_id = 'meta-llama/Llama-2-13b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# set quantization configuration to load large model with less GPU memory

# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

# begin initializing HF items, you need an access token
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    #quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)

# enable evaluation mode to allow model inference
model.eval()

print(f"Model loaded on {device}")



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)



In [3]:
stop_list = ['\nHuman:', '\n```\n']


stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [4]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [5]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [6]:
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    temperature=0.2,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=500,  # max number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [7]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)


In [8]:
from langchain.chains import LLMChain
from langchain import PromptTemplate


In [9]:
from langchain.document_loaders import TextLoader

loader = TextLoader("context_handbook.txt")
documents = loader.load()

# from langchain.document_loaders import PyPDFLoader

# loader = PyPDFLoader("grad-handbook-2023.pdf")
# documents = loader.load_and_split()

In [14]:
documents

[Document(page_content='Your goal is to answer questions as accurately as possible based on the instructions and below handbook content provided.\n\nYou are Humanoid robot named Kai. your task is to provide information to people who will be talking to you. you should respond from the information given below and do not provide new information which is not present in the below handbook.\n\nInformation about you. \nGraduate Student Handbook\nFall 2023\nGeneral Information, Policies, and Procedures\nfor Graduate Students beginning their studies in\nFall 2023 or later\n338 Davis Hall\nUniversity at Buffalo, The State University of New York\nBuffalo, NY 14260-2500\nTelephone: (716) 645-3180\nFax: (716) 645-3464\nhttp://www.cse.buffalo.edu\nhttps://engineering.buffalo.edu/content/dam/engineering/computerscience-engineering/graduate/resources-for-current-students/graduatehandbooks/grad-handbook-2023.pdf\n\nContents\nContents i\n1 INTRODUCTION 1\n1.1 Nature of this Document . . . . . . . . . . 

In [10]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

In [11]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
#sentence-transformers/all-mpnet-base-v2
#sentence-transformers/all-MiniLM-L6-v2
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = {"device": "cuda:0"}

embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# storing embeddings in the vector store
vectorstore = FAISS.from_documents(all_splits, embeddings)

In [22]:
from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(search_kwargs={"k": 5}), return_source_documents=False,verbose=True,)

In [23]:
chat_history = []

In [24]:
# custom_prompt_template = """Use the following pieces of information to answer the user's question.
# If you don't know the answer, just say that you don't know, don't try to make up an answer.

# Context: {context}
# Question: {question}

# Only return the helpful answer below and nothing else.
# Helpful answer:

In [25]:
query = "What are the course requirements for masters students?"

In [23]:
chat_history = []


In [29]:
#Which courses can i pursue in first semseter
#What are the course requirements for masters students?
#What is the course name of CSE 574
#Whom to contact regarding the financial aid or billing?
chat_history = []
query = "Explain about Master's thesis in detail?"
result = chain({"question": query, "chat_history": chat_history})
#chat_history = [(query, result["answer"])]
print(result['answer'])



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Director of Graduate Studies or the Chair of the Department.
3.5.2 Master’s Thesis
The MS Thesis involves doing research on a topic of mutual interest to the major professor and the student. It is expected that the thesis work should be of publishable quality. The contents can be:
• a survey of the state of the art in a well-defined area of computer science and engineering,
• a new solution to a well-motivated problem,
7https://grad.buffalo.edu/succeed/current-students/policy-library.html
18

3.3.2 Approval of Course of Study . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17
3.4 Miscellaneous . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 

In [22]:
result

{'question': 'What is the course name of CSE 574?',
 'chat_history': [],
 'answer': ' CSE 574 is titled "Intro to Machine Learning".',
 'source_documents': [Document(page_content='This is not a programming class: students are expected to know how to write and troubleshoot programs of significant complexity, and to acquire new technical knowledge as necessary. The main learning\nobjectives are related to teamwork, product development, client management, and navigating the tradeoffs and challenges of creating an application for use in the real world.\nAdmission to CSE 611 is limited; Students must contact the course instructor to apply.\n16\n3.3. Grade Requirements', metadata={'source': 'context_handbook.txt'}),
  Document(page_content='• CSE 503: Computer Science for Non-Majors I\n• CSE 504: Computer Science for Non-Majors II\n• CSE 507: SOA and Web Services for non-Majors\n11\nGRADUATE HANDBOOK\n• CSE 544: Machine Learning and Society for non-Majors (Take CSE 540 instead)\n• CSE 699: S

In [26]:
import spacy

# Load the English NER model
nlp = spacy.load("en_core_web_lg")

# Input text
text = "Provide directions from Davis Hall to Lockwood Library"

# Process the text with the NER model
doc = nlp(text)

# Extract location-related entities
locations = [ent.text for ent in doc.ents if ent.label_ == "GPE" or ent.label_ == "LOC"]

print("Location entities:", locations)


Location entities: []


In [20]:
if "?" not in "How are you":
    print('yes')

yes


In [22]:
ENV_HOST = "https://cloud.langfuse.com"
ENV_SECRET_KEY = "sk-lf-7cf88429-845c-4f96-bc84-5307dff121d7"
ENV_PUBLIC_KEY = "pk-lf-575c54a2-baaf-408c-a4c1-de8d05e8d973"

In [23]:
from langfuse.callback import CallbackHandler

handler = CallbackHandler(ENV_PUBLIC_KEY, ENV_SECRET_KEY, ENV_HOST)

In [None]:
result = chain.run({"question": "what are the Degree Requirements for Students in the Systems and AI/ML Course Tracks?", "chat_history": chat_history}, callbacks=[handler])

print(result)
handler.langfuse.flush()