# AdelphiLM: A Conversational Language Model

AdelphiLM is an implementation of a Conversational Language Model based on Langchain library. Unlike the current implementation that just returns a link to complex questions, AdelphiLM aims to function similarly to ChatGPT, providing conversational responses to user queries.

The model setup involves loading documents, splitting them into chunks, creating embeddings, building a vector store, setting up the LLMS model, and initializing memory and chain components.

In this notebook, we'll walk through the installation process, setting up the model, and running the Retrieval-Augmented Generation (RAG) Chain for conversational interactions.


### Install Statements

In [14]:
%pip install langchain faiss-gpu transformers torch sentence-transformers ctransformers llama-cpp-python transformers ragas pandas

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


### Import

In [15]:
import pickle
import time
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain_core.documents import Document
from datasets import Dataset

## Model Setup

In [3]:
# Function to print execution time
def print_execution_time(message, start_time):
    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"{message}: {elapsed_time} seconds")

# Function to load documents from pickle object
def load_documents_from_pickle(pickle_file):
    start_time = time.time()
    with open(pickle_file, 'rb') as f:
        documents = pickle.load(f)
    print_execution_time("\nLoaded Documents", start_time)
    return documents

# Function to split text into chunks
def split_text_into_chunks(documents):
    start_time = time.time()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    text_chunks = text_splitter.split_documents(documents)
    print_execution_time("\nSplit Text", start_time)
    return text_chunks

# Function to create embeddings
def create_embeddings():
    start_time = time.time()
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': "cuda:0"}) # "cpu" if no GPU exists
    print_execution_time("\nCreated Embeddings", start_time)
    return embeddings

# Function to create vector store
def create_vector_store(text_chunks, embeddings):
    start_time = time.time()
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    print_execution_time("\nCreated vector store", start_time)
    return vector_store

# Function to create LLMS model
def create_llms_model():
    start_time = time.time()
    llm = CTransformers(model="/home/kurtmuller/Documents/Projects/AIJobInterviewPrepBot-master/mistral-7b-instruct-v0.1.Q4_K_M.gguf", config={'max_new_tokens': 1000, 'temperature': 0.01, 'context_length': 16000})
    print_execution_time("\nCreated LLM", start_time)
    return llm

# Define setup function for the model
def setup_model():
    global documents, text_chunks, embeddings, vector_store, llm, chain

    # Loading of documents
    # /content/drive/MyDrive/CSC690 Capstone/test.txt
    # /content/drive/MyDrive/CSC690 Capstone/adelphidata.txt
    # /home/kurtmuller/Documents/Projects/TestFlask/adelphidata.txt
    documents = load_documents_from_pickle("./output.pkl")

    # Split text into chunks
    text_chunks = split_text_into_chunks(documents)

    # Create embeddings
    embeddings = create_embeddings()

    # Create vector store
    vector_store = create_vector_store(text_chunks, embeddings)

    # Create LLMS model
    llm = create_llms_model()

    # Create memory
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Create Chain
    chain = ConversationalRetrievalChain.from_llm(llm=llm, chain_type='stuff',
                                                  retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
                                                  memory=memory)

# Define chat function
def conversation_chat(query):
    start_time = time.time()  # Start time
    query_with_instructions = query + " Be concise. Stick to the context. If a question that is asked is not found in the context, just say \"I could not find that in the given context.\" Be helpful. Avoid speculation. Use natural language. Provide accurate information. Use a maximum of 5 sentences"
    # Call the chain with the modified query
    result = chain({"question": query_with_instructions, "chat_history": []})  # Initialize chat history as empty list
    end_time = time.time()  # End time
    elapsed_time = end_time - start_time  # Calculate elapsed time
    return result["answer"], elapsed_time


## Initiate The RAG Chain

In [4]:
setup_model()


Loaded Documents: 0.19479823112487793 seconds

Split Text: 15.107919931411743 seconds


  from .autonotebook import tqdm as notebook_tqdm



Created Embeddings: 5.907993316650391 seconds

Created vector store: 333.7092516422272 seconds

Created LLM: 1.5255153179168701 seconds


## Use the RAG Chain

In [16]:
# Example usage of conversation_chat function
query = "Tell me about Adelphi"
response, query_time = conversation_chat(query)
print("Query:", query)
print("Response:", response)
print("Query Time:", query_time)

Query: Tell me about Adelphi
Response:  Adelphi University is located at One South Avenue, Garden City, NY 11046-3702. It is a private university in the United States that offers undergraduate and graduate programs. To learn more about Adelphi University, visit their website at adelphiedu or contact Todd Wilson, Strategic Communications Director, at twilson@adelphiedu for further information.
Query Time: 62.724855184555054


## Retrieval-Augmented Generation for Answering Strategies (RAGAS)

RAGAS, short for Retrieval-Augmented Generation for Answering Strategies, is a methodology used for testing language models, particularly conversational ones like AdelphiLM. It combines retrieval-based and generation-based approaches to generate responses to user queries.

In RAGAS, the model first retrieves a set of relevant documents or passages from a large corpus using a retrieval mechanism. These documents serve as context for generating a response. Then, a generation model, often a large language model like GPT, is used to generate a response based on the retrieved context.

This approach helps in evaluating the model's ability to understand the query, retrieve relevant information from a large corpus, and generate coherent and relevant responses. RAGAS is widely used in evaluating conversational AI models, including chatbots and virtual assistants, for their effectiveness in real-world scenarios.


In [10]:
#len(documents.page_content[0])
print(len(documents[1].page_content[:6500]))

6500


In [18]:
# # Example usage of conversation_chat function
# query = """\
# You are a chatbot that helps students, faculty, and others learn more about Adelphi University. For each context, create a question that is specific to the context. Avoid creating generic or general questions.

# question: a question about the context.

# Format the output as JSON with the following keys:
# question

# context: {context}
# """
# # Set the context to the first document
# context = documents[0]
# response, query_time = conversation_chat(query.format(context=context))
# print("Query:", query)
# print("Response:", response)
# print("Query Time:", query_time)


### Have the model generate a question based on a specific context

In [12]:
# Example usage of conversation_chat function
query = "Give me 10 questions that you can create based on the provided context. The output should be in a JSON format"
response, query_time = conversation_chat(query)

print("Query:", query)
print("Response:", response)
print("Query Time:", query_time)

Query: Give me 10 questions that you can create based on the provided context. The output should be in a JSON format
Response:  Web hosting is a service that allows individuals and organizations to publish their websites on the Internet. It typically involves renting space on a server from a web hosting provider, which provides the necessary infrastructure and support for the website to be accessible to users. In the case of Adelphi University's Panther system, it is an unix-based system that can be used by individuals for electronic file storage and creating personal web pages. It is not meant for official Adelphi department sites and accounts are available to all Adelphi students, faculty, and staff.
Query Time: 1946.5393521785736


In [11]:
# Example usage of conversation_chat function for 10 documents
#query_template = """\
""" You are a chatbot that helps students, faculty, and others learn more about Adelphi University. For each context, create a question that is specific to the context. Avoid creating generic or general questions.

question: a question about the context.

Format the output as JSON with the following keys:
question

context: {context}
""" 
query_template = """
You are a chatbot that helps students, faculty, and others learn more about Adelphi University. For each context, create a question that is specific to the context. 
Avoid creating generic or general questions. Your response should only be a question you have created about the context, nothing else. 



context: {context}
"""

# Define a list to store responses
responses = []

run = 0
# Loop through the first 10 documents
for i in range(10):
    # Set the context to the current document
    context = documents[i]
    
    # Limit the document to the first 8000 tokens
    context_text = context.page_content[:4500]  # Assuming page_content attribute contains the text
    
    # Format the query with the current context
    formatted_query = query_template.format(context=context_text)
    
    # Call the conversation_chat function with the formatted query
    response, query_time = conversation_chat(formatted_query)
    
    # Append the response to the list of responses
    responses.append(response)
    
    # Print run number
    run += 1
    print("Run: " + str(run))

# Print out all responses
for i, response in enumerate(responses):
    print(f"Response {i+1}: {response}")


  warn_deprecated(


Response 1:  What is Adelphi University known for?
Response 2:  Adelphi University's 100th anniversary celebration.
Response 3:   Adelphi University has a long history dating back to its founding in 1862. It was originally established as the New York College for Women and later became known as Adelphi University in 1906. Over the years, the university has undergone several transformations, including becoming a co-educational institution in 1975 and expanding its offerings to include doctoral programs in 2008. Today, Adelphi is a highly respected nationally ranked university with a focus on personalized learning and career success.
Response 4:   Some ways to celebrate Adelphi University's 100th anniversary include joining the yearlong party, watching and sharing the th celebration video, making a gift to the university, exploring notable alumni, traditions, and timeline of the university since its founding in Brooklyn.
Response 5:  Adelphi University is known for its personalized approa

In [76]:
from langchain.evaluation import load_evaluator
from pprint import pprint as print

# create evaluator
evaluator = load_evaluator("labeled_criteria", criteria="correctness", llm=llm, requires_reference=True)

prompt = "Where is Adelphi located?"
pred = "One South Avenue"

# evaluate
eval_result = evaluator.evaluate_strings(
    prediction=pred,
    input=prompt,
    reference="Adelphi is located at One South Avenue."
)

# print result
print(eval_result)

{'reasoning': '[BEGIN REASONING]\n'
              '1. Correctness: The input is asking for the location of '
              'Adelphi. The reference states that Adelphi is located at One '
              'South Avenue. Therefore, the submission must be correct if it '
              'answers the question correctly.\n'
              '2. Accuracy: The input asks for the location of Adelphi. The '
              'reference states that Adelphi is located at One South Avenue. '
              'Therefore, the submission must be accurate if it answers the '
              'question correctly.\n'
              '3. Factualness: The input asks for the location of Adelphi. The '
              'reference states that Adelphi is located at One South Avenue. '
              'Therefore, the submission must be factual if it answers the '
              'question correctly.\n'
              '[END REASONING]',
 'score': 1,
 'value': 'Y'}


## Testing with LangChain's ContextQAEvalChain

Link: https://www.philschmid.de/evaluate-llm

Retrieval Augmented Generation (RAG) is one of the most popular use cases for LLMs, but it is also one of the most difficult to evaluate. We want RAG models to use the provided context to correctly answer a question, write a summary, or generate a response. This is a challenging task for LLMs, and it is difficult to evaluate whether the model is using the context correctly.

Langchain has a handy ContextQAEvalChain class that allows you to evaluate your RAG models. It takes a context and a question as well as a prediction and a reference to evaluate the correctness of the generation. The evaluator returns a dictionary with the following values:

    reasoning: String "chain of thought reasoning" from the LLM generated prior to creating the score
    score: Binary integer 0 to 1, where 1 would mean that the output is correct, and 0 otherwise
    value: A "CORRECT" or "INCORRECT" corresponding to the score

In [None]:
# Define the QAC triples
qac_triples = [
    {"query": "What is the cost of a semester at Adelphi?", 
     "context": "The cost of a semester at Adelphi is $X.",
     "answer": "Response 1", 
     "ground_truth": "The cost of a semester at Adelphi is $X."},
    {"query": "Who is the president of Adelphi?", 
     "context": "Christine M. Riordan",
     "answer": "Christine M. Riordan", 
     "ground_truth": "Christine M. Riordan"},
    {"query": "What sports teams does Adelphi have?", 
     "context": "Adelphi has Ultimate Frisbee, Basketball, Baseball, Softball, Lacrosse, Field Hockey, and Volleyball teams.",
     "answer": "Ultimate Frisbee, Basketball, Baseball, Softball, Lacrosse, Field Hockey, Volleyball", 
     "ground_truth": "Adelphi has Ultimate Frisbee, Basketball, Baseball, Softball, Lacrosse, Field Hockey, and Volleyball teams."},
    {"query": "Where is Adelphi located?", 
     "context": "Adelphi is located at One South Avenue, Garden City, New York, 11530.",
     "answer": "One South Avenue, Garden City, New York, 11530", 
     "ground_truth": "Adelphi is located at One South Avenue, Garden City, New York, 11530."}
]

In [84]:
from langchain.evaluation import load_evaluator
from pprint import pprint as print

# create evaluator
evaluator = load_evaluator("context_qa", llm=llm)

question = "Where is Adelphi located?"
#pred = "One South Avenue"
pred = conversation_chat(question)

# evaluate
eval_result = evaluator.evaluate_strings(
  input=question,
  prediction=pred,
  context=documents,
  reference="One South Avenue"
)

# print result
print(eval_result)
# {'reasoning': 'CORRECT', 'score': 1, 'value': 'CORRECT'}


{'reasoning': 'CORRECT', 'score': 1, 'value': 'CORRECT'}


In [87]:
question = [
    "Where is Adelphi located?",
    "Who is the president of Adelphi?",
    "What teams does Adelphi have?",
    "What majors does Adelphi have?",
    "Where can I access eCampus for Adelphi?",
    "What undergraduate and graduate programs does Adelphi University offer?",
]
pred = []

for i in range(len(question)):

    # Call the conversation_chat function with the formatted query
    response, query_time = conversation_chat(question[i])
    
    # Append the response to the list of responses
    pred.append(response)

print(pred)


[' 19', ' 10', ' 19', ' 1ives', " I don't know.", ' 12']


In [None]:
from langchain.evaluation import load_evaluator
from pprint import pprint as print

# create evaluator
evaluator = load_evaluator("context_qa", llm=llm)




question = "Where is Adelphi located?"
#pred = "One South Avenue"
pred = conversation_chat(question)

# evaluate
eval_result = evaluator.evaluate_strings(
  input=question,
  prediction=pred,
  context=documents,
  reference="One South Avenue"
)

# print result
print(eval_result)
# {'reasoning': 'CORRECT', 'score': 1, 'value': 'CORRECT'}


Alternatively, if you are not having a reference you can reuse the criteria evaluator to evaluate the correctness using the "question" as input and the "context" as reference.

In [82]:
from langchain.evaluation import load_evaluator
from pprint import pprint as print

# create evaluator
evaluator = load_evaluator("labeled_criteria", criteria="correctness", llm=llm, requires_reference=True)

question = "Where is Adelphi located?"
pred = "One South Avenue"
context = documents[0]

# evaluate
eval_result = evaluator.evaluate_strings(
    prediction=pred,
    input=question,
    reference=context,
)

# print result
print(eval_result)

{'reasoning': '[BEGIN DATA]\n'
              '***\n'
              '[Input]: Where is Adelphi located?\n'
              '***\n'
              '[Submission]: One South Avenue\n'
              '***\n'
              '[Criteria]: correctness: Is the submission correct, accurate, '
              'and factual?\n'
              '***\n'
              "[Reference]: page_content='adelphi university higher education "
              'college new york skip to main content skip to site alert '
              'utility navigation apply visit give secondary navigation news '
              'events athletics alumni ecampus strategic plan search menu '
              'quick links close adelphi university site navigation meet '
              'adelphi mission vision leadership outcomes awards fast facts '
              'locations diversity equity inclusion belonging history '
              'traditions adelphi magazine academics majors programs colleges '
              'schools online programs preprofessional 