In [12]:
import torch
# Release cached GPU memory held by PyTorch's allocator before the models below are loaded.
torch.cuda.empty_cache()

# Install/upgrade the notebook's dependencies quietly (-q) to their latest versions (-U);
# no versions are pinned here.
!pip install -qU langchain tiktoken langchain_community langchain_chroma langchain-huggingface huggingface-hub sentence_transformers chromadb langchainhub transformers peft
# flash-attn is installed in a separate step with pip's build isolation turned off.
!pip install flash-attn --no-build-isolation



In [13]:
from langchain_huggingface import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_chroma import Chroma
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel, PeftConfig
from IPython.display import display, Markdown
from google.colab import drive
# Mount Google Drive at /content/drive; the persisted Chroma directory used later lives under this mount.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Function to calculate the total number of tokens in the vector database
import pdb
def count_total_tokens_in_vectorstore(vectorstore, tokenizer):
    """Return the total number of tokens across every document in the vector store.

    Args:
        vectorstore: Object whose ``get()`` returns a mapping with a
            ``'documents'`` entry holding the stored texts as strings.
        tokenizer: Object exposing ``encode(text)`` that returns a sized
            sequence of tokens.

    Returns:
        int: Sum of the per-document token counts; 0 when the store holds no
        documents.
    """
    # Retrieve all documents from the vector store; treat a missing list as empty.
    all_docs = vectorstore.get()['documents'] or []

    # No debugger breakpoint in this loop: the function is called while building
    # the pipeline and the chat model and must run unattended over every document.
    # Entries without text (None) contribute no tokens.
    return sum(len(tokenizer.encode(doc)) for doc in all_docs if doc is not None)

In [15]:
# Initialize embeddings
embedding_model_name = "BAAI/bge-small-en-v1.5"
# Run the embedding model on the GPU when one is present; otherwise fall back to the
# CPU so this cell does not fail on a runtime without CUDA.
embedding_model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
# Normalized embeddings are requested from the encoder.
embedding_encode_kwargs = {"normalize_embeddings": True}
hf = HuggingFaceBgeEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=embedding_model_kwargs,
    encode_kwargs=embedding_encode_kwargs
)

# Initialize vector store and retriever
# The store is opened from a persisted directory on the mounted Google Drive.
vectorstore = Chroma(
    persist_directory="/content/drive/MyDrive/CITS5553_Capstone/vector1",
    embedding_function=hf
)
# Each query returns the 3 best-matching documents.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

print("Using device: " + device)

# Probe bf16 support by trying to allocate a bfloat16 tensor on the chosen device;
# a TypeError from the allocation is taken to mean bf16 is unavailable.
is_bf16_support = False
try:
    tensor_bf16 = torch.tensor([1.0], dtype=torch.bfloat16, device=device)
except TypeError:
    print("bf16 tensors are not supported.")
else:
    is_bf16_support = True
    print("bf16 tensors are supported.")

# Load the base model and tokenizer
base_model = "microsoft/Phi-3.5-mini-instruct"
tokenizer = AutoTokenizer.from_pretrained(base_model)


# Load the base model on the detected device, in bf16 when the probe above
# succeeded and in fp16 otherwise.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    return_dict=True,
    device_map=device,
    torch_dtype=torch.bfloat16 if is_bf16_support else torch.float16
)

# Load and merge the LoRA weights
lora_model = PeftModel.from_pretrained(model, "KunalRaghuvanshi/phi3_mini_qlora_chemical_eng")
merged_model = lora_model.merge_and_unload()

# Generation cap: at least 400 new tokens, or one tenth of the vector store's
# total token count when that is larger. Computed once and named so the value
# can be inspected after the cell runs.
max_new_tokens = max(400, count_total_tokens_in_vectorstore(vectorstore, tokenizer) // 10)

# Bind the pipeline object to its own name. Assigning it to `pipeline` would
# replace the imported transformers factory function, so re-running this cell
# would call the pipeline object instead of building a new one.
text_generation_pipeline = pipeline(
    "text-generation",
    model=merged_model,
    tokenizer=tokenizer,
    max_new_tokens=max_new_tokens
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)



Using device: cuda
bf16 tensors are supported.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
class RAGChatModel:
    """Retrieval-augmented chat helper with a cumulative budget on retrieved context.

    Every call to ``get_prompt`` adds the token count of the fully formatted
    context to a running total. While the total stays within
    ``max_token_limit`` the prompt carries the retrieved text; once it is
    exceeded, prompts carry only page references and use the alternate
    template.
    """

    def __init__(self, retriever, llm, tokenizer, min_tokens=400, max_tokens=None, vectorstore=None):
        """Store collaborators and derive the context-token limit.

        Args:
            retriever: Object whose ``invoke(question)`` returns documents with
                ``metadata['page']`` and ``page_content``.
            llm: Object whose ``generate([prompt])`` returns a result exposing
                ``generations[0][0].text``.
            tokenizer: Object exposing ``encode(text)``.
            min_tokens: Lower bound applied to the suggested limit.
            max_tokens: Upper bound on the limit. ``None`` (default) means
                ``max(400, total_tokens // 10)``, computed here at construction
                time rather than once when the class is defined.
            vectorstore: Store whose documents are counted. Defaults to
                ``retriever.vectorstore`` so the limit reflects the store that
                is actually being queried.

        Raises:
            ValueError: If no vector store is passed and the retriever does not
                expose one.
        """
        self.retriever = retriever
        self.llm = llm
        self.tokenizer = tokenizer

        # Resolve the store from the arguments instead of a notebook global.
        if vectorstore is None:
            vectorstore = getattr(retriever, "vectorstore", None)
        if vectorstore is None:
            raise ValueError(
                "RAGChatModel needs a vector store: pass vectorstore=... or a retriever "
                "that exposes a .vectorstore attribute."
            )

        # Calculate max_token_limit with bounds (the store is scanned exactly once).
        total_tokens = count_total_tokens_in_vectorstore(vectorstore, tokenizer)
        if max_tokens is None:
            max_tokens = max(400, total_tokens // 10)
        suggested_tokens = max(min_tokens, total_tokens // 10)
        self.max_token_limit = min(suggested_tokens, max_tokens)

        # Running total of context tokens across all questions asked so far.
        self.current_token_count = 0
        self.template_standard = """
        <|system|>
        Answer the question and mustgive all the page numbers for the answer where this information is found based in the information provided in the context.
        Providing all the page numbers is essential for the answer.

        Context: {context}

        Providing all the page numbers is essential  for the answer.
        <|end|>

        <|user|>
        Question: {question}
        <|end|>

        <|assistant|>
        """
        self.template_exceeded = """
        <|system|>
        Answer the question in detail; warn that information is not taken from the prescribed textbook and must provide the page numbers where they can find the correct information in the prescribed textbook.

        Context: {context}
        Providing all the page numbers is essential for the answer.
        <|end|>

        <|user|>
        Question: {question}
        <|end|>

        <|assistant|>
        """

    @staticmethod
    def _page_number(doc):
        """Return the page number to show for ``doc`` (stored ``page`` value plus one)."""
        # NOTE(review): the stored 'page' value is presumably zero-based — confirm
        # against the loader that built the store.
        return doc.metadata['page'] + 1

    def num_tokens_from_string(self, string: str) -> int:
        """Returns the number of tokens in a text string using the tokenizer."""
        return len(self.tokenizer.encode(string))

    def format_docs(self, docs, full_content=True):
        """Format the documents to be used as context in the prompt.

        With ``full_content=True`` each document contributes its page number
        and text; otherwise only the page numbers are listed. Both forms use
        the same page numbering, so a page cited in one mode matches the other.
        """
        if full_content:
            return "\n\n".join(
                f"Information in Page number: {self._page_number(doc)}\n{doc.page_content}"
                for doc in docs
            )
        return "Information available in prescribed textbook " + ", ".join(
            f"Page number: {self._page_number(doc)}" for doc in docs
        )

    def get_prompt(self, docs, question):
        """Generate the prompt based on token count and context formatting.

        The token count of the full-content context is added to the running
        total on every call, including calls where the limit is already
        exceeded and only page numbers end up in the prompt.
        """
        # Format the context with full content
        context = self.format_docs(docs, full_content=True)
        total_tokens_in_context = self.num_tokens_from_string(context)

        # Add tokens to the running total
        self.current_token_count += total_tokens_in_context

        # Decide whether to use full content or only page numbers
        if self.current_token_count > self.max_token_limit:
            print("Token limit exceeded. Information from prescribed textbook will not be used.")
            # Reformat context to include only page numbers
            context = self.format_docs(docs, full_content=False)
            template = self.template_exceeded
        else:
            template = self.template_standard

        # Create the prompt
        return template.format(context=context, question=question)

    def extract_clean_answer(self, raw_output):
        """Extract only the answer from the raw output.

        Returns the stripped text after the last ``<|assistant|>`` tag, or the
        whole stripped output when the tag is absent.
        """
        assistant_tag = "<|assistant|>"
        if assistant_tag in raw_output:
            return raw_output.split(assistant_tag)[-1].strip()
        return raw_output.strip()

    def ask_question(self, question):
        """Main function to retrieve relevant docs and generate a response.

        The cleaned answer is rendered as Markdown in the notebook; nothing is
        returned.
        """
        # Retrieve relevant documents
        docs = self.retriever.invoke(question)

        # Generate prompt based on token count
        prompt = self.get_prompt(docs, question)

        # Pass the prompt to the LLM
        result = self.llm.generate([prompt])

        # Extract the generated text
        raw_answer = result.generations[0][0].text

        # Get the clean answer
        clean_answer = self.extract_clean_answer(raw_answer)

        # Display the answer
        display(Markdown(clean_answer))


In [17]:

# Initialize the RAGChatModel with explicit bounds on its context-token limit.
# These bounds do not control generation length: the class combines them with the
# vector store's token count to set max_token_limit, which is compared against the
# running total of retrieved-context tokens.
rag_chat_model = RAGChatModel(
    retriever=retriever,
    llm=llm,
    tokenizer=tokenizer,
    min_tokens=400,    # Lower bound applied to the suggested context-token limit
    max_tokens=1000    # Upper bound (cap) on the context-token limit
)

In [18]:
# Start the interactive chat
# Reads questions until the user types 'exit' (any case, surrounding whitespace ignored).
print("Welcome to the RAG Chat Model! Ask any question (type 'exit' to quit):")
while True:
  print("\n\n")
  try:
    question = input("Your question: ").strip()
  except (EOFError, KeyboardInterrupt):
    # A closed input stream or an interrupt at the prompt ends the chat cleanly.
    print("Exiting the chat.")
    break
  if not question:
    # Blank input: ask again instead of running retrieval and generation on nothing.
    continue
  if question.lower() == 'exit':
    print("Exiting the chat.")
    break
  rag_chat_model.ask_question(question)

Welcome to the RAG Chat Model! Ask any question (type 'exit' to quit):



Your question: what is entropy from the perspective of a molecule?


Entropy, from the perspective of a molecule, is a measure of the disorder or randomness in a system. It is a fundamental concept in thermodynamics and statistical mechanics, which helps to explain the spontaneous direction of processes and the distribution of energy within a system.

In the context of a molecule, entropy can be thought of as the number of ways the molecule's energy can be distributed among its various degrees of freedom (such as translational, rotational, and vibrational motions). A higher entropy state implies a greater number of possible configurations for the molecule's energy, leading to increased disorder.

The concept of entropy is quantified by the Boltzmann's entropy formula, S = k * ln(W), where S is the entropy, k is the Boltzmann constant, and W is the number of microstates corresponding to a particular macrostate of the system.

Entropy plays a crucial role in determining the spontaneity of chemical reactions and phase transitions. For instance, in a chemical reaction, the change in entropy (∆S) along with the change in enthalpy (∆H) determines the Gibbs free energy change (∆G) of the reaction, which predicts whether the reaction will occur spontaneously.

The increase in entropy is a driving force for many natural processes, including the diffusion of molecules, the melting of solids, and the evaporation of liquids. These processes lead to a more disordered state, which is thermodynamically favored.

In summary, from the perspective of a molecule, entropy represents the degree of disorder or randomness in the distribution of energy among its various degrees of freedom. It is a key concept in understanding the behavior of molecules and the direction of chemical processes.

Page numbers:




Your question: what is an ideal gas law? Please provide the page numbers


The ideal gas law is a fundamental equation in chemistry and physics that describes the behavior of an ideal gas. It is represented by the formula PV = nRT, where P stands for pressure, V for volume, n for the number of moles of gas, R for the ideal gas constant, and T for temperature in Kelvin.

The ideal gas law assumes that the gas particles are point particles with no volume and that there are no intermolecular forces between them. This law is an approximation that works well under many conditions, especially at high temperatures and low pressures.

Page numbers for the ideal gas law can be found in the following sections of the text:

- Introduction to Gases: Pages 1-2
- The Kinetic Theory of Gases: Pages 3-5
- The Ideal Gas Law: Pages 6-8

Please note that the page numbers provided are hypothetical and for illustrative purposes only. In a real context, you would refer to the actual textbook or document to find the precise page numbers.




Your question: How do we determine the breakthru point for an absorption bed?


To determine the breakthru point for an absorption bed, we refer to the provided context which indicates that the breakthru point is a critical parameter in the design and operation of absorption systems. The breakthru point is the stage at which the solute concentration in the solvent reaches a maximum and begins to decrease as the solvent continues to flow through the bed.


According to the context, the breakthru point can be calculated using the following equation:


\[ \text{Breakthru Point} = \frac{\text{Total Capacity} \times \text{Solvent Flow Rate}}{\text{Solvent Flow Rate} + \text{Solvent Flow Rate at Breakthru Point}} \]


This equation is derived from the principles of mass transfer and the design of absorption columns. The total capacity represents the maximum amount of solute that the bed can absorb, while the solvent flow rate is the rate at which the solvent passes through the bed. The solvent flow rate at the breakthru point is the rate at which the solvent flow changes as it reaches the maximum solute absorption capacity.


The breakthru point is significant because it indicates the efficiency of the absorption process and helps in optimizing the design of the absorption bed. By understanding the breakthru point, engineers can adjust the solvent flow rate and other operational parameters to ensure that the absorption bed operates within its optimal range, thereby maximizing the efficiency of the solute removal process.


Page numbers for the detailed explanation of the breakthru point calculation and its significance in absorption bed design are not provided in the context. However, the equation and its components are essential for determining the breakthru point in absorption processes.


Answer: The breakthru point for an absorption bed is determined using the




Your question: exit
Exiting the chat.


How do we determine the breakthru point for an absorption bed?

what is entropy from the perspective of a molecule?

