In [4]:
# Install torch and datasets (unpinned), then pinned releases of accelerate,
# peft, bitsandbytes, transformers and trl; -q keeps pip's output quiet.
!pip install -q torch datasets
!pip install -q accelerate==0.21.0 \
                peft==0.4.0 \
                bitsandbytes==0.40.2 \
                transformers==4.31.0 \
                trl==0.4.7

In [None]:
from huggingface_hub import notebook_login
# Log this notebook session in to the Hugging Face Hub.
# NOTE(review): presumably required so the model files can be downloaded
# further down — confirm whether the checkpoint actually needs authentication.
notebook_login()

In [None]:
# NOTE(review): this upgrades transformers to the latest release, overriding the
# transformers==4.31.0 pin from the first install cell, so the version actually
# in use is unpinned. Confirm which version is required and pin it in one place.
!pip install -U transformers

In [38]:
import transformers
from transformers import AutoTokenizer, AutoConfig

# Hub id of the checkpoint; the same identifier is reused when the model
# weights are loaded later in the notebook.
model_identifier = 'mistralai/Mistral-7B-Instruct-v0.1'
config = AutoConfig.from_pretrained(model_identifier)

# trust_remote_code is deliberately not passed: that flag lets transformers
# execute Python files shipped inside the Hub repository, and it is not needed
# to load a tokenizer through the standard Auto classes.
text_tokenizer = AutoTokenizer.from_pretrained(model_identifier)

# Use the end-of-sequence token for padding and pad on the right-hand side.
# NOTE(review): presumably the tokenizer has no pad token of its own — confirm.
text_tokenizer.pad_token = text_tokenizer.eos_token
text_tokenizer.padding_side = "right"

In [39]:
import torch
from transformers import BitsAndBytesConfig

# Tunable knobs for loading the model in 4-bit precision through bitsandbytes.
enable_4bit_loading = True
float_precision_dtype = "float16"
quantization_format = "nf4"
enable_double_quantization = False

# Turn the dtype name into the corresponding torch dtype object.
dtype_object = getattr(torch, float_precision_dtype)

# Gather the keyword arguments first so the settings read as one table, then
# build the config that is handed to the model loader.
bnb_settings = {
    "load_in_4bit": enable_4bit_loading,
    "bnb_4bit_quant_type": quantization_format,
    "bnb_4bit_compute_dtype": dtype_object,
    "bnb_4bit_use_double_quant": enable_double_quantization,
}
quantization_config = BitsAndBytesConfig(**bnb_settings)

In [40]:
from transformers import AutoModelForCausalLM
# Load the causal language model named by `model_identifier`, passing the
# bitsandbytes `quantization_config` built above so it is applied at load time.
model = AutoModelForCausalLM.from_pretrained(model_identifier, quantization_config=quantization_config)

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [None]:
!pip install langchain

In [41]:
from langchain.llms import HuggingFacePipeline

# Generation options forwarded to the transformers text-generation pipeline.
# NOTE(review): `do_sample` is not set here; the transformers generation docs
# describe `temperature` as a sampling-only option, so confirm whether 0.21
# actually takes effect or whether decoding is greedy.
generation_settings = {
    "temperature": 0.21,
    "repetition_penalty": 1.15,
    "return_full_text": True,
    "max_new_tokens": 350,
}

text_generation_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=text_tokenizer,
    **generation_settings,
)

# Wrap the pipeline so it can be used as the LLM of a LangChain chain.
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
!pip install sentence-transformers

In [None]:
!pip install chromadb

In [None]:
!pip install openai

In [None]:
!pip install tiktoken

In [None]:
from dataclasses import dataclass
import os
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import pandas as pd
import time
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
# Multiple-choice prompt wrapped in [INST] ... [/INST] tags. The placeholders
# {context}, {question} and {A}..{D} are filled in per question when this
# string is used as the template of the PromptTemplate below.
PROMPT_TEXT = """
[INST]
Please answer the question based only on the following context:

{context}

---

Answer the question based on the above context:

{question}

A. {A}
B. {B}
C. {C}
D. {D}

Only write one letter for the answer: A, B, C, or D
[/INST]
"""
import re

# OpenAIEmbeddings below needs an OpenAI API key. Supply it through the
# OPENAI_API_KEY environment variable instead of hardcoding it in the notebook.
CHROMA_DB_PATH = "biology_lecture_notes_chroma2"


def extract_choice_letter(generated_text):
    """Return the answer letter (A, B, C or D) found in the model output.

    Only the text after the last "[/INST]" marker is inspected, so the result
    is the same whether or not the pipeline echoes the prompt back in front of
    the completion. The first standalone capital A, B, C or D in that text is
    taken as the answer.

    Returns None when no such letter is present, which is then counted as a
    wrong answer by the caller.
    """
    answer_text = str(generated_text).rsplit("[/INST]", 1)[-1]
    found = re.search(r"\b([ABCD])\b", answer_text)
    return found.group(1) if found else None


start_time = time.time()
embedding_func = OpenAIEmbeddings()
chroma_db = Chroma(persist_directory=CHROMA_DB_PATH, embedding_function=embedding_func)
df = pd.read_csv('college_biology_test.csv')

# The template and the chain are the same for every question, so build them
# once instead of once per row.
prompt = PromptTemplate(
    input_variables=["context", "question", "A", "B", "C", "D"],
    template=PROMPT_TEXT,
)
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

accuracy = 0  # running count of correctly answered questions
for index in df.index:
    query = df['Question'][index]
    choice_a = df['A'][index]
    choice_b = df['B'][index]
    choice_c = df['C'][index]
    choice_d = df['D'][index]

    # Vector search: fetch the three chunks most similar to the question and
    # join their text into a single context string.
    results = chroma_db.similarity_search_with_relevance_scores(query, k=3)
    context = "\n\n - - - \n\n".join([doc.page_content for doc, _ in results])

    response = llm_chain.invoke({
        "context": context,
        "question": query,
        "A": choice_a,
        "B": choice_b,
        "C": choice_c,
        "D": choice_d,
    })

    # The chain returns its inputs plus the generated text under its output
    # key. The answer has to be parsed from that generated text; indexing
    # response["context"] would only look at the retrieved passage.
    letter_response = extract_choice_letter(response[llm_chain.output_key])
    print("Response ", response)
    print("first letter ", letter_response)

    label = df['Label'][index]
    print("label ", label)
    # Normalise the gold label so stray whitespace or case in the CSV does not
    # turn a correct answer into a miss.
    if letter_response == str(label).strip().upper():
        accuracy += 1
        print("accuracy increase ", accuracy)

end_time = time.time()
execution_time = end_time - start_time
print(f"Execution time: {execution_time} seconds")
# Guard against an empty test file so the summary never divides by zero.
print("accuracy ", accuracy / df.shape[0] if df.shape[0] else 0.0)