In [17]:
import os
import torch
import gradio as gr

from textwrap import fill
from IPython.display import Markdown, display

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )

from langchain import PromptTemplate
from langchain import HuggingFacePipeline

from langchain.vectorstores import Chroma
from langchain_community.vectorstores import Weaviate
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader, PDFMinerLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

import warnings
warnings.filterwarnings('ignore')

In [35]:
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float32,
    trust_remote_code=True,
    device_map="cpu", 
    offload_folder="save_folder"
)

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
llm = HuggingFacePipeline(
    pipeline=pipeline,
    )

In [11]:
SOURCE_DIRECTORY = "source_docs/8-tsne.pdf"
WEAVIATE_URL = "https://exactas-guru-cluster-l99i5920.weaviate.network"

In [12]:
print(f"Loading documents from {SOURCE_DIRECTORY}")
loader = PDFMinerLoader(SOURCE_DIRECTORY)
document = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(document)
print(f"Split into {len(texts)} chunks of text")

Loading documents from source_docs/8-tsne.pdf
Split into 107 chunks of text


In [15]:
embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

load INSTRUCTOR_Transformer
max_seq_length  512


In [29]:
embeddings2 = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

In [30]:
db = Chroma.from_documents(texts, embeddings2, persist_directory="db")

In [16]:
import weaviate

client = weaviate.Client(
    url=WEAVIATE_URL, 
    auth_client_secret=weaviate.AuthApiKey(api_key="8Y9ZhRo1dVPjuXgsheQcZPd1DGaNtjTHvFaU")
)

In [18]:
vectorstore = Weaviate.from_documents(
    texts, embeddings, client=client, by_text=False
)

In [19]:
question = "What is the t-SNE algorithm?"

In [31]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True
)

In [32]:
result_ = qa_chain(
    question
)
result = result_["result"].strip()

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [21]:
template = """
[INST] <>
Act as a Machine Learning engineer who is teaching high school students.
<>

{text} [/INST]
"""

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

In [24]:
query = "Explain what the 't' in t-sne stands for in 2-3 sentences"
result = llm(prompt.format(text=query))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
