In [22]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import OpenAI
from langchain.document_loaders import TextLoader, PDFMinerLoader, CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain_community.vectorstores import Weaviate
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
import torch

import os

SOURCE_DIRECTORY = "source_docs/8-tsne.pdf"
WEAVIATE_URL = "https://exactas-guru-cluster-l99i5920.weaviate.network"

In [2]:
print(f"Loading documents from {SOURCE_DIRECTORY}")
loader = PDFMinerLoader(SOURCE_DIRECTORY)
document = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(document)
print(f"Split into {len(texts)} chunks of text")

Loading documents from source_docs/8-tsne.pdf
Split into 107 chunks of text


In [3]:
embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

load INSTRUCTOR_Transformer
max_seq_length  512


In [4]:
import weaviate

client = weaviate.Client(
    url=WEAVIATE_URL, 
    auth_client_secret=weaviate.AuthApiKey(api_key="8Y9ZhRo1dVPjuXgsheQcZPd1DGaNtjTHvFaU")
)

            Please consider upgrading to the latest version. See https://weaviate.io/developers/weaviate/client-libraries/python for details.


In [5]:
vectorstore = Weaviate.from_documents(
    texts, embeddings, client=client, by_text=False
)

In [6]:
question = "What is the t-SNE algorithm?"

In [7]:
docs = vectorstore.similarity_search(question, k=3)

In [11]:
print(docs[2].page_content)

3.4 Optimization Methods for t-SNE

We start by presenting a relatively simple, gradient descent procedure for optimizing the t-SNE cost
function. This simple procedure uses a momentum term to reduce the number of iterations required
and it works best if the momentum term is small until the map points have become moderately well
organized. Pseudocode for this simple algorithm is presented in Algorithm 1. The simple algorithm
can be sped up using the adaptive learning rate scheme that is described by Jacobs (1988), which
gradually increases the learning rate in directions in which the gradient is stable.


In [26]:
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto", 
    offload_folder="save_folder"
)

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

ImportError: Using `low_cpu_mem_usage=True` or a `device_map` requires Accelerate: `pip install accelerate`

In [13]:
llm = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")

Loading checkpoint shards: 100%|██████████| 2/2 [09:16<00:00, 278.36s/it]


In [14]:
from langchain.chains import RetrievalQA

In [17]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever()
)

ValidationError: 2 validation errors for LLMChain
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)

In [18]:
result = qa_chain({"query": question})

NameError: name 'qa_chain' is not defined

In [9]:
model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")

Loading checkpoint shards: 100%|██████████| 2/2 [08:39<00:00, 259.82s/it]
tokenizer_config.json: 100%|██████████| 967/967 [00:00<00:00, 87.8kB/s]
tokenizer.model: 100%|██████████| 493k/493k [00:00<00:00, 985kB/s]
tokenizer.json: 100%|██████████| 1.80M/1.80M [00:00<00:00, 5.63MB/s]
special_tokens_map.json: 100%|██████████| 72.0/72.0 [00:00<00:00, 122kB/s]


In [10]:
prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

In [11]:
generate_ids = model.generate(inputs.input_ids, max_length=30)
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [14]:
# chain = RetrievalQAWithSourcesChain.from_chain_type(
    # llm, chain_type="stuff", retriever=vectorstore.as_retriever()
# )

In [10]:
from langchain_core.prompts import ChatPromptTemplate

template = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

print(prompt)

input_variables=['context', 'question'] messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:\n"))]


In [21]:
# from langchain_core.output_parsers import StrOutputParser
# from langchain_core.runnables import RunnablePassthrough

# rag_chain = (
#     {"context": vectorstore.as_retriever(), "question": RunnablePassthrough()}
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# rag_chain.invoke({"input" : "What is t-sne?"})

In [22]:
llm.invoke({"input" : "Hello"})

AttributeError: 'MistralForCausalLM' object has no attribute 'invoke'