In [1]:
%load_ext autoreload
%autoreload 2

### Necessary imports

In [2]:
!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

### Dependencies

In [1]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline

2023-12-31 00:32:28.905193: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-31 00:32:28.956729: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-31 00:32:28.956766: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-31 00:32:28.958157: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-31 00:32:28.966585: I tensorflow/core/platform/cpu_feature_guar

### Load quantized Mistal 7B

In [2]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.2'

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

#################################################################
# bitsandbytes parameters
#################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

#################################################################
# Load pre-trained config
#################################################################
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

Your GPU supports bfloat16: accelerate training with bf16=True


config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

### Count number of trainable parameters

In [3]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(mistral_model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


### Build Mistral text generation pipelines

In [4]:
standalone_query_generation_pipeline = pipeline(
 model=mistral_model,
 tokenizer=tokenizer,
 task="text-generation",
 temperature=0.0,
 repetition_penalty=1.1,
 return_full_text=True,
 max_new_tokens=1000,
)
standalone_query_generation_llm = HuggingFacePipeline(pipeline=standalone_query_generation_pipeline)

response_generation_pipeline = pipeline(
 model=mistral_model,
 tokenizer=tokenizer,
 task="text-generation",
 temperature=0.2,
 repetition_penalty=1.1,
 return_full_text=True,
 max_new_tokens=1000,
)
response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

### Load and chunk documents. Load chunked documents into FAISS index 

In [None]:
!playwright install 
!playwright install-deps 

In [5]:
import nest_asyncio
nest_asyncio.apply()

# Articles to index
articles = ["https://www.fantasypros.com/2023/12/fantasy-football-panic-meter-patrick-mahomes-austin-ekeler-stefon-diggs-travis-etienne/",]

# Scrapes the blogs above
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [11]:
# Converts HTML to plain text 
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=800, 
                                      chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents, 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever(k = 1)

Created a chunk of size 4148, which is longer than the specified 800
Created a chunk of size 898, which is longer than the specified 800


### Create PromptTemplate and LLMChain

In [12]:
from langchain.schema import format_document
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.memory import ConversationBufferMemory
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate

from operator import itemgetter

In [13]:
_template = """
[INST] 
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language, that can be used to query a FAISS index. This query will be used to retrieve documents with additional context. 

Let me share a couple examples that will be important. 

If you do not see any chat history, you MUST return the "Follow Up Input" as is:

```
Chat History:

Follow Up Input: How is Lawrence doing?
Standalone Question:
How is Lawrence doing?
```

If this is the second question onwards, you should properly rephrase the question like this:

```
Chat History:
Human: How is Lawrence doing?
AI: 
Lawrence is injured and out for the season.

Follow Up Input: What was his injurt?
Standalone Question:
What was Lawrence's injury?
```

Now, with those examples, here is the actual chat history and input question.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:
[your response here]
[/INST] 
"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [14]:
template = """
[INST] 
Answer the question based only on the following context:
{context}

Question: {question}
[/INST] 
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [15]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [16]:
# Instantiate ConversationBufferMemory
memory = ConversationBufferMemory(
 return_messages=True, output_key="answer", input_key="question"
)

# First we add a step to load memory
# This adds a "memory" key to the input object
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
# Now we calculate the standalone question
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | standalone_query_generation_llm,
}
# Now we retrieve the documents
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}
# Now we construct the inputs for the final prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}
# And finally, we do the part that returns the answers
answer = {
    "answer": final_inputs | ANSWER_PROMPT | response_generation_llm,
    "question": itemgetter("question"),
    "context": final_inputs["context"]
}
# And now we put it all together!
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [17]:
def call_conversational_rag(question, chain, memory):
    """
    Calls a conversational RAG (Retrieval-Augmented Generation) model to generate an answer to a given question.

    This function sends a question to the RAG model, retrieves the answer, and stores the question-answer pair in memory 
    for context in future interactions.

    Parameters:
    question (str): The question to be answered by the RAG model.
    chain (LangChain object): An instance of LangChain which encapsulates the RAG model and its functionality.
    memory (Memory object): An object used for storing the context of the conversation.

    Returns:
    dict: A dictionary containing the generated answer from the RAG model.
    """
    
    # Prepare the input for the RAG model
    inputs = {"question": question}

    # Invoke the RAG model to get an answer
    result = chain.invoke(inputs)
    
    # Save the current question and its answer to memory for future context
    memory.save_context(inputs, {"answer": result["answer"]})
    
    # Return the result
    return result

In [18]:
question = "how is maholmes doing?"
call_conversational_rag(question, final_chain, memory)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'answer': 'In the context provided, Patrick Mahomes, the quarterback for the Kansas City Chiefs, is currently ranked as QB10 for the 2023 fantasy football season. He has finished with less than 17 points in seven out of his last eight games. However, despite his recent struggles, he still offers a high upside due to his talent. There are other quarterbacks, such as Baker Mayfield and Joe Flacco, who have shown a better floor lately. Mayfield is averaging 22.9 PPG over his last three games, while Flacco is averaging 351 pass YPG and 20.6 PPG during the same period.',
 'question': "Standalone Question:\nHow is Mahomes doing?\n\n(Note: Mahomes is a common name, so it's essential to specify which Mahomes is being referred to if there are multiple options.)",
 'context': '|\n\n2 min read\n\nNext Up - **Fantasy Football NFL Week 17 Injury Report & Outlook (2023)**\n\nNext Article  \n\nThis website uses cookies to provide basic functionality, enhance user\nexperience, and to analyze performa

In [19]:
question = "Who are some good alternatives to him?"
call_conversational_rag(question, final_chain, memory)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'answer': "Based on the context provided, two quarterbacks who have shown better performance than Patrick Mahomes recently are Baker Mayfield and Joe Flacco. Mayfield is averaging 22.9 points per game (PPG) over his last three games, while Flacco is averaging 351 passing yards per game (YPG) and 20.6 PPG over the same period. These numbers suggest that Mayfield and Flacco may provide a higher floor compared to Mahomes' inconsistent performances. However, it's essential to consider other factors such as team situation, opponent matchups, and personal preferences before making any decisions.",
 'question': 'Which quarterbacks have shown better performance than Patrick Mahomes recently, and could be considered as potential alternatives?',
 'context': '**Patrick Mahomes (QB – KC)| Panic Meter: 3 **\n\nSooner or later the Chiefs have to figure it out, right? We’ve been asking\nourselves this question for 8 weeks now and nothing has changed. In week 16,\nMahomes finished with less than 17 p

In [20]:
question = "How many PPG are both averaging?"
call_conversational_rag(question, final_chain, memory)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'answer': "Baker Mayfield's average number of points per game (PPG) over his last three games is 22.9.\nJoe Flacco's average number of points per game (PPG) over his last three games is 20.6.",
 'question': 'What is the average number of points per game (PPG) for both Baker Mayfield and Joe Flacco?',
 'context': '**Patrick Mahomes (QB – KC)| Panic Meter: 3 **\n\nSooner or later the Chiefs have to figure it out, right? We’ve been asking\nourselves this question for 8 weeks now and nothing has changed. In week 16,\nMahomes finished with less than 17 points for the seventh time in his last\neight games. He is QB10 on the season. No QB will offer you the same upside as\nMahomes, but there are several likely-available players that have displayed a\nmuch higher floor as of late. Consider these options:\n\n  * Baker Mayfield\n\n(QB – TB): averaging 22.9 PPG over his last three games.\n\n  * Joe Flacco\n\n(QB – CLE): averaging 351 pass YPG and 20.6 PPG over his last three games.\n\n**Austin E

In [21]:
question = "Who did I originally ask about?"
call_conversational_rag(question, final_chain, memory)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'answer': 'The original quarterback mentioned in the conversation was Patrick Mahomes.',
 'question': 'Who was the original quarterback mentioned in the conversation?',
 'context': '|\n\n2 min read\n\nNext Up - **Fantasy Football NFL Week 17 Injury Report & Outlook (2023)**\n\nNext Article  \n\nThis website uses cookies to provide basic functionality, enhance user\nexperience, and to analyze performance and traffic. We also share information\nabout your use of our site with our social media, advertising, and analytics\npartners.  \n  \nBy using this website you agree to our Terms of Use.\n\nDo Not Sell My Personal Information Accept Cookies\n\n__ Follow\n\n## More Articles\n\n### Fantasy Football NFL Week 17 Injury Report & Outlook (2023)\n\nby **Deepak Chona - MD** | 2 min read\n\n### Fantasy Football Week 17 Rankings, Grades & Start/Sit Advice (2023)\n\nby **FantasyPros Staff** | 15+ min read\n\n### NFL DFS Week 17 Stacking Advice & Picks (2023 Fantasy Football)\n\nby **Joe Pepe** |