### Imports

In [12]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
import transformers

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

  _builtin_ops.append((v, "aten::" + name))
2023-10-16 01:17:34.308425: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-16 01:17:35.534095: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Load and test out mistral-7b

In [13]:
model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [17]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Your GPU supports bfloat16: accelerate training with bf16=True


In [18]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [21]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


In [22]:
device = 'cuda'

messages = [
    {"role": "user", "content": "What can you tell me about the 2021 fantasy football season?"},
    {"role": "assistant", "content": """The 2021 fantasy football season began on August 17th, 2021, with the first games of the NFL Preseason. The regular season started on September 11th, 2021 and ended on January 9th, 2022. The Super Bowl LVI, which was the 56th Super Bowl, was held on February 6th, 2022, at SoFi Stadium in Los Angeles, California.

Due to the COVID-19 pandemic, the 2021 fantasy football season was played with some changes, including a reduced preseason schedule, the elimination of in-person attendance at games, and the use of virtual meetings for teams.

In terms of fantasy football leagues, many leagues opted to use a PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards. The use of PPR has become increasingly popular in recent years, as it provides more opportunities for players to score points.

As for notable players in the 2021 fantasy football season, some of the top performers included:

* Quarterbacks: Patrick Mahomes of the Kansas City Chiefs, Justin Herbert of the Los Angeles Chargers, and Tom Brady of the Tampa Bay Buccaneers.
* Running Backs: Christian McCaffery of the Carolina Panthers, Alvin Kamara of the New Orleans Saints, and Dalvin Cook of the Minnesota Vikings.
* Wide Receivers: Tyreek Hill of the Kansas City Chiefs, Stefon Diggs of the Buffalo Bills, and DeAndre Hopkins of the Arizona Cardinals.
* Tight Ends: Travis Kelce of the Kansas City Chiefs, George Kittle of the San Francisco 49ers, and Tyler Higbee of the Los Angeles Rams.

In terms of team performance, the Tampa Bay Buccaneers won the Super Bowl LVI, defeating the Kansas City Chiefs 31-9. The Green Bay Packers finished the regular season with the best record (13-3), but were eliminated in the divisional round of the playoffs by the Los Angeles Rams. The Oakland Raiders, who moved to Las Vegas in 2020, had a strong season, finishing with a record of 10-6 and making it to the playoffs for the first time since 2016."""},
    {"role": "user", "content": "How many points did Maholmes score that season?"},
    {"role": "assistant", "content": "Patrick Mahomes, the quarterback for the Kansas City Chiefs, scored a total of 456 points in the regular season of the 2021 fantasy football season. This made him the top scoring quarterback in the PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards."},
    {"role": "user", "content": "Can you tell me how many points he scored in week 3 of that season?"},
    {"role": "assistant", "content": "Patrick Mahomes scored 35 points in week 3 of the 2021 fantasy football season. This was a strong performance, as Mahomes completed 18 of 29 passes for 283 yards and three touchdowns in a win over the Indianapolis Colts."},
    {"role": "user", "content": "Did anyone score more than him in that week?"}
]

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
model

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

Using sep_token, but it is not set yet.
Using cls_token, but it is not set yet.
Using mask_token, but it is not set yet.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> [INST] What can you tell me about the 2021 fantasy football season? [/INST]The 2021 fantasy football season began on August 17th, 2021, with the first games of the NFL Preseason. The regular season started on September 11th, 2021 and ended on January 9th, 2022. The Super Bowl LVI, which was the 56th Super Bowl, was held on February 6th, 2022, at SoFi Stadium in Los Angeles, California.

Due to the COVID-19 pandemic, the 2021 fantasy football season was played with some changes, including a reduced preseason schedule, the elimination of in-person attendance at games, and the use of virtual meetings for teams.

In terms of fantasy football leagues, many leagues opted to use a PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards. The use of PPR has become increasingly popular in recent years, as it provides more opportunities for players to score points.

As for notable players in the 2021 fantasy football season, some of the top

### RAG RAG RAG

In [23]:
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS

Example ChatPromptTemplate

```python
from langchain.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI bot. Your name is {name}."),
    ("human", "Hello, how are you doing?"),
    ("ai", "I'm doing well, thanks!"),
    ("human", "{user_input}"),
])

messages = template.format_messages(
    name="Bob",
    user_input="What is your name?"
)
```

In [94]:
text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

text_generation_pipeline_0_temp = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.0,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,
)

In [95]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

mistral_llm_0_temp = HuggingFacePipeline(pipeline=text_generation_pipeline_0_temp)

In [96]:
prompt_template = """
### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

In [97]:
# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [98]:
from langchain.document_loaders import AsyncChromiumLoader, UnstructuredURLLoader, PlaywrightURLLoader
from langchain.document_loaders.blob_loaders.schema import Blob
from langchain.document_loaders.parsers.txt import TextParser
from langchain.document_loaders.url_selenium import SeleniumURLLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.text_splitter import CharacterTextSplitter

In [33]:
test_document = """
ALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday's game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.

Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.

Veteran running back David Montgomery has carried the load in his absence, with back-to-back games of at least 100 yards rushing. Detroit has rushed for nine touchdowns through the first five games of the season, tying a team record.

Although expectations are high for Gibbs as the Lions' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.

"He's right where I thought he would be, he's right where he should be coming along at at this time," Scottie Montgomery said.

"We don't judge players based off of anything else besides achievement. Like there is success and there is expectations and I want people outside the building to put those expectations on him because, guess what, whether we believe it or not it motivates him. It does and that's a really, really good tool."

Meanwhile, rookie safety Brian Branch, who has an ankle injury, is also set to miss his second consecutive game. The second-round pick out of Alabama was held out of practice for the entire week. With 25 tackles, a pick-six and 4 passes defended, he is Detroit's fourth-leading tackler.

Rookie tight end Sam LaPorta is questionable with a calf strain that flared up Wednesday, forcing him to miss Thursday's practice. He said it likely stemmed from Sunday's win over Carolina, and he intends to play Sunday.

"It's football, things happen, we certainly work our bodies pretty hard so I'm feeling a lot better today," LaPorta said.
"""

In [34]:
raw_documents = TextParser().parse(Blob(data=test_document))

In [36]:
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, HuggingFaceEmbeddings())

Created a chunk of size 234, which is longer than the specified 100
Created a chunk of size 258, which is longer than the specified 100
Created a chunk of size 233, which is longer than the specified 100
Created a chunk of size 158, which is longer than the specified 100
Created a chunk of size 126, which is longer than the specified 100
Created a chunk of size 307, which is longer than the specified 100
Created a chunk of size 286, which is longer than the specified 100
Created a chunk of size 220, which is longer than the specified 100


```python
HuggingFaceEmbeddings(
    *,
    client: Any = None,
    model_name: str = 'sentence-transformers/all-mpnet-base-v2',
    cache_folder: Optional[str] = None,
    model_kwargs: Dict[str, Any] = None,
    encode_kwargs: Dict[str, Any] = None,
    multi_process: bool = False,
) -> None
```

In [37]:
retriever = db.as_retriever()

In [38]:
chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

In [39]:
query = "Did Gibbs start this week?"
docs = db.similarity_search(query)
print(docs[0].page_content)

Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.


In [40]:
chain.invoke("Why didn't Gibbs practice this week?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content="Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.", metadata={'source': None}),
  Document(page_content="ALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday's game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.", metadata={'source': None}),
  Document(page_content="Although expectations are high for Gibbs as the Lions' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.", metadata={'source': None}),
  Document(page_content="Meanwhile, rookie safety Brian Branch, who has an ankle injury, is also set to miss his second consecutive game. The second-round pick out of Alaba

#### Let's try to load the same article with LangChains web crawler loaders

In [41]:
import nest_asyncio
nest_asyncio.apply()

In [42]:
loader = AsyncChromiumLoader(["https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs"])
docs = loader.load()

In [43]:
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

In [44]:
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(docs_transformed)
db = FAISS.from_documents(documents, HuggingFaceEmbeddings())

Created a chunk of size 192, which is longer than the specified 100
Created a chunk of size 274, which is longer than the specified 100
Created a chunk of size 103, which is longer than the specified 100
Created a chunk of size 161, which is longer than the specified 100
Created a chunk of size 1743, which is longer than the specified 100
Created a chunk of size 1985, which is longer than the specified 100
Created a chunk of size 121, which is longer than the specified 100
Created a chunk of size 195, which is longer than the specified 100
Created a chunk of size 343, which is longer than the specified 100
Created a chunk of size 103, which is longer than the specified 100
Created a chunk of size 1685, which is longer than the specified 100
Created a chunk of size 1649, which is longer than the specified 100
Created a chunk of size 117, which is longer than the specified 100
Created a chunk of size 421, which is longer than the specified 100
Created a chunk of size 410, which is longer

What do these documents look like?


In [48]:
documents[:5]

[Document(page_content='Skip to main content  Skip to navigation', metadata={'source': 'https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs'}),
 Document(page_content="Top Events  NCAAF  MLB  NFL  WNBA  F1  NHL  NBA  PGA Tour  NASCAR  Tennis (W)\nLPGA Tour  NCAAM  Tennis (M)  UFC  NCAAW  Top Soccer  PLL (Lacrosse)  ICC\nCricket World Cup  Men's NCAA Ice Hockey", metadata={'source': 'https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs'}),
 Document(page_content='NFL', metadata={'source': 'https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs'}),
 Document(page_content="* Top Events \n  * NCAAF \n  * MLB \n  * NFL \n  * WNBA \n  * F1 \n  * NHL \n  * NBA \n  * PGA Tour \n  * NASCAR \n  * Tennis (W) \n  * LPGA Tour \n  * NCAAM \n  * Tennis (M) \n  * UFC \n  * NCAAW \n  * Top Soccer \n  * PLL (Lacrosse) \n  * ICC Cricket World Cup \n  * Men's NC

Super noisy but let's still see how it performs?

In [62]:
chain.invoke("What was Gibbs injury?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content="Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.", metadata={'source': None}),
  Document(page_content="ALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday's game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.", metadata={'source': None}),
  Document(page_content="Although expectations are high for Gibbs as the Lions' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.", metadata={'source': None}),
  Document(page_content="Meanwhile, rookie safety Brian Branch, who has an ankle injury, is also set to miss his second consecutive game. The second-round pick out of Alaba

Sweet, still gives the right answer! This may cause issues as we add more docs

### Conversational Retrieval Chain

In [107]:
from langchain.schema.runnable import RunnableMap
from langchain.schema import format_document

In [108]:
from langchain.prompts.prompt import PromptTemplate

_template = """[INST] Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
[/INST]"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [121]:
template = """[INST]Answer the question based only on the following context and chat history:
Context:
{context}

Chat History:
{chat_history}

Question: {question}
[/INST]
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

In [122]:
# Create llm chain 
llm_chain_cq = LLMChain(llm=mistral_llm_0_temp, prompt=CONDENSE_QUESTION_PROMPT)

# Create llm chain 
llm_chain_a = LLMChain(llm=mistral_llm, prompt=ANSWER_PROMPT)

In [123]:
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
def _combine_documents(docs, document_prompt = DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    doc_strings = [format_document(doc, document_prompt) for doc in docs]
    return document_separator.join(doc_strings)

In [112]:
from typing import Tuple, List
def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "Human: " + dialogue_turn[0]
        ai = "Assistant: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer

In [127]:
RunnableMap(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x['chat_history']))
    )   

{
  standalone_question: RunnableAssign(mapper={
                         chat_history: RunnableLambda(...)
                       })
}

In [136]:
_inputs = RunnableMap(
    standalone_question=RunnablePassthrough.assign(
        chat_history=lambda x: _format_chat_history(x['chat_history'])
    ) | llm_chain_cq ,
)
_context = {
    # "chat_history": RunnableMap(
    # chat_history=RunnablePassthrough.assign(
    #     chat_history=lambda x: _format_chat_history(x['chat_history']))
    # ),
    "context": lambda x: x["standalone_question"]['text'] | retriever | _combine_documents,
    "question": lambda x: x["standalone_question"]['text']
}
conversational_qa_chain = _inputs | _context | llm_chain_a

In [135]:
_inputs.invoke({
    "question": "What was Gibbs injury?",
    "chat_history": [],
})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'standalone_question': {'question': 'What was Gibbs injury?',
  'chat_history': '',
  'text': " What was Gibbs' injury?"}}

In [137]:
conversational_qa_chain.invoke({
    "question": "What was Gibbs injury?",
    "chat_history": [],
})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>

In [119]:
_inputs.invoke({
    "question": "What did i just ask?",
    "chat_history": [("What was Gibbs injury?", "Gibbs suffered a hamstring injury.")],
})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'standalone_question': ' "What was Gibbs\' injury?"'}

In [120]:
conversational_qa_chain.invoke({
    "question": "Can you repeat my previous question?",
    "chat_history": [("What was Gibbs injury?", "Gibbs suffered a hamstring injury.")],
})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': 'Gibbs, who did not practice this week, also missed last week\'s 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit\'s second-leading rusher.\n\nALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday\'s game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.\n\nAlthough expectations are high for Gibbs as the Lions\' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.\n\n"It\'s football, things happen, we certainly work our bodies pretty hard so I\'m feeling a lot better today," LaPorta said.',
 'question': ' "What was Gibbs\' injury?"',
 'text': 'Gibbs suffered a hamstring injury.'}