### Imports

In [1]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import GenerationConfig
import transformers

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

2023-10-14 15:43:18.251805: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-10-14 15:43:19.466295: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Load and test out mistral-7b

In [2]:
model_name='mistralai/Mistral-7B-Instruct-v0.1'

model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

In [4]:
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

Your GPU supports bfloat16: accelerate training with bf16=True


In [5]:
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 262410240
all model parameters: 3752071168
percentage of trainable model parameters: 6.99%


In [35]:
device = 'cuda'

messages = [
    {"role": "user", "content": "What can you tell me about the 2021 fantasy football season?"},
    {"role": "assistant", "content": """The 2021 fantasy football season began on August 17th, 2021, with the first games of the NFL Preseason. The regular season started on September 11th, 2021 and ended on January 9th, 2022. The Super Bowl LVI, which was the 56th Super Bowl, was held on February 6th, 2022, at SoFi Stadium in Los Angeles, California.

Due to the COVID-19 pandemic, the 2021 fantasy football season was played with some changes, including a reduced preseason schedule, the elimination of in-person attendance at games, and the use of virtual meetings for teams.

In terms of fantasy football leagues, many leagues opted to use a PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards. The use of PPR has become increasingly popular in recent years, as it provides more opportunities for players to score points.

As for notable players in the 2021 fantasy football season, some of the top performers included:

* Quarterbacks: Patrick Mahomes of the Kansas City Chiefs, Justin Herbert of the Los Angeles Chargers, and Tom Brady of the Tampa Bay Buccaneers.
* Running Backs: Christian McCaffery of the Carolina Panthers, Alvin Kamara of the New Orleans Saints, and Dalvin Cook of the Minnesota Vikings.
* Wide Receivers: Tyreek Hill of the Kansas City Chiefs, Stefon Diggs of the Buffalo Bills, and DeAndre Hopkins of the Arizona Cardinals.
* Tight Ends: Travis Kelce of the Kansas City Chiefs, George Kittle of the San Francisco 49ers, and Tyler Higbee of the Los Angeles Rams.

In terms of team performance, the Tampa Bay Buccaneers won the Super Bowl LVI, defeating the Kansas City Chiefs 31-9. The Green Bay Packers finished the regular season with the best record (13-3), but were eliminated in the divisional round of the playoffs by the Los Angeles Rams. The Oakland Raiders, who moved to Las Vegas in 2020, had a strong season, finishing with a record of 10-6 and making it to the playoffs for the first time since 2016."""},
    {"role": "user", "content": "How many points did Maholmes score that season?"},
    {"role": "assistant", "content": "Patrick Mahomes, the quarterback for the Kansas City Chiefs, scored a total of 456 points in the regular season of the 2021 fantasy football season. This made him the top scoring quarterback in the PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards."},
    {"role": "user", "content": "Can you tell me how many points he scored in week 3 of that season?"},
    {"role": "assistant", "content": "Patrick Mahomes scored 35 points in week 3 of the 2021 fantasy football season. This was a strong performance, as Mahomes completed 18 of 29 passes for 283 yards and three touchdowns in a win over the Indianapolis Colts."},
    {"role": "user", "content": "Did anyone score more than him in that week?"}
]

encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

model_inputs = encodeds.to(device)
model

generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0])

Using sep_token, but it is not set yet.
Using cls_token, but it is not set yet.
Using mask_token, but it is not set yet.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> [INST] What can you tell me about the 2021 fantasy football season? [/INST]The 2021 fantasy football season began on August 17th, 2021, with the first games of the NFL Preseason. The regular season started on September 11th, 2021 and ended on January 9th, 2022. The Super Bowl LVI, which was the 56th Super Bowl, was held on February 6th, 2022, at SoFi Stadium in Los Angeles, California.

Due to the COVID-19 pandemic, the 2021 fantasy football season was played with some changes, including a reduced preseason schedule, the elimination of in-person attendance at games, and the use of virtual meetings for teams.

In terms of fantasy football leagues, many leagues opted to use a PPR (Point Pertinent) format, which rewards players for receiving passing yards in addition to rushing yards. The use of PPR has become increasingly popular in recent years, as it provides more opportunities for players to score points.

As for notable players in the 2021 fantasy football season, some of the top

### RAG RAG RAG

In [36]:
pip install langchain openai faiss-gpu tiktoken

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/5a/52/851a5e896ead3c22b04caab67b38805f9dbfa7b59e69b5d9dcd06a6a6c9b/langchain-0.0.314-py3-none-any.whl.metadata
  Downloading langchain-0.0.314-py3-none-any.whl.metadata (15 kB)
Collecting openai
  Obtaining dependency information for openai from https://files.pythonhosted.org/packages/1e/9f/385c25502f437686e4aa715969e5eaf5c2cb5e5ffa7c5cdd52f3c6ae967a/openai-0.28.1-py3-none-any.whl.metadata
  Downloading openai-0.28.1-py3-none-any.whl.metadata (11 kB)
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting tiktoken
  Obtaining dependency information for tiktoken from https://files.pythonhosted.org/packages/f4/2e/0adf6e264b996e263b1c57cad6560ffd5492a69beb9fd7

In [41]:
!pip install sentence-transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting sentence-transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting torchvision (from sentence-transformers)
  Obtaining dependency information for torchvision from https://files.pythonhosted.org/packages/84/eb/4f6483ae9094e164dc5b9b792e377f7d37823b0bedc3eef3193d416d2bb6/torchvision-0.16.0-cp310-cp310-manylinux1_x86_64.whl.metadata
  Downloading torchvision-0.16.0-cp310-cp310-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nltk (from sentence-transformers)
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m30.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting sentencepiece (from sentence-transformers)
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014

In [7]:
!playwright install

In [7]:
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.chains import LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import FAISS

Example ChatPromptTemplate

```python
from langchain.prompts import ChatPromptTemplate

template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI bot. Your name is {name}."),
    ("human", "Hello, how are you doing?"),
    ("ai", "I'm doing well, thanks!"),
    ("human", "{user_input}"),
])

messages = template.format_messages(
    name="Bob",
    user_input="What is your name?"
)
```

In [8]:
text_generation_pipeline = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=1000,

)

In [9]:
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [10]:
prompt_template = """
### [INST] Instruction: Answer the question based on your fantasy football knowledge. Here is context to help:

{context}

### QUESTION:
{question} [/INST]
 """

In [11]:
# Create prompt from prompt template 
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Create llm chain 
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)

In [12]:
input_text = {
    "context": "",
    "question": "How many fantasy football points did patrick maholmes score in the 2023 season?"
}

# Generate and print LLM response 
response = llm_chain(input_text)

print(response['text'])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Patrick Mahomes is a professional American football player who plays quarterback for the Kansas City Chiefs in the National Football League (NFL). In the 2023 season, he would have scored fantasy football points based on his performance in games throughout the year. However, as the 2023 season has not yet started and the final scores are unknown, I cannot provide an answer to this question at this time.


In [126]:
vectorstore = FAISS.from_texts(["In the 2023 session, patrick maholmes scored 2000 fantasy football points."], embedding=HuggingFaceEmbeddings())
retriever = vectorstore.as_retriever()

```python
HuggingFaceEmbeddings(
    *,
    client: Any = None,
    model_name: str = 'sentence-transformers/all-mpnet-base-v2',
    cache_folder: Optional[str] = None,
    model_kwargs: Dict[str, Any] = None,
    encode_kwargs: Dict[str, Any] = None,
    multi_process: bool = False,
) -> None
```

In [158]:
chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

In [128]:
chain.invoke("How many fantasy football points did patrick maholmes score in the 2023 season?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content='In the 2023 session, patrick maholmes scored 2000 fantasy football points.')],
 'question': 'How many fantasy football points did patrick maholmes score in the 2023 season?',
 'text': ' Patrick Mahomes scored 2000 fantasy football points in the 2023 season.'}

In [169]:
!pip install unstructured

Collecting unstructured
  Obtaining dependency information for unstructured from https://files.pythonhosted.org/packages/86/26/440da052da58a71b895009eaff33564472633efb4f481011e01b75ec8037/unstructured-0.10.22-py3-none-any.whl.metadata
  Downloading unstructured-0.10.22-py3-none-any.whl.metadata (24 kB)
Collecting chardet (from unstructured)
  Obtaining dependency information for chardet from https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl.metadata
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Collecting filetype (from unstructured)
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Collecting python-magic (from unstructured)
  Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Collecting lxml (from unstructured)
  Obtaining dependency information for lxml from https://files.pythonhosted.org/packages/3c/d2/11533f0bc47ff4d828a20cfb702f3453fe714bd5b475fcdc8cec6e6b7dc

In [13]:
from langchain.document_loaders import AsyncChromiumLoader, UnstructuredURLLoader, PlaywrightURLLoader
from langchain.document_loaders.blob_loaders.schema import Blob
from langchain.document_loaders.parsers.txt import TextParser
from langchain.document_loaders.url_selenium import SeleniumURLLoader
from langchain.text_splitter import CharacterTextSplitter

In [14]:
test_document = """
ALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday's game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.

Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.

Veteran running back David Montgomery has carried the load in his absence, with back-to-back games of at least 100 yards rushing. Detroit has rushed for nine touchdowns through the first five games of the season, tying a team record.

Although expectations are high for Gibbs as the Lions' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.

"He's right where I thought he would be, he's right where he should be coming along at at this time," Scottie Montgomery said.

"We don't judge players based off of anything else besides achievement. Like there is success and there is expectations and I want people outside the building to put those expectations on him because, guess what, whether we believe it or not it motivates him. It does and that's a really, really good tool."

Meanwhile, rookie safety Brian Branch, who has an ankle injury, is also set to miss his second consecutive game. The second-round pick out of Alabama was held out of practice for the entire week. With 25 tackles, a pick-six and 4 passes defended, he is Detroit's fourth-leading tackler.

Rookie tight end Sam LaPorta is questionable with a calf strain that flared up Wednesday, forcing him to miss Thursday's practice. He said it likely stemmed from Sunday's win over Carolina, and he intends to play Sunday.

"It's football, things happen, we certainly work our bodies pretty hard so I'm feeling a lot better today," LaPorta said.
"""

In [15]:
loader = AsyncChromiumLoader(["https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs"])
loader.load()

RuntimeError: asyncio.run() cannot be called from a running event loop

In [182]:
!playwright install

Downloading Chromium 117.0.5938.62 (playwright build v1080)[2m from https://playwright.azureedge.net/builds/chromium/1080/chromium-linux.zip[22m
Chromium 117.0.5938.62 (playwright build v1080) downloaded to /home/jupyter/.cache/ms-playwright/chromium-1080
Downloading FFMPEG playwright build v1009[2m from https://playwright.azureedge.net/builds/ffmpeg/1009/ffmpeg-linux.zip[22m
FFMPEG playwright build v1009 downloaded to /home/jupyter/.cache/ms-playwright/ffmpeg-1009
Downloading Firefox 117.0 (playwright build v1424)[2m from https://playwright.azureedge.net/builds/firefox/1424/firefox-debian-11.zip[22m
Firefox 117.0 (playwright build v1424) downloaded to /home/jupyter/.cache/ms-playwright/firefox-1424
Downloading Webkit 17.0 (playwright build v1908)[2m from https://playwright.azureedge.net/builds/webkit/1908/webkit-debian-11.zip[22m
Webkit 17.0 (playwright build v1908) downloaded to /home/jupyter/.cache/ms-playwright/webkit-1908


In [186]:

loader = PlaywrightURLLoader(["https://www.espn.com/nfl/story/_/id/38652877/lions-rb-jahmyr-gibbs-wr-amon-ra-st-brown-return-vs-bucs"])
loader.load()

Error: It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead.

In [154]:
raw_documents = TextParser().parse(Blob(data=test_document))

In [160]:
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
documents = text_splitter.split_documents(raw_documents)
db = FAISS.from_documents(documents, HuggingFaceEmbeddings())

Created a chunk of size 234, which is longer than the specified 100
Created a chunk of size 258, which is longer than the specified 100
Created a chunk of size 233, which is longer than the specified 100
Created a chunk of size 158, which is longer than the specified 100
Created a chunk of size 126, which is longer than the specified 100
Created a chunk of size 307, which is longer than the specified 100
Created a chunk of size 286, which is longer than the specified 100
Created a chunk of size 220, which is longer than the specified 100


In [161]:
query = "Did Gibbs start this week?"
docs = db.similarity_search(query)
print(docs[0].page_content)

Gibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.


In [162]:
retriever = db.as_retriever()

In [164]:
chain.invoke("Why didn't Gibbs practice this week?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


{'context': [Document(page_content="ALLEN PARK, Mich. -- Detroit Lions running back Jahmyr Gibbs has been ruled out for Sunday's game against the Tampa Bay Buccaneers with a hamstring injury while Pro Bowl receiver Amon-Ra St. Brown (abdomen) is set to make his return.\n\nGibbs, who did not practice this week, also missed last week's 42-24 victory over the Carolina Panthers with the same injury. With 179 yards and a touchdown with an additional 70 receiving yards in his four appearances, he is Detroit's second-leading rusher.\n\nVeteran running back David Montgomery has carried the load in his absence, with back-to-back games of at least 100 yards rushing. Detroit has rushed for nine touchdowns through the first five games of the season, tying a team record.\n\nAlthough expectations are high for Gibbs as the Lions' No. 12 overall selection, running back coach Scottie Montgomery is helping him develop at his own pace.", metadata={'source': None}),
  Document(page_content='"He\'s right w