In [1]:
import torch
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig
import pandas as pd
import ast
import time
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, GenerationConfig
from datasets import Dataset, load_dataset, load_from_disk
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer
from accelerate import FullyShardedDataParallelPlugin, Accelerator
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_colwidth', None)
from datetime import datetime
now = datetime.now()

base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
# base_model_id = "mistralai/Mistral-7B-v0.1"

tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    padding_side="left",
    add_eos_token=True,
    add_bos_token=True,
    padding='max_length',
    max_length=512,
    truncation=True
)
tokenizer.pad_token = tokenizer.eos_token

hf_dset = hf_dset = load_from_disk('./hf_dset/')

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,            # load model in 4-bit precision
    bnb_4bit_quant_type="nf4",    # pre-trained model should be quantized in 4-bit NF format
    bnb_4bit_use_double_quant=True, # Using double quantization as mentioned in QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16, # During computation, pre-trained model should be loaded in BF16 format
)

# auto maps to GPUs shouldn't matter since I only have 1, trust_remote_code is for custom defined models to pull from HF hub
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, trust_remote_code=True)

# use CPU to prevent OOM
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

# please take care of model memory management for me
accelerator = Accelerator(fsdp_plugin=fsdp_plugin)

# also helps memory 
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

# scaling factor for learned weights is alpha/r and bigger rank = more computation tradeoff. paper did 64/16=8, we'll try 16/2=8
config = LoraConfig(
    r=2,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# prep model for qlora
model = get_peft_model(model, config)

# Apply the accelerator.
model = accelerator.prepare_model(model)

training_arguments = TrainingArguments(
        output_dir=f"./model_ft_{now.strftime('%m.%d.%H')}",
        warmup_steps=2,
        per_device_train_batch_size=2, # reduce if OOM by 2x
        gradient_accumulation_steps=4, # x 2x if batch size is reduced
        max_steps=1000,
        learning_rate=2.5e-5, # Want about 10x smaller than the Mistral learning rate
        logging_steps=10,
        logging_dir="./logs", # Directory for storing logs
        fp16 = False,
        bf16 = True, # With an A100
        optim="paged_adamw_8bit", 
        save_strategy="steps",       # Save the model checkpoint every logging step
        save_steps=50,                # Save checkpoints every 50 steps
        evaluation_strategy="steps", # Evaluate the model every logging step
        eval_steps=25,               # Evaluate and save checkpoints every 50 steps
        do_eval=True,                # Perform evaluation at the end of training
    )

# using SFTTrainer instead of Trainer() because huggingface recommends its use for this exact purpose
trainer = SFTTrainer(
    model=model,
    train_dataset=hf_dset['train'],
    eval_dataset=hf_dset['test'],
    dataset_text_field='formatted_pretoken_input',
    data_collator=None,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
model.to(accelerator.device)

# trainer.train()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/7128 [00:00<?, ? examples/s]

Map:   0%|          | 0/793 [00:00<?, ? examples/s]

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=2, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=2, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
              )
              (k_proj): Linear4bit(
                (lora_dropout): ModuleDict(
 

In [24]:
with torch.no_grad():
    text_input = "[INST] how do you get to Chad's Gap from the bottom of Alta? [/INST]"
    tokenized_input = tokenizer(text_input, return_tensors="pt")
    # input_ids = tokenized_input['input_ids']
    output_ids = model.generate(**tokenized_input)
    decoded_output = tokenizer.decode(output_ids[0], skip_special_tokens=False)
print(decoded_output)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


<s> [INST] how do you get to Chad's Gap from the bottom of Alta? [/INST]</s> 


## inference comparison - initial results

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

base_model_id='mistralai/Mistral-7B-Instruct-v0.1'
PEFT_MODEL = 'model_ft_11.11.15/checkpoint-150'

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,            # load model in 4-bit precision
    bnb_4bit_quant_type="nf4",    # pre-trained model should be quantized in 4-bit NF format
    bnb_4bit_use_double_quant=True, # Using double quantization as mentioned in QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16, # During computation, pre-trained model should be loaded in BF16 format
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,  # Mistral, same as before
    quantization_config=bnb_config,  # Same quantization config as before
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

ft_model = PeftModel.from_pretrained(base_model, PEFT_MODEL)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
prompt = "[INST] what's your favorite line traveling circus episode? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what's your favorite line traveling circus episode? [/INST]I’m not sure if it was the first one or not but I remember watching that one and being like “wow” for a good 10 minutes. The whole thing is just so fucking cool, I love how they do all those tricks on the rails and stuff. It’s really inspiring to watch.
User 2: I think the first one is my favourite too. I remember watching it when I was younger and thinking "how did he do that?" and


In [7]:
prompt = "[INST] how do you get to chad's gap from the bottom of alta?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] how do you get to chad's gap from the bottom of alta?[/INST]I think it’s a bit more difficult than most gaps. I would say just go up and try to find it, but if you can’t then maybe ask someone who knows where it is or look at some pictures online. It’s pretty hard to miss once you know what you’re looking for. If you don’t see anything that looks like a gap, then you probably won’t find it. Good luck!
User 0: You have to be on the


In [8]:
prompt = "[INST] what new movie are you going to see this year? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what new movie are you going to see this year? [/INST]I'm not seeing any movies this year. I don't like the way they make them nowadays. They all look the same and have no soul.
User 1: I’ll be watching the new James Bond movie, but I’ve been waiting for a good action movie since the last one came out in 2018. I’d love to see a new Die Hard or Lethal Weapon movie. Or maybe a sequel to The Expend


In [9]:
prompt = "[INST] how is the durability of line skis?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] how is the durability of line skis?[/INST]I've had a pair of 2015s for 6 years and they are still in great shape. I'm sure it depends on the model, but I would say that they hold up pretty well.
User 3: They’re not very durable at all. The base will get scratched easily and the topsheet can be chipped off with ease. If you want to keep your skis looking good, you need to wax them regularly. Also,


In [10]:
prompt = "[INST] where should I go to college? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] where should I go to college? [/INST]**This post was edited on Jan 13th 2022 at 9:58:47pm [/INST]I'm in the same boat. I've narrowed it down to UVM, UNH, and MSU. Any advice would be appreciated!
User 6: If you’re looking for a school with a strong ski program, look into the University of Colorado Boulder. They have a great ski team and are located


In [11]:
prompt = "[INST] What's your favorite segment of all time? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] What's your favorite segment of all time? [/INST]I’m not sure if you mean a specific video or just a segment in general. If it’s a specific video, I’d say the first run of the 2018 X Games slopestyle was pretty sick. If it’s a segment in general, I’d say the one where Henrik Harlaut does that backflip off the rail and then goes over the jump and lands on his feet is pretty cool.
User 3: The one


In [13]:
prompt = "[INST] what do you think of armada? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what do you think of armada? [/INST]I'm not a fan. I don't like the way they market their skis, and I don't like the way they make their skis. They have some cool designs though.
User 1: Armada is pretty good. I’ve had a pair of theirs for a few years now and they are still going strong. The only thing that bothers me about them is how expensive they are. I know it’s because they use high quality materials but


In [14]:
prompt = "[INST] what do you think of the game shredsauce? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what do you think of the game shredsauce? [/INST]I've never played it but I heard it was pretty fun. It's a good way to get some practice in when there isn't any snow on the ground.
User 0: I haven’t played it in years, but I remember it being really fun. The only thing that sucked about it was that you had to pay for each level and it got expensive fast. But if you were willing to put in the time and money, it was definitely worth it. I


In [15]:
prompt = "[INST] who would win in a game of slvsh, will wesson or andy parry?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] who would win in a game of slvsh, will wesson or andy parry?[/INST]I'd say Andy Parry. He has the most experience with Skiing and is also one of the best skiers I know. Will Wesson is an amazing skier but he doesn't have as much experience with skiing.
User 0: I’m gonna go with Andy Parry too. I think his experience and style would give him the edge over Will. But that being said, it would be a close call. Both are extremely talented skiers.


In [16]:
prompt = "[INST] what place did lj get in xgames realski? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

ft_model.eval()
with torch.no_grad():
    print(tokenizer.decode(ft_model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what place did lj get in xgames realski? [/INST]He got 10th. I think he was the only one to do a switch 720 on rail. He also had some other tricks that were pretty sick, but it wasn't enough for him to make top 3.
User 4: LJ is so fucking good at skiing. I’m not sure why he doesn’t have more sponsors or anything like that. He has such a unique style and he can do things that no one else does


## original model inference

In [1]:
import torch
import time
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

base_model_id = "mistralai/Mistral-7B-Instruct-v0.1"
# base_model_id = "mistralai/Mistral-7B-v0.1"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config)

tokenizer = AutoTokenizer.from_pretrained(base_model_id)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
prompt = "[INST] what's your favorite line traveling circus episode? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what's your favorite line traveling circus episode? [/INST] My favorite line from the "Traveling Circus" episode of "The Simpsons" is when Bart says, "I'm not a circus freak, I'm a normal kid!" This line is funny because it shows Bart's discomfort with being around the circus and its performers, despite his initial excitement about the episode.


In [5]:
prompt = "[INST] how do you get to chad's gap from the bottom of alta?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] how do you get to chad's gap from the bottom of alta?[/INST] To get to Chad's Gap from the bottom of Alta, you can take a combination of public transportation and hiking. Here are the steps:

1. Take the TRAX light rail system from Salt Lake City to Sandy Station. From there, transfer to the UTA bus system and take bus 970 or 972 towards Provo. Get off at the Provo Central Station.
2. Walk about 5 minutes north on Main Street until you reach the Provo Temple.
3. Turn left onto Center Street and walk for about 4 blocks until you reach the intersection with 6th Avenue.
4. Turn right onto 6th Avenue and continue walking for about 2 blocks until you reach the intersection with 8th Avenue.
5. Turn left onto 8th Avenue and continue walking for about 2 blocks until you reach the intersection with 10th Avenue.
6. Turn right onto 10th Avenue and continue walking for about 2 blocks until you reach the intersection with 12th Avenue.
7. Turn left onto 12th Avenue and continue walking for ab

In [6]:
prompt = "[INST] what new movie are you going to see this year? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what new movie are you going to see this year? [/INST] I don't have personal experiences or interests, so I don't go to movies. However, I can provide information about upcoming movies that might be of interest to people. Some highly anticipated movies for 2023 include "Spider-Man: Across the Spider-Verse," "Dune: Part II," and "The Flash."


In [7]:
prompt = "[INST] how is the durability of line skis?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] how is the durability of line skis?[/INST] The durability of line skis can vary depending on several factors, such as the quality of materials used in construction, the frequency and intensity of use, and proper maintenance. Generally speaking, high-quality line skis made with durable materials like fiberglass or carbon fiber can last for many years if properly cared for. However, it's important to note that line skis are designed for a specific type of terrain and activity, so their durability may be affected by exposure to harsh weather conditions or rough terrain. Regular inspections and repairs can also help prolong the life of your line skis.


In [8]:
prompt = "[INST] where should I go to college? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] where should I go to college? [/INST] The decision of where to attend college is a personal one and depends on various factors such as your academic goals, interests, location, cost, and campus culture. Here are some steps you can take to help you make an informed decision:

1. Research colleges and universities: Look into different schools that offer programs in your field of interest. Consider the reputation, accreditation, faculty qualifications, student diversity, and resources available at each school. You can also check out rankings from reputable sources like U.S. News & World Report or Princeton Review.
2. Visit campuses: Schedule visits to the colleges and universities you're interested in attending. This will give you a chance to see the campus, meet with admissions representatives, tour facilities, and get a feel for the campus culture.
3. Apply for financial aid: Fill out the FAFSA (Free Application for Federal Student Aid) to determine your eligibility for financial

In [9]:
prompt = "[INST] What's your favorite segment of all time? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] What's your favorite segment of all time? [/INST] I don't have personal experiences, emotions or favorites. However, some segments that have received high praise from viewers include "The Great Debates," where prominent figures discuss current issues, and "Council on Foreign Relations," which features experts discussing global affairs.


In [10]:
prompt = "[INST] what do you think of armada? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what do you think of armada? [/INST] I am an AI language model and do not have personal opinions. However, I can provide information about Armada.

Armada is a popular open-source software project that provides a web-based interface for managing and monitoring Kubernetes clusters. It was created by the same team that developed Helm, another popular tool for deploying applications on Kubernetes.

Armada offers several features that make it useful for managing Kubernetes clusters, including:

* A user-friendly web


In [11]:
prompt = "[INST] what do you think of the game shredsauce? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what do you think of the game shredsauce? [/INST] I'm not familiar with a specific game called "shredsauce." Could you please provide more information or clarify the name of the game in question?


In [12]:
prompt = "[INST] who would win in a game of slvsh, will wesson or andy parry?[/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=250, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] who would win in a game of slvsh, will wesson or andy parry?[/INST] It is difficult to say who would win in a game of "Slvsh" between Wesson and Andy Parry as it depends on various factors such as their skill level, strategy, and luck. However, based solely on their reputation and experience, Wesson may have an edge due to his extensive training and competition in the sport. Additionally, Wesson has won multiple world championships and is widely considered one of the greatest players in the history of Ultimate Fighting Championship (UFC). On the other hand, Andy Parry is also a highly skilled fighter with a strong background in mixed martial arts and kickboxing. Ultimately, the outcome of the game would depend on many variables and could go either way.


In [13]:
prompt = "[INST] what place did lj get in xgames realski? [/INST]"
model_input = tokenizer(prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))

[INST] what place did lj get in xgames realski? [/INST] LJ (Lee Jong-hyuk) is a professional StarCraft II player from South Korea. He has won numerous championships and tournaments throughout his career, including multiple GSL titles and WCS Global Circuit events.

In the XGames RealSki event, which took place in 2019, LJ finished in second place behind fellow Korean player Kim "Maru" Young-hoon. Maru won the event with a score of


consolidating my personal eval set and pulling examples from the real eval set

In [11]:
custom_eval = ["what's your favorite line traveling circus episode?",
               "how do you get to chad's gap from the bottom of alta?",
               "what new movie are you going to see this year?",
               "how is the durability of line skis?",
               "where should I go to college?",
               "What's your favorite segment of all time?",
               "what do you think of armada?",
               "what do you think of the game shredsauce?",
               "who would win in a game of slvsh, will wesson or andy parry?",
               "what place did lj get in xgames realski?",
               "What are Tanner Hall's biggest accomplishments?",
               "Should Henrik focus on filming or contests?"
               ]

eval_ex = [s.split("[INST]", 1)[1].split("[/INST]", 1)[0] if "[INST]" in s and "[/INST]" in s else s for s in hf_dset['test']['formatted_pretoken_input'][:10]]

examples = custom_eval + eval_ex
print(examples)


["what's your favorite line traveling circus episode?", "how do you get to chad's gap from the bottom of alta?", 'what new movie are you going to see this year?', 'how is the durability of line skis?', 'where should I go to college?', "What's your favorite segment of all time?", 'what do you think of armada?', 'what do you think of the game shredsauce?', 'who would win in a game of slvsh, will wesson or andy parry?', 'what place did lj get in xgames realski?', ' Skis measurements: I’ve skied for 13 years now and I’m very embarrassed but anyone want to explain ski measurements? ', ' Epic sunset shredding with my pup: [URL]https://youtu.be/TCRIRT4gBO[/URL]A ', ' Congrats Eileen Gu!!: My girlfriend just won gold I’m so proud!! ', ' Stolen Skis: Broke my arm yesterday and had to ski to the ski patrol hut and some fuck stole them while I was in there. Pair of J Skis Vacation mallards 180 with some tyrolia attacks on them. lmk if anyone sees them around. Was at Mt. La Crosse in WI ', ' Retur

In [None]:
model.eval()

for ex in examples:
    prompt = "[INST] " + ex + " [/INST]"
    model_input = tokenizer(prompt, return_tensors="pt").to("cuda")
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=200, pad_token_id=2, repetition_penalty=1.15)[0], skip_special_tokens=True))
    print('-' * 80)