Connect to T4 GPU!!! Takuto was here

Source notebooks: most of the code was copied from the first notebook listed below. The exception is the first cell (the imports), which was copied from the second notebook because the first notebook's version caused errors.

1. Finetuning Llama 3.1: https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-Alpaca.ipynb

2. GRPO (R1 reasoning) with Llama 3.1: https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.1_(8B)-GRPO.ipynb#scrollTo=GPgXROedCcqP


Useful links:
- https://docs.unsloth.ai/basics/continued-pretraining
- https://docs.unsloth.ai/get-started/fine-tuning-guide

In [None]:
%%capture
# Install Unsloth and its dependencies for the current environment.
import os
# Colab is detected by looking for the substring "COLAB_" in the concatenation
# of all environment-variable names.
if "COLAB_" not in "".join(os.environ.keys()):
    # Not on Colab: a plain install of unsloth.
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The first and last installs use --no-deps and pin xformers / trl;
    # NOTE(review): presumably to avoid disturbing Colab's preinstalled packages — confirm.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
from unsloth import FastLanguageModel
import torch
import os
import numpy as np
import pandas as pd
import copy
from tqdm import tqdm

In [None]:
# Load the saved model and its tokenizer through Unsloth's FastLanguageModel.
# `model` and `tokenizer` are notebook globals used by the cells below.
max_seq_length = 8192 # Choose any! We auto support RoPE Scaling internally!

# model_name = "unsloth/Meta-Llama-3.1-8B" # default

# "./model" is a relative path to a locally saved model directory.
# NOTE(review): presumably written by an earlier fine-tuning run — confirm it exists before running.
model_name = "./model" # loading a pretrained saved model


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = None,  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = True,    # Use 4bit quantization to reduce memory usage. Can be False.
)

In [None]:
# Prompt template shared by the formatting functions and the inference loop.
# It has two positional `{}` slots: the first is filled with the problem text
# (under "### Input:"), the second with the solution (under "### Response:").
# The inference loop fills the second slot with "" so the model generates it.
# The template starts and ends with a newline, so a formatted prompt does too.
prompt = """
### Instruction:
Below is a math problem, please solve it as best you can step by step.

### Input:
{}

### Response:
{}
"""

# End-of-sequence marker taken from the loaded tokenizer; the formatting
# functions append it to every training text.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_func_gsm8k(examples):
    """Build the training texts for a batch of GSM8K rows.

    ``examples`` is indexed by column name; its ``question`` and ``answer``
    columns are walked in lockstep. Each pair is substituted into the
    module-level ``prompt`` template and ``EOS_TOKEN`` is appended.

    Returns:
        A dict with a single ``"text"`` key holding the formatted strings,
        one per question/answer pair.
    """
    pairs = zip(examples['question'], examples['answer'])
    return {
        "text": [
            prompt.format(problem, solution) + EOS_TOKEN
            for problem, solution in pairs
        ],
    }

def formatting_func_MATH(examples):
    """Build the training texts for a batch of MATH rows.

    ``examples`` is indexed by column name; its ``problem`` and ``solution``
    columns are walked in lockstep. Each pair is substituted into the
    module-level ``prompt`` template and ``EOS_TOKEN`` is appended.

    The ``level`` column is not part of the formatted text, so it is no
    longer read here; batches without that column are formatted as well.

    Returns:
        A dict with a single ``"text"`` key holding the formatted strings,
        one per problem/solution pair.
    """
    problems = examples['problem']
    solutions = examples['solution']
    return {
        "text": [
            prompt.format(problem, solution) + EOS_TOKEN
            for problem, solution in zip(problems, solutions)
        ],
    }


from datasets import load_dataset
# gsm8k = load_dataset("openai/gsm8k", "main")
# gsm8k_train = gsm8k['train']
# gsm8k_test = gsm8k['test']
# gsm8k_train = gsm8k_train.map(formatting_func_gsm8k, batched=True)
# gsm8k_test = gsm8k_test.map(formatting_func_gsm8k, batched=True)

# Download the MATH benchmark and add the formatted "text" column to both
# splits by running formatting_func_MATH over them in batched mode.
MATH = load_dataset("nlile/hendrycks-MATH-benchmark")
MATH_train = MATH['train'].map(formatting_func_MATH, batched=True)
MATH_test = MATH['test'].map(formatting_func_MATH, batched=True)

# dataset = load_dataset("openai/gsm8k", "main", split="train")
# dataset = dataset.map(formatting_func_gsm8k, batched=True)


In [None]:
# Output configuration for the evaluation run.
# NOTE(review): save_dir is a placeholder and is not referenced by the cells
# visible here; the CSVs below are written to paths relative to the working directory.
save_dir='wherever you want to save the outputs'

# Colab-only helper, used by the final cell to download the result CSV.
from google.colab import files
# Filename prefix for every CSV written by the inference cells.
# NOTE(review): the prefix says "gsm8k" but the visible loop evaluates MATH_test — rename if that is unintended.
save_name = "gsm8k_0"

In [None]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# One dict per evaluated problem: the dataset columns plus 'model_response'.
# Kept as a notebook global because the final-save cell reads it.
rows = []

# Iterate the test split directly so each row is materialised once, instead of
# indexing the dataset twice per step and deep-copying the second lookup.
for i, sample in enumerate(tqdm(MATH_test, desc="Working", unit="iter", ncols=80)):
    inputs = tokenizer(
        [
            prompt.format(
                sample['problem'], # instruction (use sample['question'] for GSM8K)
                "", # output - leave this blank for generation!
            )
        ],
        return_tensors = "pt",
    ).to("cuda")

    outputs = model.generate(**inputs, max_new_tokens = 1024, use_cache = True)

    # Shallow copy: only a new key is added, the existing values are left untouched.
    new_row = dict(sample)
    # The whole decoded sequence is stored as-is (no prompt stripping is done here).
    new_row['model_response'] = tokenizer.batch_decode(outputs)[0]
    rows.append(new_row)

    # Best-effort checkpoint after every 100 rows. The filename carries the
    # zero-based index of the last row (e.g. "_99" after 100 rows). A failed
    # write is reported but must not abort the long-running generation loop.
    if (i + 1) % 100 == 0:
        try:
            df = pd.DataFrame(rows)
            df.to_csv(f'{save_name}_{i}.csv')
            # files.download(f'{save_name}_{i}.csv')
        except Exception as e:
            print(e)

In [None]:
##just to be sure
# The loop above only checkpoints when the row count is a multiple of 100, so
# this final write is what captures any remaining rows.
# All collected rows go to '<save_name>_final.csv', which is then handed to
# google.colab's files.download.

df = pd.DataFrame(rows)
df.to_csv(f'{save_name}_final.csv')
files.download(f'{save_name}_final.csv')