In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the GSM8K question/answer dataset and the Mistral-7B-Instruct v0.1 weights
# through kagglehub; each call's return value is kept in the *_path variable.
# NOTE(review): later cells read hard-coded /kaggle/input/... paths rather than
# these variables — confirm both point at the same files in your environment.
thedevastator_grade_school_math_8k_q_a_path = kagglehub.dataset_download('thedevastator/grade-school-math-8k-q-a')
mistral_ai_mistral_pytorch_7b_instruct_v0_1_hf_1_path = kagglehub.model_download('mistral-ai/mistral/PyTorch/7b-instruct-v0.1-hf/1')

print('Data source import complete.')


In [None]:
!pip install -q -U torch --index-url https://download.pytorch.org/whl/cu117

In [None]:
!pip install -q -U transformers=="4.38.2"
!pip install -q accelerate
!pip install -q -i https://pypi.org/simple/ bitsandbytes
!pip install -q -U datasets

In [None]:
!pip install -q -U trl==0.8.5
!pip install -q -U git+https://github.com/huggingface/peft

In [None]:
import os
# Restrict CUDA to device 0.
# NOTE(review): presumably must run before anything initialises CUDA — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# presumably silences the tokenizers parallelism/fork warning — verify against the tokenizers docs
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
# import torch_xla
# import torch_xla.core.xla_model as xm

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm

import torch
import torch.nn as nn

import transformers
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from datasets import Dataset
from peft import LoraConfig, PeftConfig
import bitsandbytes as bnb
from trl import SFTTrainer

from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [None]:
# Local Kaggle path of the Mistral-7B-Instruct v0.1 checkpoint.
model_name = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

# dtype handed to bitsandbytes as the 4-bit compute dtype.
compute_dtype = torch.float16

# 4-bit NF4 quantisation, double quantisation switched off.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# Load the causal LM with the quantisation config; device placement is left to "auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

In [None]:
model_name = "/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1"

max_seq_length = 2048
# `model_max_length` is the tokenizer's length-limit argument; `max_seq_length`
# is not a tokenizer option and would not be applied as a limit.
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=max_seq_length)

# Padding a batch requires a pad token, and the checkpoint may not define one.
# Prefer the unknown token so that EOS stays a real training target; fall back
# to EOS when there is no unknown token either.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token or tokenizer.eos_token

# Appended to every training example so the model learns where an answer ends.
EOS_TOKEN = tokenizer.eos_token

In [None]:
# Read the GSM8K training CSV into `df` for inspection.
filename = "/kaggle/input/grade-school-math-8k-q-a/main_train.csv"

df = pd.read_csv(filename)
# Bare expression: rendered as this cell's output.
df

In [None]:
trainfilename = "/kaggle/input/grade-school-math-8k-q-a/main_train.csv"
testfilename = "/kaggle/input/grade-school-math-8k-q-a/main_test.csv"

# Read the labelled training file and hold out 20% of it for evaluation.
# The split is taken from the frame read in THIS cell (not from a variable
# created by another cell), so the cell gives the same result on every run.
traindf = pd.read_csv(trainfilename)
traindf, evaldf = train_test_split(traindf, test_size=0.2, random_state=42)

testdf = pd.read_csv(testfilename)

# Optional: uncomment to work on a small subset while debugging.
# testdf = testdf[:50]
# evaldf = evaldf[:3]
# traindf = traindf[:3]

In [None]:
def generate_prompt_gsm8k(data_point):
    """Build one training text of the form "<question> [SEP] <answer>" + EOS_TOKEN.

    Args:
        data_point: mapping (e.g. a DataFrame row) with 'question' and 'answer'.

    Returns:
        The joined text with surrounding whitespace stripped, followed by the
        module-level EOS_TOKEN.
    """
    question = data_point['question']
    answer = data_point['answer']
    joined = f"{question} [SEP] {answer}"
    return joined.strip() + EOS_TOKEN

def generate_prompt_test_gsm8k(data_point):
    """Build the one-shot inference prompt for a GSM8K question.

    The prompt is an instruction, one worked example and the target question,
    ending in "Solution:" so that the model continues with its answer.

    The text is assembled line by line, so no source-code indentation ends up
    inside the prompt. A full stop is appended to the question only when it
    does not already end in terminal punctuation (avoids "...pages?.").

    Args:
        data_point: mapping (e.g. a DataFrame row) with a 'question' entry.

    Returns:
        The prompt string, with no leading or trailing whitespace.
    """
    question = str(data_point['question']).strip()
    if not question.endswith(('.', '?', '!')):
        question += '.'

    return (
        "Instruction: Give a very short numeric solution in 30 words or less.\n"
        "\n"
        "Question: Joy can read 8 pages of a book in 20 minutes. How many hours will it take her to read 120 pages?\n"
        "Solution: In one hour, there are 3 sets of 20 minutes. So, Joy can read 8 x 3 = <<8*3=24>>24 pages in an hour. It will take her 120/24 = <<120/24=5>>5 hours to read 120 pages. #### 5\n"
        "\n"
        f"Question: {question}\n"
        "Solution:"
    )

import re
# Final-answer marker: "#### " followed by an optionally negative run of
# digits, dots and commas.
ANS_RE = re.compile(r"#### (\-?[0-9\.\,]+)")
# Returned when a row carries no final-answer marker.
INVALID_ANS = "[invalid]"


def extract_the_answer(data_point):
    """Return the number after the "#### " marker in data_point['answer'].

    Commas are removed from the captured number. INVALID_ANS is returned when
    the marker is absent.
    """
    found = ANS_RE.search(data_point['answer'])
    if found is None:
        return INVALID_ANS
    return found.group(1).strip().replace(",", "")
# Training and evaluation texts use the same "<question> [SEP] <answer>" format,
# so the evaluation loss is measured on the kind of sequence the model is
# trained on (the inference prompt has no answer to score).
X_train = pd.DataFrame(traindf.apply(generate_prompt_gsm8k, axis=1), columns=["question"])
X_eval = pd.DataFrame(evaldf.apply(generate_prompt_gsm8k, axis=1), columns=["question"])

# Test side: reference numbers parsed from the answers, and one inference
# prompt per question.
y_true = pd.DataFrame(testdf.apply(extract_the_answer, axis=1), columns=["answer"])
X_test = pd.DataFrame(testdf.apply(generate_prompt_test_gsm8k, axis=1), columns=["question"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [None]:
train_data[0]

In [None]:
# Rebuild the training-text frame from traindf: a single "question" column
# holding the output of generate_prompt_gsm8k for each row.
X_train = pd.DataFrame(traindf.apply(generate_prompt_gsm8k, axis=1), columns=["question",])

In [None]:
def evaluate(y_true, y_pred):
    """Print and return the exact-match accuracy of y_pred against y_true.

    Both inputs are flattened to 1-D and compared as strings, so a
    single-column DataFrame, a Series or a list are all accepted, and
    numeric and string forms of the same value ("6" vs 6) compare equal.

    Args:
        y_true: reference answers (list, Series or single-column DataFrame).
        y_pred: predicted answers, one per reference answer.

    Returns:
        The fraction of positions where prediction and reference are equal,
        or NaN when both inputs are empty.

    Raises:
        ValueError: if the two inputs do not contain the same number of items.
    """
    truth = np.asarray(y_true, dtype=object).ravel().astype(str)
    preds = np.asarray(y_pred, dtype=object).ravel().astype(str)
    if truth.shape != preds.shape:
        raise ValueError(
            f"y_true has {truth.size} items but y_pred has {preds.size}"
        )

    # Guard the empty case explicitly: the mean of an empty array only warns.
    accuracy = float(np.mean(truth == preds)) if truth.size else float("nan")
    print(f'Accuracy: {accuracy:.3f}')
    return accuracy

In [None]:
# (before, after) literal replacements applied in order by normalize_final_answer.
# Note that (' ', '') removes every remaining space.
SUBSTITUTIONS = [
    ('an ', ''), ('a ', ''), ('.$', '$'), ('\\$', ''), (r'\ ', ''), ('\\%', '%'),
    (' ', ''), ('mbox', 'text'), (',\\text{and}', ','),
    ('\\text{and}', ','), ('\\text{m}', '\\text{}')
]
# Literal substrings (units, filler words, LaTeX spacing) deleted after the
# substitutions above have been applied.
REMOVED_EXPRESSIONS = [
    'square', 'ways', 'integers', 'dollars', 'mph', 'inches', 'ft',
    'hours', 'km', 'units', '\\ldots', 'sue', 'points', 'feet',
    'minutes', 'digits', 'cents', 'degrees', 'cm', 'gm', 'pounds',
    'meters', 'meals', 'edges', 'students', 'childrentickets', 'multiples',
    '\\text{s}', '\\text{.}', '\\text{\ns}', '\\text{}^2',
    '\\text{}^3', '\\text{\n}', '\\text{}', r'\mathrm{th}',
    r'^\circ', r'^{\circ}', r'\;', r',\!', '{,}', '"', '\\dots'
]

def normalize_final_answer(final_answer: str) -> str:
    """Normalize a final answer to a quantitative reasoning question.

    Steps, in order:
      1. keep only the text after the last '=';
      2. apply SUBSTITUTIONS, then delete every REMOVED_EXPRESSIONS entry;
      3. if the text holds a $...$ span, keep only the first such span;
      4. unwrap \\text{}, \\textbf{}, \\overline{} and \\boxed{};
      5. add braces to shorthand such as "frac12" and "sqrt2";
      6. drop any remaining '$' and ','.

    Args:
        final_answer: raw answer text.

    Returns:
        The normalized answer string.
    """
    final_answer = final_answer.split('=')[-1]

    for before, after in SUBSTITUTIONS:
        final_answer = final_answer.replace(before, after)
    for expr in REMOVED_EXPRESSIONS:
        final_answer = final_answer.replace(expr, '')

    # Keep only the content of the first $...$ span (still wrapped in '$').
    final_answer = re.sub(r'(.*?)(\$)(.*?)(\$)(.*)', '$\\3$', final_answer)
    # Replace each wrapper command with its argument.
    final_answer = re.sub(r'(\\text\{)(.*?)(\})', '\\2', final_answer)
    final_answer = re.sub(r'(\\textbf\{)(.*?)(\})', '\\2', final_answer)
    final_answer = re.sub(r'(\\overline\{)(.*?)(\})', '\\2', final_answer)
    final_answer = re.sub(r'(\\boxed\{)(.*)(\})', '\\2', final_answer)

    # Shorthand "frac12" -> "frac{1}{2}" and "sqrt2" -> "sqrt{2}".
    final_answer = re.sub(
        r'(frac)([^{])(.)', 'frac{\\2}{\\3}', final_answer)
    final_answer = re.sub(
        r'(sqrt)([^{])', 'sqrt{\\2}', final_answer)
    final_answer = final_answer.replace('$', '')

    # Thousands separators: "1,234" -> "1234".
    final_answer = final_answer.replace(',', '')

    return final_answer

In [None]:
y_true_predict = y_true["answer"].tolist()

# Stored in y_pred when the reference answer is not found in the model output.
NO_MATCH = "[no match]"


def _reference_answers(y_true):
    """Return the reference answers as a list of strings.

    Accepts a DataFrame with an "answer" column, a Series or a list; falls
    back to the module-level `y_true_predict` when `y_true` is None.
    """
    if y_true is None:
        y_true = y_true_predict
    elif isinstance(y_true, pd.DataFrame):
        y_true = y_true["answer"]
    return [str(ans) for ans in y_true]


def _mentions_number(text, number):
    """Return True if `number` occurs in `text` as a complete number.

    The number is escaped (so "." is literal) and must not be part of a longer
    number: a truth of "5" does not match "15", "52", "1.5" or "5.5".
    """
    pattern = rf"(?<![0-9.]){re.escape(number)}(?![0-9]|\.[0-9])"
    return re.search(pattern, text) is not None


def predict(X_test, model, tokenizer, y_true=None, verbose=False):
    """Generate an answer for every prompt and check it against the reference.

    Args:
        X_test: DataFrame whose "question" column holds the inference prompts.
        model: causal LM exposing `generate`.
        tokenizer: tokenizer matching `model`.
        y_true: reference answers (DataFrame with an "answer" column, Series
            or list). When omitted, the module-level `y_true_predict` is used.
        verbose: print the normalized model output for every example.

    Returns:
        A list with one entry per prompt: the reference answer when it appears
        in the model output, otherwise NO_MATCH.

    Raises:
        ValueError: if the number of references differs from the number of prompts.
    """
    references = _reference_answers(y_true)
    if len(references) != len(X_test):
        raise ValueError(
            f"{len(X_test)} prompts but {len(references)} reference answers"
        )

    # Send inputs to wherever the model lives instead of assuming "cuda".
    device = getattr(model, "device", "cuda")

    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["question"]
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        # Greedy decoding; `do_sample=False` is the explicit way to ask for it.
        outputs = model.generate(**inputs, max_new_tokens=100, do_sample=False)

        # Decode only the newly generated tokens, without special tokens, and
        # cut the text off if the model starts inventing a follow-up question.
        prompt_length = inputs["input_ids"].shape[1]
        completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        completion = completion.split("Question:")[0]
        answer = normalize_final_answer(completion)

        matched = _mentions_number(answer, references[i])
        if verbose:
            print(answer)
            if matched:
                print("matched!")
        y_pred.append(references[i] if matched else NO_MATCH)
    return y_pred

In [None]:
# Run generation over every test prompt; y_pred receives one entry per row of X_test.
y_pred = predict(X_test, model, tokenizer, y_true)

In [None]:
# This cell runs before trainer.train(), so the score belongs to the base model.
print("baseline (before fine-tuning)")
evaluate(y_true, y_pred)

In [None]:
train_data

In [None]:
# LoRA adapter settings: rank 64, alpha 16, no dropout, no bias terms, applied
# to the seven projection module names listed in target_modules.
# NOTE(review): the names presumably match the attention and MLP projections of
# the loaded Mistral model — confirm against model.named_modules().
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",],
)

# Trainer hyper-parameters. Outputs go under "logs"; the TensorBoard cell
# further down reads logs/runs.
training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=5,
    gradient_checkpointing=True,
    # 1 example per device per step, accumulated over 8 steps before each update.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    # NOTE(review): presumably meant to switch off intermediate checkpoints — confirm.
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    # Half precision matches the float16 compute dtype of the quantisation config.
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    # presumably -1 leaves the run length to num_train_epochs — verify
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    # Evaluate on eval_dataset every 112 steps.
    # TODO(review): 112 is an unexplained constant; document how it was chosen.
    evaluation_strategy='steps',
    eval_steps = 112,
    eval_accumulation_steps=1,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
)

# Supervised fine-tuning trainer: wires the quantised model, the LoRA config and
# the two datasets together. "question" is the column of train_data/eval_data
# that holds the full text of each example.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="question",
    tokenizer=tokenizer,
    max_seq_length=max_seq_length,
    args=training_arguments,
    # One example per sequence (no packing of several examples together).
    packing=False,
)

In [None]:
# Run fine-tuning with the arguments configured above.
trainer.train()

# Write the trained model to the "trained-model-12" directory.
# NOTE(review): with a peft_config this presumably stores only the adapter
# weights, not the full base model — confirm before relying on it for reloading.
trainer.model.save_pretrained("trained-model-12")

Afterwards, load the TensorBoard extension and start TensorBoard pointed at the `logs/runs` directory, which is assumed to contain the training logs and checkpoints for your model. This lets you see how the model fits during training.

In [None]:
%load_ext tensorboard
%tensorboard --logdir logs/runs

The following code first generates answers for the GSM8K test questions with the predict() function, then measures the model's exact-match accuracy on the test set with the evaluate() function. Compare this accuracy with the score obtained before fine-tuning to see how much was gained from a small amount of data and some fine-tuning; any remaining errors show where the model can still be improved.

In [None]:
# predict() needs the reference answers as its fourth argument.
y_pred = predict(X_test, model, tokenizer, y_true)
evaluate(y_true, y_pred)

In [None]:
# One row per test example. Columns are passed as plain lists, taking the
# reference answers from the "answer" column, so every column is 1-D and rows
# are matched by position.
evaluation = pd.DataFrame({'question': X_test["question"].tolist(),
                           'y_true': y_true["answer"].tolist(),
                           'y_pred': y_pred},
                         )
evaluation.to_csv("test_predictions.csv", index=False)