# Install Packages

In [None]:
# %%capture
# !pip install pip3-autoremove
# !pip-autoremove torch torchvision torchaudio -y
# !pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
# !pip install unsloth

# Import Libraries

In [None]:
import re
import torch
import pandas as pd
from datasets import load_dataset
from trl import SFTTrainer
from tqdm.auto import tqdm
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported, FastLanguageModel
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seabor as sns

  from .autonotebook import tqdm as notebook_tqdm

Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import is_bfloat16_supported, FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
classes_list = ["Algebra", "Geometry and Trigonometry", "Calculus and Analysis",
                "Probability and Statistics", "Number Theory", "Combinatorics and Discrete Math",
                "Linear Algebra", "Abstract Algebra and Topology"]
n_classes = len(classes_list)

train_csv_file = './data/train.csv'

In [None]:
df = pd.read_csv(train_csv_file)
df = df.rename(columns={'Question': 'text'})

train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

In [None]:
# Подсчитать количество изображений в каждом классе для обучающего набора данных
train_class_counts = np.zeros(n_classes)
for idx, row in train_df.iterrows():
    label = row['label']
    train_class_counts[label] += 1
    
print(f"Classes conts on train: {train_class_counts}")
print(f"Class diff counts: {np.max(train_class_counts) - train_class_counts}")

# Подсчитать количество изображений в каждом классе для валидационного набора данных
val_class_counts = np.zeros(n_classes)
for idx, row in val_df.iterrows():
    label = row['label']
    val_class_counts[label] += 1

# посчитаем веса для каждого класса
class_weights = (sum(train_class_counts.tolist()) / (n_classes * train_class_counts)).tolist()
print(f"Class weights: {class_weights}")
class_weights = torch.tensor(class_weights)

# Создать barplot
fig, ax = plt.subplots(1, 2, figsize=(14, 6))

# Barplot для обучающего набора данных
sns.barplot(x=classes_list, y=train_class_counts, ax=ax[0])
ax[0].set_title('Train Dataset')
ax[0].set_xlabel('Class')
ax[0].set_ylabel('Number of samples')
ax[0].tick_params(axis='x', rotation=90)

# Barplot для валидационного набора данных
sns.barplot(x=classes_list, y=val_class_counts, ax=ax[1])
ax[1].set_title('Validation Dataset')
ax[1].set_xlabel('Class')
ax[1].set_ylabel('Number of samples')
ax[1].tick_params(axis='x', rotation=90)

plt.tight_layout()
plt.show()

# Load LLAMA 3.2 1B Model and Tokenizer

In [None]:
from unsloth import FastLanguageModel

max_seq_length = 2048
dtype = None # None for auto detection.
load_in_4bit = True # 4bit quantization to reduce memory usage. 

model, tokenizer = FastLanguageModel.from_pretrained(
    # model_name = "unsloth/Llama-3.2-1B-bnb-4bit",
    # model_name = "unsloth/Meta-Llama-3.1-8B-bnb-4bit", #! CUDA out of memory
    model_name = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # model_name = "unsloth/Qwen3-8B-unsloth-bnb-4bit",
    # model_name = "unsloth/Qwen3-0.6B-unsloth-bnb-4bit",
    # model_name = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    #token = "" HF_Token for gated models
)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 Laptop GPU. Num GPUs = 1. Max memory: 7.747 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Fetching 2 files: 100%|██████████| 2/2 [29:11<00:00, 875.52s/it] 
Loading checkpoint shards: 100%|██████████| 2/2 [00:09<00:00,  4.85s/it]


deepseek-ai/deepseek-math-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


# Add LoRA Adapters

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2025.4.7 patched 30 layers with 30 QKV layers, 30 O layers and 30 MLP layers.


# Load Original Training Data and Merge with New Data

In [4]:
label_map = {0: "Algebra",
            1: "Geometry and Trigonometry",
            2: "Calculus and Analysis",
            3: "Probability and Statistics",
            4: "Number Theory",
            5: "Combinatorics and Discrete Math",
            6: "Linear Algebra",
            7: "Abstract Algebra and Topology"}


train = pd.read_csv("./data/train.csv")
train["instruction"] = "Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology."
train["label"] = train["label"].map(label_map)
train = train.rename(columns={"label": "output", "Question": "input"})
train.to_csv("train_updated.csv", index=False)


dataset = load_dataset("csv", data_files="train_updated.csv", split="train")

Generating train split: 10189 examples [00:00, 250652.58 examples/s]


# Prepare Data

In [5]:
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        text = prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

dataset = dataset.map(formatting_prompts_func, batched = True,)

Map: 100%|██████████| 10189/10189 [00:00<00:00, 172810.79 examples/s]


In [6]:
dataset, dataset[0]

(Dataset({
     features: ['input', 'output', 'instruction', 'text'],
     num_rows: 10189
 }),
 {'input': "A solitaire game is played as follows.  Six distinct pairs of matched tiles are placed in a bag.  The player randomly draws tiles one at a time from the bag and retains them, except that matching tiles are put aside as soon as they appear in the player's hand.  The game ends if the player ever holds three tiles, no two of which match; otherwise the drawing continues until the bag is empty.  The probability that the bag will be emptied is $p/q,\\,$ where $p\\,$ and $q\\,$ are relatively prime positive integers.  Find $p+q.\\,$ ",
  'output': 'Probability and Statistics',
  'instruction': 'Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology.',
  'text': "Below is an instruction that describes a ta

In [7]:
print(dataset[0]["text"])

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology.

### Input:
A solitaire game is played as follows.  Six distinct pairs of matched tiles are placed in a bag.  The player randomly draws tiles one at a time from the bag and retains them, except that matching tiles are put aside as soon as they appear in the player's hand.  The game ends if the player ever holds three tiles, no two of which match; otherwise the drawing continues until the bag is empty.  The probability that the bag will be emptied is $p/q,\,$ where $p\,$ and $q\,$ are relatively prime positive integers.  Find $p+q.\,$ 

### Response:
Probability and Statistics<

# Setup Trainer

In [8]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 4,
        gradient_accumulation_steps = 8,
        warmup_steps = 5,
        max_steps = 642,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none"
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2): 100%|██████████| 10189/10189 [00:02<00:00, 3549.70 examples/s]


# Show current memory stats

In [9]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce RTX 4070 Laptop GPU. Max memory = 7.747 GB.
5.0 GB of memory reserved.


# Start Training

In [10]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10,189 | Num Epochs = 3 | Total steps = 642
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 8 x 1) = 32
 "-____-"     Trainable parameters = 37,478,400/7,000,000,000 (0.54% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.4776
2,1.5953
3,1.5599
4,1.5498
5,1.6165
6,1.3856
7,1.263
8,1.1693
9,1.0667
10,1.0484


In [None]:
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

# Save Model (Just LoRA Adapters) and Tokenzer

In [11]:
model.save_pretrained("lora_model")
tokenizer.save_pretrained("lora_model")

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

# Load the Saved Model and Tokenizer

In [2]:
max_seq_length = 2048
dtype = None # None for auto detection.
load_in_4bit = True # 4bit quantization to reduce memory usage. 

# if False:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "lora_model",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit
)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 4070 Laptop GPU. Num GPUs = 1. Max memory: 7.747 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards: 100%|██████████| 2/2 [00:11<00:00,  5.60s/it]
Unsloth 2025.4.7 patched 30 layers with 30 QKV layers, 30 O layers and 30 MLP layers.


# Make Predictions on Comptetion Test Set

In [3]:
public_set = pd.read_csv("./data/test.csv")
public_set

Unnamed: 0,id,Question
0,0,b'Solve 0 = -i - 91*i - 1598*i - 64220 for i.\n'
1,1,Galperin G.A.\n\nA natural number $N$ is 999.....
2,2,Example 7 Calculate $\frac{1}{2 \sqrt{1}+\sqrt...
3,3,"If $A$, $B$, and $C$ represent three distinct ..."
4,4,2. Calculate $1+12+123+1234+12345+123456+12345...
...,...,...
3039,3039,"Find the greatest possible value of $pq + r$, ..."
3040,3040,"4. Given that $a, b, c$ are the lengths of the..."
3041,3041,"3.18. Find the eccentricity, coordinates of th..."
3042,3042,Find the least positive integer $k$ for which ...


In [4]:
FastLanguageModel.for_inference(model)

prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

public_set["instruction"] = "Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology."
public_set.rename(columns = {"Question": "input"}, inplace=True)

raw_outputs = []
for i in tqdm(range(len(public_set))):
  inputs = tokenizer(
  [
      prompt.format(
          public_set.iloc[0]["instruction"], 
          public_set.iloc[i]["input"], 
          "",
      )
  ], return_tensors = "pt", truncation = True, max_length = 2048).to("cuda")

  outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
  raw_outputs.append(tokenizer.batch_decode(outputs))

100%|██████████| 3044/3044 [13:37<00:00,  3.72it/s]


In [11]:
def parse_output(output):
    # re_match = re.search(r'### Response:\n(.*?)<\|end▁of▁sentence\|>', output, re.DOTALL)
    re_match = re.search(r'### Response:\n(.*?)<｜end▁of▁sentence｜>', output, re.DOTALL)
    if re_match:
        response = re_match.group(1).strip()
        return response
    else:
        return ''

In [12]:
public_set["raw_outputs"] = [raw_output[0] for raw_output in raw_outputs]
print(public_set["raw_outputs"].iloc[1])

<｜begin▁of▁sentence｜>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Classify this math problem into one of these eight topics: Algebra, Geometry and Trigonometry, Calculus and Analysis, Probability and Statistics, Number Theory, Combinatorics and Discrete Math, Linear Algebra, Abstract Algebra and Topology.

### Input:
Galperin G.A.

A natural number $N$ is 999... 99 ( $k$ nines) times greater than the sum of its digits. Specify all possible values of $k$ and for each of them, provide an example of such a number.

### Response:
Number Theory<｜end▁of▁sentence｜>


In [13]:
print(parse_output(public_set["raw_outputs"].iloc[1]))

Number Theory


In [14]:
public_set["parsed_outputs"] = public_set["raw_outputs"].apply(parse_output)
public_set

Unnamed: 0,id,input,instruction,raw_outputs,parsed_outputs
0,0,b'Solve 0 = -i - 91*i - 1598*i - 64220 for i.\n',Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra
1,1,Galperin G.A.\n\nA natural number $N$ is 999.....,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory
2,2,Example 7 Calculate $\frac{1}{2 \sqrt{1}+\sqrt...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra
3,3,"If $A$, $B$, and $C$ represent three distinct ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory
4,4,2. Calculate $1+12+123+1234+12345+123456+12345...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Combinatorics and Discrete Math
...,...,...,...,...,...
3039,3039,"Find the greatest possible value of $pq + r$, ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory
3040,3040,"4. Given that $a, b, c$ are the lengths of the...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry
3041,3041,"3.18. Find the eccentricity, coordinates of th...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry
3042,3042,Find the least positive integer $k$ for which ...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory


In [15]:
label_map = {0: "Algebra",
            1: "Geometry and Trigonometry",
            2: "Calculus and Analysis",
            3: "Probability and Statistics",
            4: "Number Theory",
            5: "Combinatorics and Discrete Math",
            6: "Linear Algebra",
            7: "Abstract Algebra and Topology"}

In [16]:
label2id = {v:k for k,v in label_map.items()}
label2id

{'Algebra': 0,
 'Geometry and Trigonometry': 1,
 'Calculus and Analysis': 2,
 'Probability and Statistics': 3,
 'Number Theory': 4,
 'Combinatorics and Discrete Math': 5,
 'Linear Algebra': 6,
 'Abstract Algebra and Topology': 7}

In [17]:
public_set["label"] = public_set["parsed_outputs"].map(label2id)
public_set

Unnamed: 0,id,input,instruction,raw_outputs,parsed_outputs,label
0,0,b'Solve 0 = -i - 91*i - 1598*i - 64220 for i.\n',Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra,0.0
1,1,Galperin G.A.\n\nA natural number $N$ is 999.....,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4.0
2,2,Example 7 Calculate $\frac{1}{2 \sqrt{1}+\sqrt...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra,0.0
3,3,"If $A$, $B$, and $C$ represent three distinct ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4.0
4,4,2. Calculate $1+12+123+1234+12345+123456+12345...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Combinatorics and Discrete Math,5.0
...,...,...,...,...,...,...
3039,3039,"Find the greatest possible value of $pq + r$, ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4.0
3040,3040,"4. Given that $a, b, c$ are the lengths of the...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry,1.0
3041,3041,"3.18. Find the eccentricity, coordinates of th...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry,1.0
3042,3042,Find the least positive integer $k$ for which ...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4.0


# Make Submission to the Competition

In [18]:
public_set["label"] = public_set["label"].fillna(0).astype(int)
public_set.rename(columns = {"input": "Question"})
public_set

Unnamed: 0,id,input,instruction,raw_outputs,parsed_outputs,label
0,0,b'Solve 0 = -i - 91*i - 1598*i - 64220 for i.\n',Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra,0
1,1,Galperin G.A.\n\nA natural number $N$ is 999.....,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4
2,2,Example 7 Calculate $\frac{1}{2 \sqrt{1}+\sqrt...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Algebra,0
3,3,"If $A$, $B$, and $C$ represent three distinct ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4
4,4,2. Calculate $1+12+123+1234+12345+123456+12345...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Combinatorics and Discrete Math,5
...,...,...,...,...,...,...
3039,3039,"Find the greatest possible value of $pq + r$, ...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4
3040,3040,"4. Given that $a, b, c$ are the lengths of the...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry,1
3041,3041,"3.18. Find the eccentricity, coordinates of th...",Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Geometry and Trigonometry,1
3042,3042,Find the least positive integer $k$ for which ...,Classify this math problem into one of these e...,<｜begin▁of▁sentence｜>Below is an instruction t...,Number Theory,4


In [19]:
public_set.rename(columns = {"input": "Question"}, inplace=True)
public_set[["id", "label"]].to_csv("submission.csv", index=False)
pd.read_csv("submission.csv")

Unnamed: 0,id,label
0,0,0
1,1,4
2,2,0
3,3,4
4,4,5
...,...,...
3039,3039,4
3040,3040,1
3041,3041,1
3042,3042,4
