# Tech Borås AI Lab 2

Testa kör kommandona nedan:

In [1]:
# This should display information about the GPU attached to the notebook
!nvidia-smi

Sat Sep  6 14:36:15 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.76.04              Driver Version: 580.97         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 Ti     On  |   00000000:01:00.0  On |                  N/A |
|  0%   35C    P8             16W /  285W |    1416MiB /  12282MiB |      7%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [2]:
!uv pip list | grep setfit
# This should display setfit and the version of the packaged that is installed (1.1.3)

setfit                   1.1.3


## SetFit approach

In [None]:
from datetime import datetime
from datasets import load_dataset
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset, SetFitModelCardData
import pandas as pd
from sklearn.metrics import classification_report

pd.set_option('display.max_rows', None)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load data
train_dataset = load_dataset("SetFit/amazon_massive_intent_sv-SE", split="train")
val_dataset = load_dataset("SetFit/amazon_massive_intent_sv-SE", split="validation")
test_dataset = load_dataset("SetFit/amazon_massive_intent_sv-SE", split="test")

# Select task for embedding model by prefixing with "clustering"
# https://huggingface.co/nomic-ai/nomic-embed-text-v1.5#usage
model_name = "nomic-ai/modernbert-embed-base" 
model_task_prefix = "clustering: " # if changing model you might need to remove/switch text prefix

train_dataset = train_dataset.map(lambda x: {"text": model_task_prefix + x["text"]})
val_dataset = val_dataset.map(lambda x: {"text": model_task_prefix + x["text"]})
test_dataset = test_dataset.map(lambda x: {"text": model_task_prefix + x["text"]})

print("Training " + str(train_dataset))
print("Validation " + str(val_dataset))
print("Evaluation " + str(test_dataset))

Training Dataset({
    features: ['id', 'label', 'text', 'label_text'],
    num_rows: 11514
})
Validation Dataset({
    features: ['id', 'label', 'text', 'label_text'],
    num_rows: 2033
})
Evaluation Dataset({
    features: ['id', 'label', 'text', 'label_text'],
    num_rows: 2974
})


## Print statistics about the labels in the dataset

In [5]:
# Create lookup dictionaries to go from 1 (integer) -> 'iot_hue_lightchange' (string), and the other way around
labels = set(zip(train_dataset["label_text"], train_dataset["label"]))
id2label = {label_id: text for text, label_id in labels}
label2id = {text: label_id for text, label_id in labels}

# Collect statistics
train_df = train_dataset.to_pandas()

label_id_counts = train_df['label'].value_counts()
label_id_percentages = train_df['label'].value_counts(normalize=True)

summary_df = pd.DataFrame({
    'Count': label_id_counts,
    'Percentage': label_id_percentages
})

summary_df['Label text'] = summary_df.index.map(id2label)

summary_df = summary_df[['Label text', 'Count', 'Percentage']].sort_index()

summary_df['Percentage'] = summary_df['Percentage'].map('{:.2%}'.format)
summary_df = summary_df.sort_values(by='Count', ascending=False)

total_count = summary_df['Count'].sum()
summary_df.loc['Total'] = ['Total', total_count, '100.00%']

display(summary_df)

Unnamed: 0_level_0,Label text,Count,Percentage
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
50,calendar_set,810,7.03%
45,play_music,639,5.55%
13,weather_query,573,4.98%
32,calendar_query,566,4.92%
12,general_quirky,555,4.82%
49,qa_factoid,544,4.72%
22,news_query,503,4.37%
44,email_query,418,3.63%
33,email_sendemail,354,3.07%
0,datetime_query,350,3.04%


In [None]:
num_samples = 1 # Number of samples per class
epochs = 1 # Number of times each constative pair is shown to the model
batch_size = 32 # Number of samples loaded into memory at once (affects resource usage)

# To simulate a real-world use-case where we dont have access to a huge annotated dataset,
# we sample from the 11_000 rows of the amazon_massive_intent dataset
train_sample  = sample_dataset(
    train_dataset,
    label_column="label",
    seed=42,
    num_samples=num_samples
)

# Load a base SentanceTransformer model with a default classification head
model = SetFitModel.from_pretrained(
    model_name,
    labels=id2label,
    model_card_data=SetFitModelCardData(
        language="sv",
        license="apache-2.0",
        dataset_id="SetFit/amazon_massive_intent_sv-SE",
        dataset_name="Amazon Massive Intent SWE",
    ),
)

timestamp = datetime.now().strftime("%y%m%d_%H%M")
run_name = f"{timestamp}_setfit-{model_name.split('/')[-1]}_ns{num_samples}_ep{epochs}"

# Setup the training
args = TrainingArguments(
    output_dir=f"./results/{run_name}",
    logging_dir=f"./results/{run_name}/logs",
    report_to=["tensorboard"],
    run_name=run_name,
    logging_steps=50, # how often to print logs
    seed=42,
    save_total_limit=2, # only store latest 2 checkpoints
    sampling_strategy="oversampling", # "undersampling" "unique"

    num_epochs=epochs,
    batch_size=batch_size,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_sample,
    eval_dataset=val_dataset,
    metric='accuracy',
)

model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference.
Map: 100%|██████████| 60/60 [00:00<00:00, 9602.71 examples/s]


In [7]:
# Train the model
trainer.train()

***** Running training *****
  Num unique pairs = 3540
  Batch size = 32


  Num epochs = 1


Step,Training Loss
1,0.264
50,0.0512
100,0.0025


## Evaluation results

In [8]:
eval_results = trainer.evaluate(test_dataset)
print("\nFinal evaluation results:", eval_results)

***** Running evaluation *****



Final evaluation results: {'accuracy': 0.18056489576328177}


In [9]:
y_pred_text = model.predict(test_dataset['text'])
y_pred_ids = [label2id[text] for text in y_pred_text]
y_true_ids = test_dataset["label"]
all_label_ids = sorted(id2label.keys())

target_names = [id2label[i] for i in all_label_ids]
report_dict = classification_report(
    y_true_ids,
    y_pred_ids,
    labels=all_label_ids,
    target_names=target_names,
    digits=3,
    output_dict=True
)

report_df = pd.DataFrame(report_dict).transpose()
report_df['support'] = report_df['support'].astype(int)

display(report_df)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Unnamed: 0,precision,recall,f1-score,support
datetime_query,0.25,0.363636,0.296296,88
iot_hue_lightchange,0.071429,0.111111,0.086957,36
transport_ticket,0.175,0.4,0.243478,35
takeaway_query,0.25641,0.285714,0.27027,35
qa_stock,0.264706,0.346154,0.3,26
general_greet,0.021739,1.0,0.042553,1
recommendation_events,0.0,0.0,0.0,43
music_dislikeness,0.024793,0.75,0.048,4
iot_wemo_off,0.142857,0.166667,0.153846,18
cooking_recipe,0.192308,0.069444,0.102041,72


## Run predictions

In [10]:
user_intents = [
    "Släck lampan",
    "God morgon", 
    "spela vikingarna", 
    "vilken dag är det nyår i år", 
    "dra ett skämt", 
    "starta dammsugaren", 
    "ge mig nyheter om stadsministern",
    "hitta ett glutenfritt bageri nära mig",
    "kan man äta rå falukorv"
]

all_predictions = model.predict(user_intents)
for intent, predicted_label in zip(user_intents, all_predictions):    
    
    print(f"Input: '{intent}'  =>  Predicted Label: '{predicted_label}'")

Input: 'Släck lampan'  =>  Predicted Label: 'transport_traffic'
Input: 'God morgon'  =>  Predicted Label: 'alarm_remove'
Input: 'spela vikingarna'  =>  Predicted Label: 'email_sendemail'
Input: 'vilken dag är det nyår i år'  =>  Predicted Label: 'general_greet'
Input: 'dra ett skämt'  =>  Predicted Label: 'qa_definition'
Input: 'starta dammsugaren'  =>  Predicted Label: 'iot_cleaning'
Input: 'ge mig nyheter om stadsministern'  =>  Predicted Label: 'datetime_convert'
Input: 'hitta ett glutenfritt bageri nära mig'  =>  Predicted Label: 'transport_query'
Input: 'kan man äta rå falukorv'  =>  Predicted Label: 'calendar_remove'
