In [1]:
from datasets import load_from_disk

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
model_name_distilbert = "distilbert/distilbert-base-uncased"
model_name_qwen = "Qwen/Qwen2.5-0.5B-Instruct"

In [3]:
lable_names = [
	"IN_Bank",
	"IN_School",
	"US_Bank",
	"US_School"
]

In [4]:
ds = load_from_disk("./data/mail_dataset_labeled")
ds["test"].features

{'text': Value('string'),
 'label': ClassLabel(names=['IN_Bank', 'IN_School', 'US_Bank', 'US_School'])}

In [5]:
ds["test"][:2]

{'text': ['Investment update Your portfolio summary is ready Hi JENNIFER, Your investments gained 2.3% this month. Review your performance and rebalancing recommendations. Portfolio value $127,845.92 as of May 30, 2025',
  "School magazine poetry submission from Ishika Saxena (Class 7, Admission #9802) - Hindi kavita titled 'Vidyalaya Ki Yaadein' for annual publication."],
 'label': [2, 1]}

In [6]:
mail_summaries = [
	ds["text"] for ds in ds["test"]
]


# Distilbert

In [7]:
from transformers import pipeline

In [8]:
model_distilbert = pipeline("zero-shot-classification", model=model_name_distilbert, device_map="cpu")

Device set to use cpu
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


In [9]:
results = model_distilbert(mail_summaries[0], candidate_labels=lable_names)
print(results)

{'sequence': 'Investment update Your portfolio summary is ready Hi JENNIFER, Your investments gained 2.3% this month. Review your performance and rebalancing recommendations. Portfolio value $127,845.92 as of May 30, 2025', 'labels': ['US_School', 'US_Bank', 'IN_Bank', 'IN_School'], 'scores': [0.3764089047908783, 0.27470993995666504, 0.20803505182266235, 0.1408461183309555]}


In [10]:
Error_counter = 0
for summary in ds["test"]:
	result = model_distilbert(summary["text"], candidate_labels=lable_names)
	largest_score = max(result["scores"])
	largest_label = result["labels"][result["scores"].index(largest_score)]
	actual_label = lable_names[summary["label"]]
	if largest_label != actual_label:
		Error_counter += 1
print(f"Total messages: {len(ds['test'])}")
print(f"Total Error: {Error_counter}")
print("-"*50)
print(f"Accuracy: {((Error_counter / len(ds['test']))*100)}%")

Total messages: 43
Total Error: 20
--------------------------------------------------
Accuracy: 46.51162790697674%


# Qwen

In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

In [12]:
model_qwen = pipeline("text-generation", model=model_name_qwen, device_map="cpu")

Device set to use cpu


In [13]:
message_system = """
Classify the email summary into one of the following categories: "IN_Bank", "IN_School", "US_Bank", "US_School"
"""

In [14]:
def generate_chat_messages(dataset):
	messages = [
		[
			{"role": "system", "content": message_system},
			{"role": "user", "content": f"Classify: {ds['text']}"}
		] for ds in dataset
	]
	return messages

In [15]:
chat_messages = generate_chat_messages(ds["test"])
len(chat_messages)

43

In [16]:
Error_counter = 0
for index, summary in enumerate(chat_messages):
	response = model_qwen(summary, max_new_tokens=100,)
	generated_label = response[0]["generated_text"][-1]["content"]
	actual_label = lable_names[ds["test"][index]["label"]]
	if generated_label != actual_label:
		Error_counter += 1
print(f"Total messages: {len(ds['test'])}")
print(f"Total Error: {Error_counter}")
print("-"*50)
print(f"Accuracy: {((Error_counter / len(ds['test']))*100)}%")

Total messages: 43
Total Error: 25
--------------------------------------------------
Accuracy: 58.139534883720934%
