In [None]:
!pip install transformers datasets torch scikit-learn evaluate accelerate -q

print("âœ… Libraries Installed.")


In [None]:
!pip uninstall -y transformers tokenizers optimum onnxruntime

!pip install --upgrade pip -q
!pip install transformers optimum[onnxruntime] datasets torch onnx -q

print("âœ… Installation complete.")

In [None]:
import pandas as pd
from datasets import Dataset, DatasetDict
import os

!wget -q https://www.cs.ucsb.edu/~william/data/liar_dataset.zip
!unzip -o -q liar_dataset.zip
col_names = ['id', 'label_text', 'statement', 'subjects', 'speaker', 'job', 'state', 'party',
             'barely_true_cts', 'false_cts', 'half_true_cts', 'mostly_true_cts', 'pants_fire_cts', 'context']
print("Reading files...")
df_train = pd.read_csv('train.tsv', sep='\t', header=None, names=col_names, on_bad_lines='skip', quoting=3)
df_val = pd.read_csv('valid.tsv', sep='\t', header=None, names=col_names, on_bad_lines='skip', quoting=3)

label_map = {
    'true': 1,
    'mostly-true': 1,
    'half-true': 1,
    'barely-true': 0,
    'false': 0,
    'pants-fire': 0
}

df_train['label'] = df_train['label_text'].map(label_map)
df_val['label'] = df_val['label_text'].map(label_map)

df_train = df_train[['statement', 'label']].dropna()
df_val = df_val[['statement', 'label']].dropna()

dataset = DatasetDict({
    'train': Dataset.from_pandas(df_train),
    'validation': Dataset.from_pandas(df_val)
})

print(" Data Loaded Manually.")
print(f"Training examples: {len(dataset['train'])}")
print(f"Validation examples: {len(dataset['validation'])}")
print("Example:", dataset['train'][0])

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding
import numpy as np
import evaluate

# Checkpoint identifier; the same name is reused when the classifier is loaded.
model_name = "distilbert-base-uncased"

# Tokenizer belonging to that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)

def tokenize_function(examples):
    """Tokenize a batch of statements, truncating each to at most 128 tokens.

    Padding is intentionally not applied here: the Trainer is given a
    `DataCollatorWithPadding`, which pads each batch to the length of its
    longest member. Padding every example to 128 tokens up front made that
    collator a no-op and spent compute on pad tokens.

    Args:
        examples: A batch mapping that contains a "statement" list of strings.

    Returns:
        Whatever the module-level `tokenizer` returns for the batch.
    """
    return tokenizer(examples["statement"], truncation=True, max_length=128)

# Seed the RNGs before the model is built. The Trainer applies its own seed
# only when it is constructed later, which is too late for any weights that
# are freshly initialised while loading the checkpoint with a new 2-class head.
from transformers import set_seed

SEED = 42
set_seed(SEED)

print("Tokenizing data")
# batched=True hands tokenize_function a batch of rows per call.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Two output classes, matching the binary labels built from label_map.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Accuracy metric used by compute_metrics during evaluation.
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level `metric`.

    Args:
        eval_pred: A pair that unpacks into (logits, labels).

    Returns:
        The result of `metric.compute` for the arg-max class of each row of
        logits against the reference labels.
    """
    scores, gold = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted_ids = np.argmax(scores, axis=-1)
    return metric.compute(references=gold, predictions=predicted_ids)

# Fine-tuning hyper-parameters, collected in one place before being handed to
# TrainingArguments. Evaluation runs once per epoch; no checkpoints are saved
# and no external experiment tracker is used.
training_config = dict(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="no",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to="none",
)
training_args = TrainingArguments(**training_config)

# The collator pads each batch when it is assembled.
batch_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=batch_collator,
    compute_metrics=compute_metrics,
)

print("Training started.")
trainer.train()

In [None]:
import torch
import time
import os

# Number of untimed and timed forward passes used for the latency estimate.
WARMUP_RUNS = 10
TIMED_RUNS = 50

# Put the model on the accelerator (when there is one) before evaluating.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f" Force-moving model to {device.upper()} to fix error...")
model.to(device)

print("Phase 1: Calculating Accuracy")
eval_results = trainer.evaluate()
accuracy = eval_results["eval_accuracy"]

print("Phase 2: Measuring Speed")
# Latency is measured on CPU. `.to()` moves the module in place, so `model_cpu`
# and `model` refer to the same object.
model_cpu = model.to("cpu")
# Make inference mode explicit (dropout off) instead of relying on a side
# effect of the evaluation call above.
model_cpu.eval()
dummy_input = "The unemployment rate has dropped significantly this year."

inputs = tokenizer(dummy_input, return_tensors="pt").to("cpu")

with torch.no_grad():
    # Untimed warm-up passes so one-time initialisation costs are not measured.
    for _ in range(WARMUP_RUNS):
        model_cpu(**inputs)

    # perf_counter is a monotonic, high-resolution clock meant for timing short
    # intervals; time.time() can jump when the system clock is adjusted.
    start_time = time.perf_counter()
    for _ in range(TIMED_RUNS):
        model_cpu(**inputs)
    end_time = time.perf_counter()

# Mean latency per forward pass, in milliseconds.
avg_latency = (end_time - start_time) / TIMED_RUNS * 1000

# The on-disk size of the saved state dict is used as the model size.
torch.save(model.state_dict(), "baseline_model.pt")
size_mb = os.path.getsize("baseline_model.pt") / (1024 * 1024)

print("\n" + "="*40)
print("BASELINE RESULTS")
print(f"Model Size:   {size_mb:.2f} MB  (Target: <50 MB)")
print(f"Latency (CPU):{avg_latency:.2f} ms  (Target: <50 ms)")
print(f"Accuracy:     {accuracy*100:.2f}%")
print("="*40)

In [None]:
import torch.quantization
import os
import time
import numpy as np

# Number of untimed and timed forward passes used for the latency estimate.
Q_WARMUP_RUNS = 10
Q_TIMED_RUNS = 100

print("Starting Compression")

model_cpu = model.to("cpu")

# Dynamic quantization: replace the nn.Linear layers with int8 versions.
quantized_model = torch.quantization.quantize_dynamic(
    model_cpu,
    {torch.nn.Linear},
    dtype=torch.qint8
)
# Set inference mode before any measurement rather than after the timing loop.
quantized_model.eval()

# The on-disk size of the saved state dict is used as the model size.
torch.save(quantized_model.state_dict(), "quantized_model.pt")
q_size_mb = os.path.getsize("quantized_model.pt") / (1024 * 1024)

dummy_input = "The unemployment rate has dropped significantly this year."
inputs = tokenizer(dummy_input, return_tensors="pt").to("cpu")

# Time under no_grad, exactly like the baseline measurement; otherwise this
# model would also pay for autograd bookkeeping and the comparison is skewed.
with torch.no_grad():
    for _ in range(Q_WARMUP_RUNS):
        quantized_model(**inputs)

    start_time = time.perf_counter()
    for _ in range(Q_TIMED_RUNS):
        quantized_model(**inputs)
    end_time = time.perf_counter()

# Mean latency per forward pass, in milliseconds.
q_avg_latency = (end_time - start_time) / Q_TIMED_RUNS * 1000

print("Accuracy on Validation Set")

correct = 0
total = 0
val_dataset = tokenized_datasets["validation"]
num_examples = len(val_dataset)

with torch.no_grad():
    for i in range(num_examples):
        # Fetch the row once; every `val_dataset[i]` access rebuilds the row.
        example = val_dataset[i]

        # Wrap in a list to add the batch dimension (batch size 1).
        input_ids = torch.tensor([example["input_ids"]])
        attention_mask = torch.tensor([example["attention_mask"]])
        label = example["label"]

        outputs = quantized_model(input_ids, attention_mask=attention_mask)
        prediction = torch.argmax(outputs.logits, dim=1).item()

        if prediction == label:
            correct += 1
        total += 1

        if i % 200 == 0:
            print(f"Checked {i}/{num_examples}...")

# An empty validation set yields NaN instead of a ZeroDivisionError.
q_accuracy = correct / total if total else float("nan")

# The "new" markers below previously contained mis-decoded emoji bytes.
print("\n" + "="*40)
print("COMPRESSION RESULTS")
print(f"Old Size: {size_mb:.2f} MB  ->  🆕 New Size: {q_size_mb:.2f} MB")
print(f"Old Speed: {avg_latency:.2f} ms ->  🆕 New Speed: {q_avg_latency:.2f} ms")
print(f" Old Acc:  {accuracy*100:.2f}%   ->  🆕 New Acc:   {q_accuracy*100:.2f}%")
print(f"COMPRESSION RATIO: {size_mb/q_size_mb:.2f}x smaller")
print("="*40)

In [None]:
import os
import shutil
from optimum.onnxruntime import ORTModelForSequenceClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Folder that receives the fine-tuned PyTorch weights and tokenizer files.
save_directory = "./veritas_trained_temp"

# Start from a clean folder so nothing from an earlier run is left behind.
if os.path.exists(save_directory):
    shutil.rmtree(save_directory)

# Persist the fine-tuned model together with its tokenizer.
trainer.model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

# Convert the saved checkpoint to ONNX and keep an unquantized copy.
model_onnx = ORTModelForSequenceClassification.from_pretrained(save_directory, export=True)
model_onnx.save_pretrained("onnx_model_raw")

# Dynamic (is_static=False), per-channel quantization settings.
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=True)

quantizer = ORTQuantizer.from_pretrained(model_onnx)
quantizer.quantize(save_dir="onnx_quantized", quantization_config=qconfig)

# Size of the quantized ONNX file in MiB.
onnx_file = "onnx_quantized/model_quantized.onnx"
size_in_bytes = os.path.getsize(onnx_file)
final_size = size_in_bytes / (1024 * 1024)

# The run counts as a success when the file is below 100 MB.
if final_size < 100:
    target_status = 'SUCCESS (Under 100MB)'
else:
    target_status = 'FAILED'

print("\n" + "="*40)
print("SQUEEZE COMPLETE")
print(f"Final Model Size: {final_size:.2f} MB")
print(f"Target Status: {target_status}")
print("="*40)

In [None]:
import matplotlib.pyplot as plt

# Use the sizes measured earlier in the notebook (baseline state dict,
# dynamically quantized state dict, quantized ONNX file) instead of numbers
# copied by hand from a previous run, so the figure always matches this run.
sizes = [size_mb, q_size_mb, final_size]
size_labels = ['Original FP32', 'PyTorch Quant', 'Veritas ONNX']
size_colors = ['#ff9999', '#ffcc99', '#99ff99']

plt.style.use('seaborn-v0_8-whitegrid')
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(size_labels, sizes, color=size_colors, edgecolor='black')

# Write each bar's value just above it.
for bar in bars:
    ax.text(bar.get_x() + bar.get_width()/2., bar.get_height() + 3,
            f'{bar.get_height():.1f} MB', ha='center', fontweight='bold')

# Dashed reference line at the 100 MB limit, with its caption.
ax.axhline(y=100, color='r', linestyle='--', alpha=0.6)
ax.text(2.1, 105, 'Browser Limit (100MB)', color='red', fontweight='bold')
ax.set_title('Figure 1: Storage Footprint Reduction', fontsize=14, fontweight='bold')
ax.set_ylabel('Size (MB)')
fig.tight_layout()
fig.savefig('Figure1.png', dpi=300)
plt.show()

In [None]:
import torch
import time
import numpy as np
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer

# Truncation length; the same value was used when tokenizing the training data.
MAX_LENGTH = 128

# NOTE(review): `model` and `tokenizer` are rebound here to the ONNX runtime
# versions, replacing the PyTorch objects defined earlier in the notebook.
final_model_path = "onnx_quantized"
model = ORTModelForSequenceClassification.from_pretrained(final_model_path, file_name="model_quantized.onnx")
tokenizer = AutoTokenizer.from_pretrained("./veritas_trained_temp")

test_claims = [
    "The unemployment rate reached a 10-year low today.",
    "Scientists found a hidden city on Mars inhabited by giants.",
    "The cost of living has increased in major urban centers."
]

# One untimed call so the first reported latency does not include one-time
# warm-up cost.
if test_claims:
    model(**tokenizer(test_claims[0], return_tensors="pt", padding=True,
                      truncation=True, max_length=MAX_LENGTH))

print(f"\n{'CLAIM':<60} | {'PREDICTION':<12} | {'CONF.'} | {'TIME'}")
print("-" * 95)

for claim in test_claims:

    inputs = tokenizer(claim, return_tensors="pt", padding=True,
                       truncation=True, max_length=MAX_LENGTH)

    # perf_counter is a monotonic, high-resolution clock suited to short
    # intervals. No torch.no_grad() is needed: the forward pass runs in ONNX
    # Runtime, not through PyTorch autograd.
    start = time.perf_counter()
    outputs = model(**inputs)
    latency = (time.perf_counter() - start) * 1000

    raw_logits = outputs.logits

    # The logits may arrive as a NumPy array; convert so torch ops work on both.
    if isinstance(raw_logits, np.ndarray):
        logits = torch.from_numpy(raw_logits)
    else:
        logits = raw_logits

    probs = torch.softmax(logits, dim=1)
    conf, pred = torch.max(probs, dim=1)

    # Class 1 is the true-leaning class in the training label_map.
    label = "RELIABLE" if pred.item() == 1 else "UNRELIABLE"

    print(f"{claim[:58]:<60} | {label:<12} | {conf.item():.1%} | {latency:.1f}ms")
