# **Vietnamese News Sentiment Analysis with XLM-RoBERTa**
# This notebook fine-tunes XLM-RoBERTa for classifying sentiment of Vietnamese news summaries.
* Dataset: 12007 sample fix

In [1]:
# Install required packages
!pip install -q transformers datasets evaluate accelerate scikit-learn pandas matplotlib seaborn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m86.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import os
import shutil
import zipfile
import pandas as pd
import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)
from datasets import Dataset, DatasetDict
import evaluate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import logging

2025-07-24 14:02:22.855632: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753365743.088732      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753365743.160835      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Configuration
class Config:
    MODEL_NAME = "xlm-roberta-base"
    SEED = 42
    BATCH_SIZE = 8  # Reduced to avoid GPU memory issues
    GRADIENT_ACCUMULATION_STEPS = 2  # Adjusted for effective batch size of 16
    LEARNING_RATE = 2e-5
    NUM_EPOCHS = 10
    MAX_LENGTH = 256
    WEIGHT_DECAY = 0.01
    OUTPUT_DIR = "./xlm-roberta-sentiment-complete"
    LOGGING_STEPS = 10  # Increased frequency for better monitoring
    SAVE_TOTAL_LIMIT = 2
    SENTIMENT_MAP = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
    REVERSE_SENTIMENT_MAP = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    EARLY_STOPPING_PATIENCE = 3
    LR_SCHEDULER_TYPE = "cosine"
    WARMUP_RATIO = 0.1
    USE_CLASS_WEIGHTS = True
    DATA_PATH = "/kaggle/input/data-news-sentiment-v3/data_news_sentiment_v3.xlsx"

config = Config()

# Create output directory
os.makedirs(config.OUTPUT_DIR, exist_ok=True)

In [4]:
# Set up logging with error handling
try:
    logging.basicConfig(
        filename=os.path.join(config.OUTPUT_DIR, 'training.log'),
        level=logging.DEBUG,  # Increased verbosity
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)
    logger.info("Logging initialized successfully")
except PermissionError:
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info("Fallback to console logging due to permission error for file: %s", os.path.join(config.OUTPUT_DIR, 'training.log'))
except Exception as e:
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info("Logging setup failed with error: %s. Fallback to console logging.", str(e))

# Set random seed
torch.manual_seed(config.SEED)
np.random.seed(config.SEED)


In [5]:
# Custom Trainer with class weights
class WeightedTrainer(Trainer):
    def __init__(self, class_weights=None, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = class_weights
        
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        
        labels = labels.long()
        
        if self.class_weights is not None:
            weights = torch.tensor(self.class_weights, device=logits.device, dtype=torch.float32)
            loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        else:
            loss_fct = torch.nn.CrossEntropyLoss()
            
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [6]:
# Load and explore dataset
def load_and_explore_data(file_path):
    logger.info("Loading dataset...")
    df = pd.read_excel(file_path)
    
    df['label'] = df['sentiment'].map(config.SENTIMENT_MAP)
    
    with open(os.path.join(config.OUTPUT_DIR, 'dataset_info.txt'), 'w') as f:
        f.write(f"Total samples: {len(df)}\n")
        f.write("\nClass distribution:\n")
        f.write(df['sentiment'].value_counts().to_string())
    
    plt.figure(figsize=(8, 5))
    class_dist = df['sentiment'].value_counts()
    sns.barplot(x=class_dist.index, y=class_dist.values)
    plt.title('Class Distribution')
    plt.ylabel('Count')
    plt.savefig(os.path.join(config.OUTPUT_DIR, 'class_distribution.png'))
    plt.close()
    
    df['text_length'] = df['summary'].apply(lambda x: len(x.split()))
    
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    sns.histplot(df['text_length'], bins=30)
    plt.title('Text Length Distribution')
    
    plt.subplot(1, 2, 2)
    sns.boxplot(x='sentiment', y='text_length', data=df)
    plt.title('Text Length by Sentiment')
    plt.savefig(os.path.join(config.OUTPUT_DIR, 'text_length_distribution.png'))
    plt.close()
    
    return df

df = load_and_explore_data(config.DATA_PATH)
df = df.dropna()

if config.USE_CLASS_WEIGHTS:
    class_weights = compute_class_weight(
        'balanced', 
        classes=np.unique(df['label']),
        y=df['label']
    )
    config.CLASS_WEIGHTS = class_weights.tolist()
    logger.info(f"Class weights: {config.CLASS_WEIGHTS}")
else:
    config.CLASS_WEIGHTS = None

tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

  with pd.option_context('mode.use_inf_as_na', True):


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [7]:
# Analyze token lengths
def analyze_token_lengths(texts, tokenizer, max_length):
    lengths = []
    for text in texts:
        tokens = tokenizer(text, truncation=True, max_length=max_length)["input_ids"]
        lengths.append(len(tokens))
    return lengths

token_lengths = analyze_token_lengths(df['summary'], tokenizer, config.MAX_LENGTH)

plt.figure(figsize=(10, 5))
sns.histplot(token_lengths, bins=30)
plt.title('Token Length Distribution')
plt.axvline(x=config.MAX_LENGTH, color='r', linestyle='--', label='Max Length')
plt.legend()
plt.savefig(os.path.join(config.OUTPUT_DIR, 'token_length_distribution.png'))
plt.close()

logger.info(f"Percentage of texts within max length: {sum(np.array(token_lengths) <= config.MAX_LENGTH) / len(token_lengths):.2%}")

  with pd.option_context('mode.use_inf_as_na', True):


In [8]:
# Preprocess function
def preprocess_function(examples):
    return tokenizer(
        examples["summary"],
        truncation=True,
        max_length=config.MAX_LENGTH,
        padding="max_length"
    )

# Split data
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    random_state=config.SEED,
    stratify=df['label']
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=config.SEED,
    stratify=temp_df['label']
)

train_dataset = Dataset.from_pandas(train_df[['summary', 'sentiment', 'label']])
val_dataset = Dataset.from_pandas(val_df[['summary', 'sentiment', 'label']])
test_dataset = Dataset.from_pandas(test_df[['summary', 'sentiment', 'label']])

dataset = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["summary", "sentiment"]
)

with open(os.path.join(config.OUTPUT_DIR, 'data_splits.txt'), 'w') as f:
    f.write(f"Train samples: {len(train_df)}\n")
    f.write(f"Validation samples: {len(val_df)}\n")
    f.write(f"Test samples: {len(test_df)}\n")

model = AutoModelForSequenceClassification.from_pretrained(
    config.MODEL_NAME,
    num_labels=3
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/9606 [00:00<?, ? examples/s]

Map:   0%|          | 0/1201 [00:00<?, ? examples/s]

Map:   0%|          | 0/1201 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Metrics
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_score(labels, predictions)
    f1_micro = f1_score(labels, predictions, average='micro')
    f1_macro = f1_score(labels, predictions, average='macro')
    f1_weighted = f1_score(labels, predictions, average='weighted')
    
    report = classification_report(
        labels,
        predictions,
        target_names=['Negative', 'Neutral', 'Positive'],
        output_dict=True
    )
    
    metrics = {
        'accuracy': accuracy,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'negative_precision': report['Negative']['precision'],
        'negative_recall': report['Negative']['recall'],
        'negative_f1': report['Negative']['f1-score'],
        'neutral_precision': report['Neutral']['precision'],
        'neutral_recall': report['Neutral']['recall'],
        'neutral_f1': report['Neutral']['f1-score'],
        'positive_precision': report['Positive']['precision'],
        'positive_recall': report['Positive']['recall'],
        'positive_f1': report['Positive']['f1-score']
    }
    
    logger.info(f"Evaluation metrics: {metrics}")
    
    return metrics

In [10]:
# Training arguments
training_args = TrainingArguments(
    output_dir=config.OUTPUT_DIR,
    run_name=f"xlm-roberta-sentiment-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    eval_strategy="steps",
    eval_steps=100,
    logging_steps=config.LOGGING_STEPS,
    save_steps=100,
    save_total_limit=config.SAVE_TOTAL_LIMIT,
    learning_rate=config.LEARNING_RATE,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE,
    gradient_accumulation_steps=config.GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=config.NUM_EPOCHS,
    weight_decay=config.WEIGHT_DECAY,
    lr_scheduler_type=config.LR_SCHEDULER_TYPE,
    warmup_ratio=config.WARMUP_RATIO,
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1_macro",
    greater_is_better=True,
    fp16=True,
    logging_dir="./logs",
    seed=config.SEED,
    report_to="none",  # Disable wandb logging
    log_level="debug"  # Increase logging verbosity
)

# Initialize Trainer
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    class_weights=config.CLASS_WEIGHTS,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=config.EARLY_STOPPING_PATIENCE)]
)

  super().__init__(**kwargs)
Using auto half precision backend


In [11]:
# Check GPU memory before training
logger.info(f"GPU available: {torch.cuda.is_available()}")
logger.info(f"GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB allocated, {torch.cuda.memory_reserved() / 1024**3:.2f} GB reserved")
print(f"GPU available: {torch.cuda.is_available()}")
print(f"GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB allocated, {torch.cuda.memory_reserved() / 1024**3:.2f} GB reserved")

# Start training with error handling
try:
    logger.info("Starting training...")
    print("Starting training...")
    train_result = trainer.train()
except Exception as e:
    logger.error(f"Training failed with error: {str(e)}")
    print(f"Training failed with error: {str(e)}")
    raise e

# Save training metrics
metrics = train_result.metrics
trainer.save_metrics("train", metrics)
logger.info(f"Training metrics: {metrics}")

# Save the final model
trainer.save_model(config.OUTPUT_DIR)
tokenizer.save_pretrained(config.OUTPUT_DIR)
logger.info(f"Model saved to {config.OUTPUT_DIR}")

# Save training arguments
trainer.save_state()

Currently training with a batch size of: 8
The following columns in the Training set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 9,606
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 2
  Total optimization steps = 6,010
  Number of trainable parameters = 278,045,955


GPU available: False
GPU memory: 0.00 GB allocated, 0.00 GB reserved
Starting training...


Step,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro,F1 Weighted,Negative Precision,Negative Recall,Negative F1,Neutral Precision,Neutral Recall,Neutral F1,Positive Precision,Positive Recall,Positive F1
100,1.1064,1.084641,0.509575,0.509575,0.321443,0.389749,0.583333,0.197492,0.295082,1.0,0.003115,0.006211,0.501832,0.976827,0.663037
200,0.9649,0.74083,0.729392,0.729392,0.664161,0.699522,0.650215,0.949843,0.771975,0.611111,0.274143,0.378495,0.820643,0.864528,0.842014
300,0.6252,0.613537,0.764363,0.764363,0.728421,0.753716,0.719603,0.909091,0.803324,0.630435,0.451713,0.526316,0.850352,0.860963,0.855624
400,0.6927,0.573023,0.752706,0.752706,0.74343,0.759782,0.778736,0.84953,0.812594,0.538265,0.657321,0.591865,0.915401,0.752228,0.825832
500,0.6637,0.594303,0.774355,0.774355,0.749915,0.772706,0.828179,0.755486,0.790164,0.601911,0.588785,0.595276,0.838926,0.891266,0.864304
600,0.602,0.584031,0.771024,0.771024,0.74131,0.764247,0.813291,0.805643,0.809449,0.614232,0.510903,0.557823,0.817152,0.900178,0.856658
700,0.6038,0.661457,0.775187,0.775187,0.748355,0.768907,0.830671,0.815047,0.822785,0.626866,0.523364,0.570458,0.81129,0.896613,0.85182
800,0.5324,0.602577,0.794338,0.794338,0.773824,0.794517,0.891892,0.724138,0.799308,0.623907,0.666667,0.644578,0.84975,0.907308,0.877586
900,0.5577,0.496221,0.805995,0.805995,0.790583,0.80828,0.866197,0.77116,0.81592,0.640669,0.716511,0.676471,0.88172,0.877005,0.879357
1000,0.5999,0.664311,0.729392,0.729392,0.71651,0.733164,0.676538,0.931034,0.783641,0.541311,0.5919,0.565476,0.946472,0.693405,0.800412


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1201
  Batch size = 8
Saving model checkpoint to ./xlm-roberta-sentiment-complete/checkpoint-100
Configuration saved in ./xlm-roberta-sentiment-complete/checkpoint-100/config.json
Model weights saved in ./xlm-roberta-sentiment-complete/checkpoint-100/model.safetensors
tokenizer config file saved in ./xlm-roberta-sentiment-complete/checkpoint-100/tokenizer_config.json
Special tokens file saved in ./xlm-roberta-sentiment-complete/checkpoint-100/special_tokens_map.json
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_le

In [12]:
# Evaluate on test set
logger.info("Evaluating on test set...")
print("Evaluating on test set...")
test_results = trainer.evaluate(
    tokenized_datasets["test"],
    metric_key_prefix="test"
)

# Save evaluation results
with open(os.path.join(config.OUTPUT_DIR, 'test_results.txt'), 'w') as f:
    for key, value in test_results.items():
        f.write(f"{key}: {value}\n")

logger.info("\n=== Test Results ===")
print("\n=== Test Results ===")
for key, value in test_results.items():
    if key.startswith("test_"):
        logger.info(f"{key[5:]}: {value}")
        print(f"{key[5:]}: {value}")

The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 1201
  Batch size = 8


Evaluating on test set...


early stopping required metric_for_best_model, but did not find eval_f1_macro so early stopping is disabled



=== Test Results ===
loss: 0.4650983214378357
accuracy: 0.8159866777685262
f1_micro: 0.8159866777685263
f1_macro: 0.8023317409020145
f1_weighted: 0.81667438267899
negative_precision: 0.8790849673202614
negative_recall: 0.8432601880877743
negative_f1: 0.8608
neutral_precision: 0.6616314199395771
neutral_recall: 0.6801242236024845
neutral_f1: 0.6707503828483922
positive_precision: 0.8723404255319149
positive_recall: 0.8785714285714286
positive_f1: 0.8754448398576512
runtime: 417.5685
samples_per_second: 2.876
steps_per_second: 0.362


In [13]:
# Sample predictions function
def predict_sentiment(text):
    inputs = tokenizer(
        text,
        max_length=config.MAX_LENGTH,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    ).to(trainer.model.device)
    
    with torch.no_grad():
        outputs = trainer.model(**inputs)
    
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_class = torch.argmax(probs).item()
    
    return {
        "sentiment": config.REVERSE_SENTIMENT_MAP[pred_class],
        "confidence": probs[0][pred_class].item(),
        "probabilities": {
            "Negative": probs[0][0].item(),
            "Neutral": probs[0][1].item(),
            "Positive": probs[0][2].item()
        }
    }

# Test on some samples and save predictions
sample_texts = df.sample(5, random_state=config.SEED)["summary"].tolist()
with open(os.path.join(config.OUTPUT_DIR, 'sample_predictions.txt'), 'w') as f:
    for i, text in enumerate(sample_texts):
        result = predict_sentiment(text)
        actual = df[df['summary'] == text]['sentiment'].values[0]
        
        f.write(f"\n=== Sample {i+1} ===\n")
        f.write(f"\nText: {text}\n")
        f.write(f"\nPredicted Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.2f})\n")
        f.write(f"Probabilities: {result['probabilities']}\n")
        f.write(f"Actual Sentiment: {actual}\n")
        
        logger.info(f"Sample {i+1} - Predicted: {result['sentiment']}, Actual: {actual}")
        print(f"\n=== Sample {i+1} ===")
        print(f"\nText: {text}")
        print(f"\nPredicted Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.2f})")
        print(f"Probabilities: {result['probabilities']}")
        print(f"Actual Sentiment: {actual}")


=== Sample 1 ===

Text: Đảng đối lập Thái Lan cáo buộc Thủ tướng Shinawatra thiếu năng lực và bị thao túng bởi ông Thaksin Shinawatra. Trong phiên điều trần, Đảng Nhân Dân khẳng định bà Shinawatra đang nhận chỉ thị từ ông Thaksin. Nhiều chỉ trích được đưa ra, nhưng bà Shinawatra không trực tiếp phản hồi. Dù có những chính sách kinh tế mới nhưng kết quả vẫn chưa đạt như mong đợi. Phiếu bỏ tín nhiệm dự kiến có thể không thành công. Ông Thaksin lên tiếng về chính sách kinh tế của Chính phủ và lo ngại ông vẫn ảnh hưởng đến quyết định.

Predicted Sentiment: Negative (Confidence: 0.97)
Probabilities: {'Negative': 0.9733138084411621, 'Neutral': 0.02532723918557167, 'Positive': 0.0013590147718787193}
Actual Sentiment: Negative

=== Sample 2 ===

Text: Lãi suất ngân hàng ngày 22/5/2025 tiếp tục đạt mức cao kỷ lục với nhiều ưu đãi đặc biệt trên 7%/năm. Dẫn đầu là ABBank với lãi suất lên tới 9,65%/năm, PVcomBank và HDBank cũng cung cấp mức lãi suất hấp dẫn cho khách hàng có số tiền gửi lớn. Ngườ

In [14]:
# Create zip file of all outputs
def zip_output_folder(output_dir):
    zip_path = os.path.join(output_dir, 'output.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(output_dir):
            for file in files:
                if file != 'output.zip':
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, output_dir)
                    zipf.write(file_path, arcname)
    return zip_path

output_zip = zip_output_folder(config.OUTPUT_DIR)
logger.info(f"Created zip file at: {output_zip}")

print("Training complete! Download the results:")
from IPython.display import FileLink
FileLink(output_zip)

logger.info("Training process completed successfully")

Training complete! Download the results:
