# **Vietnamese News Sentiment Analysis with XLM-RoBERTa**
# This notebook fine-tunes XLM-RoBERTa for classifying sentiment of Vietnamese news summaries.
* Dataset: 12007 samples

In [1]:
# Install required packages
!pip install -q transformers datasets evaluate accelerate scikit-learn pandas matplotlib seaborn

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m91.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m71.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7

In [2]:
import os
import shutil
import zipfile
import pandas as pd
import numpy as np
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    EarlyStoppingCallback
)
from datasets import Dataset, DatasetDict
import evaluate
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import logging

2025-07-24 12:59:06.648050: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753361947.014892      77 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753361947.124677      77 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
# Configuration
class Config:
    MODEL_NAME = "xlm-roberta-base"
    SEED = 42
    BATCH_SIZE = 8  # Reduced to avoid GPU memory issues
    GRADIENT_ACCUMULATION_STEPS = 2  # Adjusted for effective batch size of 16
    LEARNING_RATE = 2e-5
    NUM_EPOCHS = 10
    MAX_LENGTH = 256
    WEIGHT_DECAY = 0.01
    OUTPUT_DIR = "./xlm-roberta-sentiment-complete"
    LOGGING_STEPS = 10  # Increased frequency for better monitoring
    SAVE_TOTAL_LIMIT = 2
    SENTIMENT_MAP = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
    REVERSE_SENTIMENT_MAP = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
    EARLY_STOPPING_PATIENCE = 3
    LR_SCHEDULER_TYPE = "cosine"
    WARMUP_RATIO = 0.1
    USE_CLASS_WEIGHTS = True
    DATA_PATH = "/kaggle/input/data-news-v2/data_news_v2.xlsx"

config = Config()

# Create output directory
os.makedirs(config.OUTPUT_DIR, exist_ok=True)

In [4]:
# Set up logging with error handling
try:
    logging.basicConfig(
        filename=os.path.join(config.OUTPUT_DIR, 'training.log'),
        level=logging.DEBUG,  # Increased verbosity
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)
    logger.info("Logging initialized successfully")
except PermissionError:
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info("Fallback to console logging due to permission error for file: %s", os.path.join(config.OUTPUT_DIR, 'training.log'))
except Exception as e:
    logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
    logger.info("Logging setup failed with error: %s. Fallback to console logging.", str(e))

# Set random seed
torch.manual_seed(config.SEED)
np.random.seed(config.SEED)


In [5]:
# Custom Trainer with class weights
class WeightedTrainer(Trainer):
    def __init__(self, class_weights=None, **kwargs):
        super().__init__(**kwargs)
        self.class_weights = class_weights
        
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        
        labels = labels.long()
        
        if self.class_weights is not None:
            weights = torch.tensor(self.class_weights, device=logits.device, dtype=torch.float32)
            loss_fct = torch.nn.CrossEntropyLoss(weight=weights)
        else:
            loss_fct = torch.nn.CrossEntropyLoss()
            
        loss = loss_fct(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [6]:
# Load and explore dataset
def load_and_explore_data(file_path):
    logger.info("Loading dataset...")
    df = pd.read_excel(file_path)
    
    df['label'] = df['sentiment'].map(config.SENTIMENT_MAP)
    
    with open(os.path.join(config.OUTPUT_DIR, 'dataset_info.txt'), 'w') as f:
        f.write(f"Total samples: {len(df)}\n")
        f.write("\nClass distribution:\n")
        f.write(df['sentiment'].value_counts().to_string())
    
    plt.figure(figsize=(8, 5))
    class_dist = df['sentiment'].value_counts()
    sns.barplot(x=class_dist.index, y=class_dist.values)
    plt.title('Class Distribution')
    plt.ylabel('Count')
    plt.savefig(os.path.join(config.OUTPUT_DIR, 'class_distribution.png'))
    plt.close()
    
    df['text_length'] = df['summary'].apply(lambda x: len(x.split()))
    
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    sns.histplot(df['text_length'], bins=30)
    plt.title('Text Length Distribution')
    
    plt.subplot(1, 2, 2)
    sns.boxplot(x='sentiment', y='text_length', data=df)
    plt.title('Text Length by Sentiment')
    plt.savefig(os.path.join(config.OUTPUT_DIR, 'text_length_distribution.png'))
    plt.close()
    
    return df

df = load_and_explore_data(config.DATA_PATH)
df = df.dropna()

if config.USE_CLASS_WEIGHTS:
    class_weights = compute_class_weight(
        'balanced', 
        classes=np.unique(df['label']),
        y=df['label']
    )
    config.CLASS_WEIGHTS = class_weights.tolist()
    logger.info(f"Class weights: {config.CLASS_WEIGHTS}")
else:
    config.CLASS_WEIGHTS = None

tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

  with pd.option_context('mode.use_inf_as_na', True):


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [7]:
# Analyze token lengths
def analyze_token_lengths(texts, tokenizer, max_length):
    lengths = []
    for text in texts:
        tokens = tokenizer(text, truncation=True, max_length=max_length)["input_ids"]
        lengths.append(len(tokens))
    return lengths

token_lengths = analyze_token_lengths(df['summary'], tokenizer, config.MAX_LENGTH)

plt.figure(figsize=(10, 5))
sns.histplot(token_lengths, bins=30)
plt.title('Token Length Distribution')
plt.axvline(x=config.MAX_LENGTH, color='r', linestyle='--', label='Max Length')
plt.legend()
plt.savefig(os.path.join(config.OUTPUT_DIR, 'token_length_distribution.png'))
plt.close()

logger.info(f"Percentage of texts within max length: {sum(np.array(token_lengths) <= config.MAX_LENGTH) / len(token_lengths):.2%}")

  with pd.option_context('mode.use_inf_as_na', True):


In [8]:
# Preprocess function
def preprocess_function(examples):
    return tokenizer(
        examples["summary"],
        truncation=True,
        max_length=config.MAX_LENGTH,
        padding="max_length"
    )

# Split data
train_df, temp_df = train_test_split(
    df,
    test_size=0.2,
    random_state=config.SEED,
    stratify=df['label']
)
val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    random_state=config.SEED,
    stratify=temp_df['label']
)

train_dataset = Dataset.from_pandas(train_df[['summary', 'sentiment', 'label']])
val_dataset = Dataset.from_pandas(val_df[['summary', 'sentiment', 'label']])
test_dataset = Dataset.from_pandas(test_df[['summary', 'sentiment', 'label']])

dataset = DatasetDict({
    "train": train_dataset,
    "validation": val_dataset,
    "test": test_dataset
})

tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["summary", "sentiment"]
)

with open(os.path.join(config.OUTPUT_DIR, 'data_splits.txt'), 'w') as f:
    f.write(f"Train samples: {len(train_df)}\n")
    f.write(f"Validation samples: {len(val_df)}\n")
    f.write(f"Test samples: {len(test_df)}\n")

model = AutoModelForSequenceClassification.from_pretrained(
    config.MODEL_NAME,
    num_labels=3
)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map:   0%|          | 0/7477 [00:00<?, ? examples/s]

Map:   0%|          | 0/935 [00:00<?, ? examples/s]

Map:   0%|          | 0/935 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/1.12G [00:00<?, ?B/s]

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
# Metrics
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    accuracy = accuracy_score(labels, predictions)
    f1_micro = f1_score(labels, predictions, average='micro')
    f1_macro = f1_score(labels, predictions, average='macro')
    f1_weighted = f1_score(labels, predictions, average='weighted')
    
    report = classification_report(
        labels,
        predictions,
        target_names=['Negative', 'Neutral', 'Positive'],
        output_dict=True
    )
    
    metrics = {
        'accuracy': accuracy,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'f1_weighted': f1_weighted,
        'negative_precision': report['Negative']['precision'],
        'negative_recall': report['Negative']['recall'],
        'negative_f1': report['Negative']['f1-score'],
        'neutral_precision': report['Neutral']['precision'],
        'neutral_recall': report['Neutral']['recall'],
        'neutral_f1': report['Neutral']['f1-score'],
        'positive_precision': report['Positive']['precision'],
        'positive_recall': report['Positive']['recall'],
        'positive_f1': report['Positive']['f1-score']
    }
    
    logger.info(f"Evaluation metrics: {metrics}")
    
    return metrics

In [10]:
# Training arguments
training_args = TrainingArguments(
    output_dir=config.OUTPUT_DIR,
    run_name=f"xlm-roberta-sentiment-{datetime.now().strftime('%Y-%m-%d-%H-%M')}",
    eval_strategy="steps",
    eval_steps=100,
    logging_steps=config.LOGGING_STEPS,
    save_steps=100,
    save_total_limit=config.SAVE_TOTAL_LIMIT,
    learning_rate=config.LEARNING_RATE,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE,
    gradient_accumulation_steps=config.GRADIENT_ACCUMULATION_STEPS,
    num_train_epochs=config.NUM_EPOCHS,
    weight_decay=config.WEIGHT_DECAY,
    lr_scheduler_type=config.LR_SCHEDULER_TYPE,
    warmup_ratio=config.WARMUP_RATIO,
    load_best_model_at_end=True,
    metric_for_best_model="eval_f1_macro",
    greater_is_better=True,
    fp16=True,
    logging_dir="./logs",
    seed=config.SEED,
    report_to="none",  # Disable wandb logging
    log_level="debug"  # Increase logging verbosity
)

# Initialize Trainer
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    class_weights=config.CLASS_WEIGHTS,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=config.EARLY_STOPPING_PATIENCE)]
)

  super().__init__(**kwargs)
Using auto half precision backend


In [11]:
# Check GPU memory before training
logger.info(f"GPU available: {torch.cuda.is_available()}")
logger.info(f"GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB allocated, {torch.cuda.memory_reserved() / 1024**3:.2f} GB reserved")
print(f"GPU available: {torch.cuda.is_available()}")
print(f"GPU memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB allocated, {torch.cuda.memory_reserved() / 1024**3:.2f} GB reserved")

# Start training with error handling
try:
    logger.info("Starting training...")
    print("Starting training...")
    train_result = trainer.train()
except Exception as e:
    logger.error(f"Training failed with error: {str(e)}")
    print(f"Training failed with error: {str(e)}")
    raise e

# Save training metrics
metrics = train_result.metrics
trainer.save_metrics("train", metrics)
logger.info(f"Training metrics: {metrics}")

# Save the final model
trainer.save_model(config.OUTPUT_DIR)
tokenizer.save_pretrained(config.OUTPUT_DIR)
logger.info(f"Model saved to {config.OUTPUT_DIR}")

# Save training arguments
trainer.save_state()

Currently training with a batch size of: 16
The following columns in the Training set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 7,477
  Num Epochs = 10
  Instantaneous batch size per device = 8
  Training with DataParallel so batch size has been adjusted to: 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 2
  Total optimization steps = 2,340
  Number of trainable parameters = 278,045,955


GPU available: True
GPU memory: 1.04 GB allocated, 1.09 GB reserved
Starting training...


Step,Training Loss,Validation Loss,Accuracy,F1 Micro,F1 Macro,F1 Weighted,Negative Precision,Negative Recall,Negative F1,Neutral Precision,Neutral Recall,Neutral F1,Positive Precision,Positive Recall,Positive F1
100,1.0127,0.919944,0.68877,0.68877,0.625887,0.662897,0.651558,0.867925,0.744337,0.488189,0.256198,0.336043,0.773626,0.82243,0.797282
200,0.6383,0.622792,0.747594,0.747594,0.723928,0.745941,0.785455,0.815094,0.8,0.56087,0.533058,0.54661,0.823256,0.827103,0.825175
300,0.6305,0.600954,0.758289,0.758289,0.743386,0.761799,0.822394,0.803774,0.812977,0.559259,0.623967,0.589844,0.849754,0.806075,0.827338
400,0.6031,0.584761,0.772193,0.772193,0.734532,0.759844,0.752351,0.90566,0.821918,0.654545,0.446281,0.530713,0.829268,0.873832,0.850967
500,0.4599,0.632859,0.771123,0.771123,0.738145,0.762164,0.75,0.916981,0.825127,0.628415,0.475207,0.541176,0.848131,0.848131,0.848131
600,0.5139,0.617253,0.768984,0.768984,0.745819,0.766383,0.905405,0.758491,0.825462,0.59292,0.553719,0.57265,0.788501,0.897196,0.839344
700,0.5034,0.604859,0.780749,0.780749,0.759165,0.777821,0.882591,0.822642,0.851563,0.613636,0.557851,0.584416,0.805556,0.880841,0.841518
800,0.3766,0.614865,0.788235,0.788235,0.773507,0.788848,0.84,0.871698,0.855556,0.616935,0.632231,0.62449,0.856796,0.824766,0.840476
900,0.3994,0.663538,0.765775,0.765775,0.743936,0.763394,0.785235,0.883019,0.831261,0.591111,0.549587,0.569593,0.847087,0.815421,0.830952
1000,0.2386,0.745242,0.77754,0.77754,0.766634,0.781195,0.806228,0.879245,0.841155,0.588448,0.673554,0.628131,0.897019,0.773364,0.830615


The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 935
  Batch size = 16
Saving model checkpoint to ./xlm-roberta-sentiment-complete/checkpoint-100
Configuration saved in ./xlm-roberta-sentiment-complete/checkpoint-100/config.json
Model weights saved in ./xlm-roberta-sentiment-complete/checkpoint-100/model.safetensors
tokenizer config file saved in ./xlm-roberta-sentiment-complete/checkpoint-100/tokenizer_config.json
Special tokens file saved in ./xlm-roberta-sentiment-complete/checkpoint-100/special_tokens_map.json
The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_le

In [12]:
# Evaluate on test set
logger.info("Evaluating on test set...")
print("Evaluating on test set...")
test_results = trainer.evaluate(
    tokenized_datasets["test"],
    metric_key_prefix="test"
)

# Save evaluation results
with open(os.path.join(config.OUTPUT_DIR, 'test_results.txt'), 'w') as f:
    for key, value in test_results.items():
        f.write(f"{key}: {value}\n")

logger.info("\n=== Test Results ===")
print("\n=== Test Results ===")
for key, value in test_results.items():
    if key.startswith("test_"):
        logger.info(f"{key[5:]}: {value}")
        print(f"{key[5:]}: {value}")

The following columns in the Evaluation set don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: __index_level_0__. If __index_level_0__ are not expected by `XLMRobertaForSequenceClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 935
  Batch size = 16


Evaluating on test set...


early stopping required metric_for_best_model, but did not find eval_f1_macro so early stopping is disabled



=== Test Results ===
loss: 0.6309393048286438
accuracy: 0.758288770053476
f1_micro: 0.758288770053476
f1_macro: 0.7444634862297465
f1_weighted: 0.7619015516906373
negative_precision: 0.784452296819788
negative_recall: 0.8345864661654135
negative_f1: 0.8087431693989071
neutral_precision: 0.5682656826568265
neutral_recall: 0.6363636363636364
neutral_f1: 0.6003898635477583
positive_precision: 0.8740157480314961
positive_recall: 0.7798594847775175
positive_f1: 0.8242574257425742
runtime: 13.2986
samples_per_second: 70.308
steps_per_second: 4.437


In [13]:
# Sample predictions function
def predict_sentiment(text):
    inputs = tokenizer(
        text,
        max_length=config.MAX_LENGTH,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    ).to(trainer.model.device)
    
    with torch.no_grad():
        outputs = trainer.model(**inputs)
    
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    pred_class = torch.argmax(probs).item()
    
    return {
        "sentiment": config.REVERSE_SENTIMENT_MAP[pred_class],
        "confidence": probs[0][pred_class].item(),
        "probabilities": {
            "Negative": probs[0][0].item(),
            "Neutral": probs[0][1].item(),
            "Positive": probs[0][2].item()
        }
    }

# Test on some samples and save predictions
sample_texts = df.sample(5, random_state=config.SEED)["summary"].tolist()
with open(os.path.join(config.OUTPUT_DIR, 'sample_predictions.txt'), 'w') as f:
    for i, text in enumerate(sample_texts):
        result = predict_sentiment(text)
        actual = df[df['summary'] == text]['sentiment'].values[0]
        
        f.write(f"\n=== Sample {i+1} ===\n")
        f.write(f"\nText: {text}\n")
        f.write(f"\nPredicted Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.2f})\n")
        f.write(f"Probabilities: {result['probabilities']}\n")
        f.write(f"Actual Sentiment: {actual}\n")
        
        logger.info(f"Sample {i+1} - Predicted: {result['sentiment']}, Actual: {actual}")
        print(f"\n=== Sample {i+1} ===")
        print(f"\nText: {text}")
        print(f"\nPredicted Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.2f})")
        print(f"Probabilities: {result['probabilities']}")
        print(f"Actual Sentiment: {actual}")


=== Sample 1 ===

Text: Chủ tịch Hội đồng thành viên EVN tiếp và làm việc với lãnh đạo KEPCO về ứng dụng công nghệ thông minh và phát triển nguồn điện hạt nhân. Các bên muốn tăng cường hợp tác, chia sẻ kinh nghiệm để phát triển dự án điện. EVN đề xuất thành lập tổ công tác chuyên môn. KEPCO đang quản lý 83GW và tham gia đầu tư dự án BOT tại Việt Nam. Nhiều đối tác nước ngoài muốn hợp tác với Việt Nam trong dự án điện hạt nhân Ninh Thuận, bao gồm Mỹ, Hàn Quốc, Nga, Nhật, Trung Quốc, Pháp.

Predicted Sentiment: Positive (Confidence: 0.87)
Probabilities: {'Negative': 0.00490963738411665, 'Neutral': 0.12209127843379974, 'Positive': 0.8729991912841797}
Actual Sentiment: Positive

=== Sample 2 ===

Text: Từ ngày 1/7, Việt Nam chỉ còn 34 tỉnh, thành phố sau khi sắp xếp lại hành chính từ 63 đơn vị trước đây. Việc này nhằm tạo điều kiện thuận lợi hơn cho các địa phương thu hút vốn đầu tư trực tiếp nước ngoài (FDI). Các “thủ phủ” mới đã được hình thành và hi vọng sẽ thu hút được nhiều vốn FDI h

In [14]:
# Create zip file of all outputs
def zip_output_folder(output_dir):
    zip_path = os.path.join(output_dir, 'output.zip')
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(output_dir):
            for file in files:
                if file != 'output.zip':
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, output_dir)
                    zipf.write(file_path, arcname)
    return zip_path

output_zip = zip_output_folder(config.OUTPUT_DIR)
logger.info(f"Created zip file at: {output_zip}")

print("Training complete! Download the results:")
from IPython.display import FileLink
FileLink(output_zip)

logger.info("Training process completed successfully")

Training complete! Download the results:
