## Cell 1: Import Libraries and Load Initial Data
This cell imports necessary libraries and loads your train.csv dataset into a pandas DataFrame.

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from collections import Counter
import spacy
import re
import json
import time
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score

train_df = pd.read_csv('../data/train.csv')

print("Initial data loaded.")
print(f"Shape of train_df: {train_df.shape}")
print(train_df.head())

Initial data loaded.
Shape of train_df: (159571, 8)
                 id                                       comment_text  toxic  \
0  0000997932d777bf  Explanation\nWhy the edits made under my usern...      0   
1  000103f0d9cfb60f  D'aww! He matches this background colour I'm s...      0   
2  000113f07ec002fd  Hey man, I'm really not trying to edit war. It...      0   
3  0001b41b1c6bb37e  "\nMore\nI can't make any real suggestions on ...      0   
4  0001d958c54c6e35  You, sir, are my hero. Any chance you remember...      0   

   severe_toxic  obscene  threat  insult  identity_hate  
0             0        0       0       0              0  
1             0        0       0       0              0  
2             0        0       0       0              0  
3             0        0       0       0              0  
4             0        0       0       0              0  


### Cell 2: Define Text Preprocessing Function 
This cell defines your custom text preprocessing function using spaCy and regular expressions. It also loads the spaCy model.

In [3]:
nlp_preprocessor = spacy.load("en_core_web_sm", disable=["parser", "ner"])
print("spaCy 'en_core_web_sm' model loaded for preprocessing.")

def preprocess_text_custom_spacy(text_to_process, nlp_instance):
    if not nlp_instance:
        pass 
    if not isinstance(text_to_process, str): text_to_process = str(text_to_process)
    
    text_to_process = text_to_process.lower()
    text_to_process = re.sub(r'https?://\S+|www\.\S+', '', text_to_process)
    text_to_process = re.sub(r'@\w+', '', text_to_process)
    text_to_process = re.sub(r'<.*?>', '', text_to_process)
    text_to_process = re.sub(r'[^a-z\s]', '', text_to_process)
    text_to_process = re.sub(r'\s+', ' ', text_to_process).strip()
    
    doc = nlp_instance(text_to_process)
    processed_tokens = [
        token.lemma_ for token in doc if token.is_alpha and not token.is_stop
    ]
    return ' '.join(processed_tokens)

print("Custom text preprocessing function 'preprocess_text_custom_spacy' defined.")

spaCy 'en_core_web_sm' model loaded for preprocessing.
Custom text preprocessing function 'preprocess_text_custom_spacy' defined.


### Cell 3: Apply Text Preprocessing
This cell applies the defined preprocessing function to your comment text, handles NaNs, and removes any rows that become empty after processing.

In [4]:
train_df_processed = train_df.copy()
train_df_processed['comment_text_processed'] = train_df_processed['comment_text'].fillna('').apply(
    lambda x: preprocess_text_custom_spacy(x, nlp_preprocessor)
)

initial_rows = train_df_processed.shape[0]
train_df_processed = train_df_processed[train_df_processed['comment_text_processed'] != ""]
rows_removed = initial_rows - train_df_processed.shape[0]

print(f"Text preprocessing applied. Rows after processing and removing empty: {train_df_processed.shape[0]} (removed {rows_removed})")
print(train_df_processed[['comment_text', 'comment_text_processed']].head())

Text preprocessing applied. Rows after processing and removing empty: 159434 (removed 137)
                                        comment_text  \
0  Explanation\nWhy the edits made under my usern...   
1  D'aww! He matches this background colour I'm s...   
2  Hey man, I'm really not trying to edit war. It...   
3  "\nMore\nI can't make any real suggestions on ...   
4  You, sir, are my hero. Any chance you remember...   

                              comment_text_processed  
0  explanation edit username hardcore metallica f...  
1  daww match background colour m seemingly stick...  
2  hey man m try edit war guy constantly remove r...  
3  not real suggestion improvement wonder section...  
4                    sir hero chance remember page s  


### Cell 4: Build Vocabulary, Numericalize, and Pad Sequences
This cell creates the vocabulary from the processed text, converts text to numerical sequences, and pads/truncates them to a fixed length.

In [5]:
PROCESSED_TEXT_COLUMN_NAME = 'comment_text_processed'

all_processed_words = [word for comment in train_df_processed[PROCESSED_TEXT_COLUMN_NAME].astype(str) for word in comment.split()]
word_counts = Counter(all_processed_words)
sorted_words = sorted(word_counts, key=word_counts.get, reverse=True)
vocab_to_int = {word: i+2 for i, word in enumerate(sorted_words)}
vocab_to_int['<pad>'] = 0
vocab_to_int['<unk>'] = 1
SEQ_LENGTH = 200

train_df_processed['padded_features'] = train_df_processed[PROCESSED_TEXT_COLUMN_NAME].apply(
    lambda text: (
        lambda seq: seq[:SEQ_LENGTH] if len(seq) > SEQ_LENGTH else seq + [vocab_to_int['<pad>']] * (SEQ_LENGTH - len(seq))
    )([vocab_to_int.get(word, vocab_to_int['<unk>']) for word in str(text).split() if word])
)

print(f"Vocabulary created (size: {len(vocab_to_int)}), SEQ_LENGTH set to {SEQ_LENGTH}.")
print("Column 'padded_features' created.")


Vocabulary created (size: 197521), SEQ_LENGTH set to 200.
Column 'padded_features' created.


### Cell 5: Prepare PyTorch Tensors, Dataset, and DataLoaders
This cell extracts the padded features and labels, converts them to PyTorch tensors, defines a custom Dataset class, splits data into training and validation sets, and creates DataLoader instances.

In [6]:
X_list = train_df_processed['padded_features'].tolist()
X_np = np.array(X_list, dtype=np.int64)
features_tensor = torch.from_numpy(X_np)

label_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
y_np = train_df_processed[label_columns].values.astype(np.float32)
labels_tensor = torch.from_numpy(y_np)

print(f"Shape of features_tensor: {features_tensor.shape}")
print(f"Shape of labels_tensor: {labels_tensor.shape}")

class ToxicityDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

full_dataset = ToxicityDataset(features_tensor, labels_tensor)

validation_split = 0.2
dataset_size = len(full_dataset)
val_size = int(validation_split * dataset_size)
train_size = dataset_size - val_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

print(f"Training samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")

BATCH_SIZE = 32
NUM_WORKERS = 0 

if torch.backends.mps.is_available() and torch.backends.mps.is_built():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"DataLoaders will be configured for device: {device}")
use_pin_memory = True if device.type != 'cpu' else False


train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, num_workers=NUM_WORKERS, pin_memory=use_pin_memory)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=use_pin_memory)

print("PyTorch Dataset and DataLoaders created.")

Shape of features_tensor: torch.Size([159434, 200])
Shape of labels_tensor: torch.Size([159434, 6])
Training samples: 127548, Validation samples: 31886
DataLoaders will be configured for device: mps
PyTorch Dataset and DataLoaders created.


### Cell 6: Define NBoW Model, Loss, and Optimizer
This cell defines the SimplerNBoWClassifier model class, sets up the device, instantiates the model, defines the loss function (BCEWithLogitsLoss without weights for this baseline), and the Adam optimizer.

In [7]:
VOCAB_SIZE_NBOW = len(vocab_to_int)
EMBEDDING_DIM_NBOW = 100 
OUTPUT_DIM_NBOW = 6      
padding_idx_nbow = vocab_to_int.get('<pad>', 0)

class SimplerNBoWClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, output_dim, padding_idx_val):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx_val)
        self.fc = nn.Linear(embedding_dim, output_dim)

    def forward(self, text_batch):
        embedded = self.embedding(text_batch)
        averaged_embeddings = torch.mean(embedded, dim=1)
        logits = self.fc(averaged_embeddings)
        return logits

model_nbow = SimplerNBoWClassifier(VOCAB_SIZE_NBOW, EMBEDDING_DIM_NBOW, OUTPUT_DIM_NBOW, padding_idx_nbow)
model_nbow.to(device)

criterion = nn.BCEWithLogitsLoss() 
LEARNING_RATE_NBOW = 0.001
optimizer_nbow = optim.Adam(model_nbow.parameters(), lr=LEARNING_RATE_NBOW)

print("NBoW Model, Loss function, and Optimizer defined.")
print(f"Model moved to device: {device}")

NBoW Model, Loss function, and Optimizer defined.
Model moved to device: mps


### Cell 7: Helper Functions for Training and Evaluation Epochs
Defines two helper functions: `train_epoch_func` for handling the logic of a single training epoch (forward pass, loss calculation, backpropagation, optimizer step) and `evaluate_epoch_func` for a single validation epoch.

In [8]:
def train_epoch_func(model, dataloader, criterion_fn, optimizer_fn, current_device):
    model.train()
    total_loss = 0
    for features, labels in dataloader:
        features = features.to(current_device)
        labels = labels.to(current_device)
        optimizer_fn.zero_grad()
        predictions = model(features)
        loss = criterion_fn(predictions, labels)
        loss.backward()
        optimizer_fn.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)

def evaluate_epoch_func(model, dataloader, criterion_fn, current_device):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for features, labels in dataloader:
            features = features.to(current_device)
            labels = labels.to(current_device)
            predictions = model(features)
            loss = criterion_fn(predictions, labels)
            total_loss += loss.item()
    return total_loss / len(dataloader)

print("Helper functions 'train_epoch_func' and 'evaluate_epoch_func' defined.")

Helper functions 'train_epoch_func' and 'evaluate_epoch_func' defined.


## Cell 8: Calculate Class Weights and Define Weighted Loss Function
Calculates positive class weights (`pos_weight`) for each toxicity category based on the training set distribution to address class imbalance. Defines a new `BCEWithLogitsLoss` criterion (`criterion_nbow_weighted`) using these calculated weights.

In [19]:
import torch

train_labels_np = labels_tensor[train_dataset.indices].cpu().numpy() 

pos_weights_list = []
print("Calculating positive weights for BCEWithLogitsLoss:")
for i, col_name in enumerate(label_columns):
    num_total_train_samples = train_labels_np.shape[0]
    num_pos = train_labels_np[:, i].sum()
    num_neg = num_total_train_samples - num_pos

    if num_pos > 0 and num_neg > 0:
        weight = num_neg / num_pos
    elif num_pos == 0 and num_neg > 0: 
        weight = num_total_train_samples 
        print(f"Warning: No positive samples found for class '{col_name}' in training data. Using large weight: {weight:.2f}")
    else: 
        weight = 1.0 
        print(f"Warning: No negative samples or no samples for class '{col_name}'? Using weight: {weight:.2f}")
        
    pos_weights_list.append(weight)
    print(f"  Class '{col_name}': num_pos={num_pos}, num_neg={num_neg}, calculated_pos_weight={weight:.2f}")

pos_weights_tensor = torch.tensor(pos_weights_list, dtype=torch.float32).to(device)
print("\npos_weights_tensor created and moved to device.")
print(pos_weights_tensor)

criterion_nbow_weighted = nn.BCEWithLogitsLoss(pos_weight=pos_weights_tensor)
print("\nWeighted BCEWithLogitsLoss ('criterion_nbow_weighted') defined.")

Calculating positive weights for BCEWithLogitsLoss:
  Class 'toxic': num_pos=12280.0, num_neg=115268.0, calculated_pos_weight=9.39
  Class 'severe_toxic': num_pos=1282.0, num_neg=126266.0, calculated_pos_weight=98.49
  Class 'obscene': num_pos=6823.0, num_neg=120725.0, calculated_pos_weight=17.69
  Class 'threat': num_pos=379.0, num_neg=127169.0, calculated_pos_weight=335.54
  Class 'insult': num_pos=6357.0, num_neg=121191.0, calculated_pos_weight=19.06
  Class 'identity_hate': num_pos=1138.0, num_neg=126410.0, calculated_pos_weight=111.08

pos_weights_tensor created and moved to device.
tensor([  9.3866,  98.4914,  17.6938, 335.5383,  19.0642, 111.0808],
       device='mps:0')

Weighted BCEWithLogitsLoss ('criterion_nbow_weighted') defined.


### Cell 9: NBoW Model Training Loop (with Weighted Loss)
Executes the main training loop for the NBoW model (`model_nbow`) for a specified number of epochs. This loop uses the `train_epoch_func`, `evaluate_epoch_func`, the NBoW optimizer, and the `criterion_nbow_weighted` (weighted loss function).

In [11]:
NUM_EPOCHS_NBOW = 10 

print(f"Starting NBoW model training for {NUM_EPOCHS_NBOW} epochs...")
for epoch in range(NUM_EPOCHS_NBOW):
    epoch_start_time = time.time()
    avg_train_loss = train_epoch_func(model_nbow, train_loader, criterion_nbow_weighted, optimizer_nbow, device)
    avg_val_loss = evaluate_epoch_func(model_nbow, val_loader, criterion_nbow_weighted, device)
    epoch_end_time = time.time()
    epoch_duration = epoch_end_time - epoch_start_time
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS_NBOW}] (NBoW) - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Duration: {epoch_duration:.2f}s")

print("\nNBoW Training finished.")

Starting NBoW model training for 10 epochs...
Epoch [1/10] (NBoW) - Train Loss: 1.0911, Val Loss: 0.8907, Duration: 172.11s
Epoch [2/10] (NBoW) - Train Loss: 0.7660, Val Loss: 0.7242, Duration: 161.23s
Epoch [3/10] (NBoW) - Train Loss: 0.6169, Val Loss: 0.6802, Duration: 161.79s
Epoch [4/10] (NBoW) - Train Loss: 0.5379, Val Loss: 0.6630, Duration: 155.48s
Epoch [5/10] (NBoW) - Train Loss: 0.4857, Val Loss: 0.6536, Duration: 158.95s
Epoch [6/10] (NBoW) - Train Loss: 0.4433, Val Loss: 0.6495, Duration: 161.01s
Epoch [7/10] (NBoW) - Train Loss: 0.4080, Val Loss: 0.6775, Duration: 160.94s
Epoch [8/10] (NBoW) - Train Loss: 0.3768, Val Loss: 0.7067, Duration: 154.91s
Epoch [9/10] (NBoW) - Train Loss: 0.3485, Val Loss: 0.7069, Duration: 153.63s
Epoch [10/10] (NBoW) - Train Loss: 0.3238, Val Loss: 0.7275, Duration: 151.61s

NBoW Training finished.


### Cell 10: Initial NBoW Model Evaluation (Weighted Loss, Default 0.5 Threshold)
Performs an initial evaluation of the NBoW model (trained with weighted loss) on the 
validation set. It calculates and prints a classification report, ROC AUC scores, and Exact Match Ratio using the default 0.5 threshold for binary predictions.

In [12]:
print("Starting NBoW model evaluation on the validation set...")
model_nbow.eval()
all_true_labels_nbow = []
all_predicted_probs_nbow = []
all_predicted_labels_nbow = []

with torch.no_grad():
    for features, labels in val_loader:
        features = features.to(device)
        logits = model_nbow(features)
        probabilities = torch.sigmoid(logits).cpu().numpy()
        binary_predictions = (probabilities >= 0.5).astype(int)
        all_true_labels_nbow.extend(labels.numpy().astype(int))
        all_predicted_probs_nbow.extend(probabilities)
        all_predicted_labels_nbow.extend(binary_predictions)

all_true_labels_nbow_np = np.array(all_true_labels_nbow)
all_predicted_labels_nbow_np = np.array(all_predicted_labels_nbow)
all_predicted_probs_nbow_np = np.array(all_predicted_probs_nbow)

print("\n--- NBoW Model: Classification Report ---")
report_nbow = classification_report(all_true_labels_nbow_np, all_predicted_labels_nbow_np, target_names=label_columns, zero_division=0)
print(report_nbow)

print("\n--- NBoW Model: ROC AUC Score (per class and average) ---")
roc_auc_per_class_nbow = []
for i in range(all_true_labels_nbow_np.shape[1]):
    try:
        score = roc_auc_score(all_true_labels_nbow_np[:, i], all_predicted_probs_nbow_np[:, i])
        print(f"ROC AUC for class '{label_columns[i]}': {score:.4f}")
        roc_auc_per_class_nbow.append(score)
    except ValueError:
        print(f"ROC AUC for class '{label_columns[i]}': Not computable (likely only one class present in y_true).")
        roc_auc_per_class_nbow.append(float('nan'))

valid_roc_auc_scores_nbow = [s for s in roc_auc_per_class_nbow if not np.isnan(s)]
if valid_roc_auc_scores_nbow:
    print(f"Average ROC AUC (macro, ignoring NaN): {np.mean(valid_roc_auc_scores_nbow):.4f}")

exact_match_accuracy_nbow = accuracy_score(all_true_labels_nbow_np, all_predicted_labels_nbow_np)
print(f"\nNBoW Model: Exact Match Ratio (Accuracy): {exact_match_accuracy_nbow:.4f}")

Starting NBoW model evaluation on the validation set...

--- NBoW Model: Classification Report ---
               precision    recall  f1-score   support

        toxic       0.53      0.84      0.65      3013
 severe_toxic       0.22      0.90      0.35       313
      obscene       0.53      0.87      0.66      1626
       threat       0.06      0.88      0.11        99
       insult       0.47      0.86      0.60      1520
identity_hate       0.09      0.82      0.17       267

    micro avg       0.38      0.85      0.53      6838
    macro avg       0.32      0.86      0.42      6838
 weighted avg       0.48      0.85      0.60      6838
  samples avg       0.05      0.08      0.06      6838


--- NBoW Model: ROC AUC Score (per class and average) ---
ROC AUC for class 'toxic': 0.9364
ROC AUC for class 'severe_toxic': 0.9731
ROC AUC for class 'obscene': 0.9641
ROC AUC for class 'threat': 0.9606
ROC AUC for class 'insult': 0.9569
ROC AUC for class 'identity_hate': 0.9189
Average ROC

## Cell 11: Optimal Threshold Tuning
Using the probabilities predicted by the NBoW model (from Cell 10), this cell finds the optimal classification threshold for each toxicity class that maximizes its F1-score on the validation set. It then prints these optimal thresholds and a new classification report and Exact Match Ratio based on these tuned thresholds.

In [20]:
import numpy as np
from sklearn.metrics import f1_score, classification_report, accuracy_score 

print("Starting threshold tuning for NBoW model (trained with weighted loss)...")

optimal_thresholds = {}
best_f1_scores = {}

threshold_candidates = np.arange(0.01, 1.00, 0.01)

for i, class_name in enumerate(label_columns):
    best_threshold_for_class = 0.5 
    best_f1_for_class = 0.0
    
    true_labels_for_class = all_true_labels_nbow_np[:, i]
    pred_probs_for_class = all_predicted_probs_nbow_np[:, i]
    
    if np.sum(true_labels_for_class) == 0:
        print(f"Class '{class_name}': No positive samples in validation set. Skipping threshold tuning, using default 0.5.")
        optimal_thresholds[class_name] = 0.5
        temp_preds = (pred_probs_for_class >= 0.5).astype(int)
        best_f1_for_class = f1_score(true_labels_for_class, temp_preds, zero_division=0)
        best_f1_scores[class_name] = best_f1_for_class
        continue

    for threshold in threshold_candidates:
        binary_predictions_for_class = (pred_probs_for_class >= threshold).astype(int)
        current_f1 = f1_score(true_labels_for_class, binary_predictions_for_class, average='binary', zero_division=0)
        
        if current_f1 > best_f1_for_class:
            best_f1_for_class = current_f1
            best_threshold_for_class = threshold
            
    optimal_thresholds[class_name] = best_threshold_for_class
    best_f1_scores[class_name] = best_f1_for_class
    print(f"Class '{class_name}': Optimal Threshold = {best_threshold_for_class:.2f}, Best F1 = {best_f1_for_class:.4f}")

print("\nOptimal thresholds found for each class:")
for class_name, thresh in optimal_thresholds.items():
    print(f"  {class_name}: {thresh:.2f}")

all_predicted_labels_tuned_np = np.zeros_like(all_predicted_probs_nbow_np, dtype=int)
for i, class_name in enumerate(label_columns):
    threshold = optimal_thresholds[class_name]
    all_predicted_labels_tuned_np[:, i] = (all_predicted_probs_nbow_np[:, i] >= threshold).astype(int)

print("\n--- Classification Report (with Tuned Thresholds) ---")
report_tuned = classification_report(all_true_labels_nbow_np, all_predicted_labels_tuned_np, target_names=label_columns, zero_division=0)
print(report_tuned)
exact_match_accuracy_tuned = accuracy_score(all_true_labels_nbow_np, all_predicted_labels_tuned_np)
print(f"\nNBoW Model: Exact Match Ratio (Accuracy) with Tuned Thresholds: {exact_match_accuracy_tuned:.4f}")

Starting threshold tuning for NBoW model (trained with weighted loss)...
Class 'toxic': Optimal Threshold = 0.72, Best F1 = 0.7237
Class 'severe_toxic': Optimal Threshold = 0.92, Best F1 = 0.4751
Class 'obscene': Optimal Threshold = 0.76, Best F1 = 0.7317
Class 'threat': Optimal Threshold = 0.96, Best F1 = 0.3038
Class 'insult': Optimal Threshold = 0.75, Best F1 = 0.6710
Class 'identity_hate': Optimal Threshold = 0.91, Best F1 = 0.3307

Optimal thresholds found for each class:
  toxic: 0.72
  severe_toxic: 0.92
  obscene: 0.76
  threat: 0.96
  insult: 0.75
  identity_hate: 0.91

--- Classification Report (with Tuned Thresholds) ---
               precision    recall  f1-score   support

        toxic       0.71      0.74      0.72      3013
 severe_toxic       0.38      0.64      0.48       313
      obscene       0.69      0.77      0.73      1626
       threat       0.26      0.36      0.30        99
       insult       0.61      0.74      0.67      1520
identity_hate       0.26     

### Cell 12: Save NBoW Model and Artifacts
This cell saves the trained NBoW model's state dictionary and the vocab_to_int dictionary along with SEQ_LENGTH for future use (e.g., in an API).

In [18]:
import os
import torch
import json


ARTIFACTS_DIR = '../model_artifacts' 
os.makedirs(ARTIFACTS_DIR, exist_ok=True)

# Используем оригинальные имена файлов, которые ожидает API
MODEL_STATE_FILENAME_IN_ARTIFACTS = 'nbow_model_state.pth'
VOCAB_CONFIG_FILENAME_IN_ARTIFACTS = 'nbow_vocab_config.json' # <--- ИСПРАВЛЕНО ИМЯ ФАЙЛА

model_state_path = os.path.join(ARTIFACTS_DIR, MODEL_STATE_FILENAME_IN_ARTIFACTS)
vocab_config_path = os.path.join(ARTIFACTS_DIR, VOCAB_CONFIG_FILENAME_IN_ARTIFACTS)

torch.save(model_nbow.state_dict(), model_state_path)
print(f"NBoW Model state saved to: {model_state_path}")

full_config_to_save = {
    'vocab_to_int': vocab_to_int,
    'SEQ_LENGTH': SEQ_LENGTH,
    'label_columns': label_columns,
    'optimal_thresholds': optimal_thresholds
}
with open(vocab_config_path, 'w') as f:
    json.dump(full_config_to_save, f, ensure_ascii=False, indent=4)
print(f"NBoW Vocabulary, SEQ_LENGTH, label_columns, and OPTIMAL THRESHOLDS saved to: {vocab_config_path}")

NBoW Model state saved to: ../model_artifacts/nbow_model_state.pth
NBoW Vocabulary, SEQ_LENGTH, label_columns, and OPTIMAL THRESHOLDS saved to: ../model_artifacts/nbow_vocab_config.json
