In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/jigsaw-agile-community-rules/sample_submission.csv
/kaggle/input/jigsaw-agile-community-rules/train.csv
/kaggle/input/jigsaw-agile-community-rules/test.csv
/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline/config.json
/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline/tokenizer_config.json
/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline/model.safetensors
/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline/special_tokens_map.json
/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline/sentencepiece.bpe.model


In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup

In [3]:
# -----------------------------
# Load and preprocess data
# -----------------------------
# Use Kaggle paths when running on Kaggle
MODEL_PATH = "/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline"

trn = "/kaggle/input/jigsaw-agile-community-rules/train.csv"
tst = "/kaggle/input/jigsaw-agile-community-rules/test.csv"
df_trn = pd.read_csv(trn)
df_tst = pd.read_csv(tst)


def fill_empty_examples_pandas(df):
    example_cols = ['positive_example_1', 'positive_example_2', 'negative_example_1', 'negative_example_2']
    for col in example_cols:
        df[col] = df[col].fillna('').astype(str)

    df['positive_example_1'] = df['positive_example_1'].mask(df['positive_example_1'] == '', df['positive_example_2'])
    df['positive_example_2'] = df['positive_example_2'].mask(df['positive_example_2'] == '', df['positive_example_1'])

    df['negative_example_1'] = df['negative_example_1'].mask(df['negative_example_1'] == '', df['negative_example_2'])
    df['negative_example_2'] = df['negative_example_2'].mask(df['negative_example_2'] == '', df['negative_example_1'])

    return df


def getText(value):
    return str(value) if pd.notna(value) else ''


def extract_texts(row):
    return {
        "body": getText(row["body"]),
        "rule": getText(row["rule"]),
        "subreddit": getText(row["subreddit"]),
        "pos1": f"{getText(row['positive_example_1'])}",
        "pos2": f"{getText(row['positive_example_2'])}",
        "neg1": f"{getText(row['negative_example_1'])}",
        "neg2": f"{getText(row['negative_example_2'])}",
    }

df_trn = fill_empty_examples_pandas(df_trn)
df_tst = fill_empty_examples_pandas(df_tst)

df_trn["inputs"] = df_trn.apply(extract_texts, axis=1)

df_tst['text_to_classify'] = df_tst['body'].apply(getText)
df_tst["inputs"] = df_tst.apply(extract_texts, axis=1)

text_feature_cols = [
    'body',
    'rule',
    'subreddit',
    'positive_example_1',
    'positive_example_2',
    'negative_example_1',
    'negative_example_2'
]

print("--- Comprehensive NaN Inspection for All Text Feature Columns ---")

# Count NaNs for each text feature column
print("\n--- NaN Counts per Text Feature Column ---")
print(df_trn[text_feature_cols].isnull().sum())

# Analyze rows with NaNs in 'body' (most critical)
print("\n--- Analysis for 'body' column NaNs ---")
body_nan_rows = df_trn[df_trn['body'].isnull()]
if not body_nan_rows.empty:
    print(f"Number of rows with NaN in 'body': {len(body_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'body':")
    print(body_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in 'body' column.")

# Analyze rows with NaNs in 'rule'
print("\n--- Analysis for 'rule' column NaNs ---")
rule_nan_rows = df_trn[df_trn['rule'].isnull()]
if not rule_nan_rows.empty:
    print(f"Number of rows with NaN in 'rule': {len(rule_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'rule':")
    print(rule_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in 'rule' column.")

# Analyze rows with NaNs in 'subreddit'
print("\n--- Analysis for 'subreddit' column NaNs ---")
subreddit_nan_rows = df_trn[df_trn['subreddit'].isnull()]
if not subreddit_nan_rows.empty:
    print(f"Number of rows with NaN in 'subreddit': {len(subreddit_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'subreddit':")
    print(subreddit_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in 'subreddit' column.")

# Analyze rows where ANY of the example columns are NaN
print("\n--- Analysis for Example Columns NaNs ---")
example_only_cols = [col for col in text_feature_cols if 'example' in col]
df_any_example_nan = df_trn[df_trn[example_only_cols].isnull().any(axis=1)]
if not df_any_example_nan.empty:
    print(f"Number of rows with NaN in ANY example column: {len(df_any_example_nan)}")
    print("Rule violation distribution for rows with NaN in ANY example column:")
    print(df_any_example_nan['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in any example column.")

# Overall rule_violation distribution (for comparison)
print(f"\n--- Overall rule_violation distribution: ---")
print(df_trn['rule_violation'].value_counts(normalize=True))

--- Comprehensive NaN Inspection for All Text Feature Columns ---

--- NaN Counts per Text Feature Column ---
body                  0
rule                  0
subreddit             0
positive_example_1    0
positive_example_2    0
negative_example_1    0
negative_example_2    0
dtype: int64

--- Analysis for 'body' column NaNs ---
No NaN values found in 'body' column.

--- Analysis for 'rule' column NaNs ---
No NaN values found in 'rule' column.

--- Analysis for 'subreddit' column NaNs ---
No NaN values found in 'subreddit' column.

--- Analysis for Example Columns NaNs ---
No NaN values found in any example column.

--- Overall rule_violation distribution: ---
rule_violation
1    0.508132
0    0.491868
Name: proportion, dtype: float64


In [4]:
N_EPOCHS = 8
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# -----------------------------
# Dataset
# -----------------------------
class MultiInputDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=128, is_test=False): # Renamed df_trn to df for generality
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        item = {}
        for field in ["text_to_classify", "rule", "subreddit"]:
            encoded = self.tokenizer(
                row[field],
                truncation=True,
                padding='max_length',
                max_length=self.max_len,
                return_tensors="pt"
            )

            for key in encoded:
                item[f"{field}_{key}"] = encoded[key].squeeze(0)
        if not self.is_test:
          item["label"] = torch.tensor(row["rule_violation"], dtype=torch.float32)
        return item

# -----------------------------
# Model
# -----------------------------
class MultiInputBERT(nn.Module):
    def __init__(self, model_name=MODEL_PATH):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Sequential(
            nn.Linear(768 * 3, 256),
            nn.ReLU(),
            nn.Linear(256, 1) # Output a single logit
        )

    def forward(self, inputs):
        cls_outputs = []
        for field in ["text_to_classify", "rule", "subreddit"]:
            out = self.bert(
                input_ids=inputs[f"{field}_input_ids"],
                attention_mask=inputs[f"{field}_attention_mask"]
            )
            cls_outputs.append(out.last_hidden_state[:, 0])  # CLS token
        x = torch.cat(cls_outputs, dim=1)
        x = self.dropout(x)
        return self.classifier(x) # Return raw logits

In [5]:
# -----------------------------
# Training and Evaluation
# -----------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

oof_preds = np.zeros(len(df_trn))
test_preds_folds = [] # This is correct

for fold, (train_idx_orig, val_idx_orig) in enumerate(skf.split(df_trn, df_trn["rule_violation"])):
    print(f"\n----- Fold {fold+1} -----")
    # train_df = df_trn.iloc[train_idx].reset_index(drop=True)

    # Create original train and validation DataFrames for this fold
    # These are the original body, rules, subreddits, and examples
    fold_train_df_orig = df_trn.iloc[train_idx_orig].reset_index(drop=True)
    fold_val_df_orig = df_trn.iloc[val_idx_orig].reset_index(drop=True)

    # Expand the tarining data for this fold
    expanded_train_data = []
    for idx, row in fold_train_df_orig.iterrows():
        rule_text = getText(row['rule'])
        subreddit_text = getText(row['subreddit'])
        # Add original body as a training sample
        expanded_train_data.append({
            'text_to_classify': getText(row['body']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': row['rule_violation']
        })
        # Add positive examples
        expanded_train_data.append({
            'text_to_classify': getText(row['positive_example_1']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 1.0
        })
        expanded_train_data.append({
            'text_to_classify': getText(row['positive_example_2']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 1.0
        })
        # Add negative examples
        expanded_train_data.append({
            'text_to_classify': getText(row['negative_example_1']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 0.0
        })
        expanded_train_data.append({
            'text_to_classify': getText(row['negative_example_2']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 0.0
        })

    # Create the expanded training DataFrame for this fold
    fold_train_df_expanded = pd.DataFrame(expanded_train_data)
    fold_train_df_expanded = fold_train_df_expanded[fold_train_df_expanded['text_to_classify'] != ''].reset_index(drop=True)

    # 3. Prepare the VALIDATION data for this fold (using original body)
    # Map 'body' to 'text_to_classify' for the validation set
    fold_val_df_for_model = fold_val_df_orig.copy()
    fold_val_df_for_model['text_to_classify'] = fold_val_df_for_model['body']

    # 4. Create Datasets and DataLoaders
    train_dataset = MultiInputDataset(fold_train_df_expanded, tokenizer) # Train on expanded data
    val_dataset = MultiInputDataset(fold_val_df_for_model, tokenizer) # Validate on original body

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8) # Use a consistent batch size

    test_loader = DataLoader(MultiInputDataset(df_tst, tokenizer, is_test=True), batch_size=8, shuffle=False)


    model = MultiInputBERT().to(device)
    optimizer = AdamW(model.parameters(), lr=1e-6)
    criterion = nn.BCEWithLogitsLoss()

    num_training_steps_per_fold = len(train_loader) * N_EPOCHS
    num_warmup_steps_per_fold = int(num_training_steps_per_fold * 0.05)

    # Initialize the scheduler
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps_per_fold,
        num_training_steps=num_training_steps_per_fold
    )

    # Training Loop for this fold
    best_auc = -1.0 # Track best AUC for this fold
    best_model_state = None # To save the best model for this fold

    for epoch in range(N_EPOCHS):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
            labels = batch["label"].to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            logits = outputs.squeeze(-1)

            loss = criterion(logits, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1} Loss: {total_loss / len(train_loader):.4f}")

        # Eval
        model.eval()
        preds_raw, labels_all = [], []
        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Validating Epoch {epoch+1}"):
                inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
                labels = batch["label"].to(device)
                outputs = model(inputs)
                logits = outputs.squeeze(-1) # Squeeze to [batch_size]

                probs = torch.sigmoid(logits).detach().cpu().tolist()
                preds_raw.extend(probs)
                labels_all.extend(labels.cpu().tolist())

            # Hard labels (for classification report, optional)
            preds = [int(p > 0.5) for p in preds_raw]

        # Print metrics
        print(classification_report(labels_all, preds, digits=3, zero_division=0))

        curr_auc = roc_auc_score(labels_all, preds_raw)
        print(f"AUC Score: {curr_auc:.4f}")

        # Save the best model for this fold based on validation AUC
        if curr_auc > best_auc:
            best_auc = curr_auc
            best_model_state = model.state_dict() # Save model weights
            print(f"  -> New best Val AUC for Fold {fold+1}: {best_auc:.4f}")

    # 6. Load best model state for this fold
    model.load_state_dict(best_model_state) # Use best_model_state
    print(f"Fold {fold+1} Best Val AUC: {best_auc:.4f}")

    # Make OOF predictions for this fold's validation set
    model.eval()
    fold_val_preds_list = []
    fold_val_true_list = []
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Fold {fold + 1} OOF Prediction"):
            inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
            labels = batch["label"].to(device)
            outputs = model(inputs)
            logits = outputs.squeeze(-1)  # Squeeze to [batch_size]
            probs = torch.sigmoid(logits).detach().cpu().tolist()
            fold_val_preds_list.extend(probs)
            fold_val_true_list.extend(labels.cpu().tolist())

    # Sanity check: Calculate AUC for this fold's OOF predictions
    oof_fold_auc_check = roc_auc_score(fold_val_true_list, fold_val_preds_list)
    print(f"Fold {fold + 1} OOF AUC Check: {oof_fold_auc_check:.4f} (This is the true validation AUC for this fold)")

    # Assign predictions to the correct indices in the global oof_preds array
    oof_preds[val_idx_orig] = np.array(fold_val_preds_list)  # Use val_idx_orig from kf.split
    # Make predictions on the TEST set using this fold's best model
    test_fold_preds = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc=f"Fold {fold + 1} Test Prediction"):
            inputs = {k: v.to(device) for k, v in batch.items()}
            outputs = model(inputs)
            logits = outputs.squeeze(-1)  # Squeeze to [batch_size]
            probs = torch.sigmoid(logits).detach().cpu().tolist()
            test_fold_preds.extend(probs)

    test_preds_folds.append(test_fold_preds)  # Store test predictions from this fold

# -----------------------------
# Final Calculation and Submission
# -----------------------------
overall_oof_auc = roc_auc_score(df_trn['rule_violation'], oof_preds)
print(f"\n--- Overall {k_folds}-Fold OOF AUC: {overall_oof_auc:.4f} ---")

# Average test predictions across all folds
final_test_predictions = np.mean(test_preds_folds, axis=0)

# Create final submission file
submission = pd.DataFrame({
    "row_id": df_tst["row_id"],
    "rule_violation": final_test_predictions
})
submission.to_csv("submission.csv", index=False) # Save with a distinct name
print("K-Fold multi-input submission.csv created successfully!")
print(submission.head(10))


----- Fold 1 -----


2025-08-06 03:23:39.238464: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1754450619.431608      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1754450619.494440      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Some weights of XLMRobertaModel were not initialized from the model checkpoint at /kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 1015/1015 [06:29<00:00,  2.61it/s]


Epoch 1 Loss: 0.6855


Validating Epoch 1: 100%|██████████| 51/51 [00:05<00:00, 10.16it/s]


              precision    recall  f1-score   support

         0.0      0.925     0.430     0.587       200
         1.0      0.636     0.966     0.767       206

    accuracy                          0.702       406
   macro avg      0.780     0.698     0.677       406
weighted avg      0.778     0.702     0.678       406

AUC Score: 0.7641
  -> New best Val AUC for Fold 1: 0.7641


Training Epoch 2: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 2 Loss: 0.6008


Validating Epoch 2: 100%|██████████| 51/51 [00:05<00:00, 10.17it/s]


              precision    recall  f1-score   support

         0.0      0.846     0.685     0.757       200
         1.0      0.742     0.879     0.804       206

    accuracy                          0.783       406
   macro avg      0.794     0.782     0.781       406
weighted avg      0.793     0.783     0.781       406

AUC Score: 0.8498
  -> New best Val AUC for Fold 1: 0.8498


Training Epoch 3: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 3 Loss: 0.4822


Validating Epoch 3: 100%|██████████| 51/51 [00:05<00:00, 10.12it/s]


              precision    recall  f1-score   support

         0.0      0.890     0.685     0.774       200
         1.0      0.750     0.917     0.825       206

    accuracy                          0.803       406
   macro avg      0.820     0.801     0.800       406
weighted avg      0.819     0.803     0.800       406

AUC Score: 0.8746
  -> New best Val AUC for Fold 1: 0.8746


Training Epoch 4: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 4 Loss: 0.3908


Validating Epoch 4: 100%|██████████| 51/51 [00:05<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.870     0.705     0.779       200
         1.0      0.758     0.898     0.822       206

    accuracy                          0.803       406
   macro avg      0.814     0.802     0.801       406
weighted avg      0.813     0.803     0.801       406

AUC Score: 0.8777
  -> New best Val AUC for Fold 1: 0.8777


Training Epoch 5: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 5 Loss: 0.3144


Validating Epoch 5: 100%|██████████| 51/51 [00:05<00:00, 10.11it/s]


              precision    recall  f1-score   support

         0.0      0.867     0.715     0.784       200
         1.0      0.763     0.893     0.823       206

    accuracy                          0.805       406
   macro avg      0.815     0.804     0.803       406
weighted avg      0.814     0.805     0.804       406

AUC Score: 0.8745


Training Epoch 6: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 6 Loss: 0.2670


Validating Epoch 6: 100%|██████████| 51/51 [00:04<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.877     0.715     0.788       200
         1.0      0.765     0.903     0.829       206

    accuracy                          0.810       406
   macro avg      0.821     0.809     0.808       406
weighted avg      0.821     0.810     0.808       406

AUC Score: 0.8629


Training Epoch 7: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 7 Loss: 0.2372


Validating Epoch 7: 100%|██████████| 51/51 [00:05<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.847     0.750     0.796       200
         1.0      0.782     0.869     0.823       206

    accuracy                          0.810       406
   macro avg      0.815     0.809     0.809       406
weighted avg      0.814     0.810     0.810       406

AUC Score: 0.8626


Training Epoch 8: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 8 Loss: 0.2333


Validating Epoch 8: 100%|██████████| 51/51 [00:04<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.847     0.750     0.796       200
         1.0      0.782     0.869     0.823       206

    accuracy                          0.810       406
   macro avg      0.815     0.809     0.809       406
weighted avg      0.814     0.810     0.810       406

AUC Score: 0.8587
Fold 1 Best Val AUC: 0.8777


Fold 1 OOF Prediction: 100%|██████████| 51/51 [00:04<00:00, 10.22it/s]


Fold 1 OOF AUC Check: 0.8587 (This is the true validation AUC for this fold)


Fold 1 Test Prediction: 100%|██████████| 2/2 [00:00<00:00, 14.49it/s]



----- Fold 2 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at /kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 1 Loss: 0.6820


Validating Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.747     0.545     0.630       200
         1.0      0.650     0.820     0.725       206

    accuracy                          0.685       406
   macro avg      0.698     0.683     0.678       406
weighted avg      0.698     0.685     0.678       406

AUC Score: 0.7244
  -> New best Val AUC for Fold 2: 0.7244


Training Epoch 2: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 2 Loss: 0.5700


Validating Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 10.29it/s]


              precision    recall  f1-score   support

         0.0      0.819     0.725     0.769       200
         1.0      0.760     0.845     0.800       206

    accuracy                          0.786       406
   macro avg      0.790     0.785     0.785       406
weighted avg      0.789     0.786     0.785       406

AUC Score: 0.8476
  -> New best Val AUC for Fold 2: 0.8476


Training Epoch 3: 100%|██████████| 1015/1015 [06:27<00:00,  2.62it/s]


Epoch 3 Loss: 0.3992


Validating Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 10.25it/s]


              precision    recall  f1-score   support

         0.0      0.856     0.745     0.797       200
         1.0      0.780     0.879     0.826       206

    accuracy                          0.813       406
   macro avg      0.818     0.812     0.812       406
weighted avg      0.818     0.813     0.812       406

AUC Score: 0.8638
  -> New best Val AUC for Fold 2: 0.8638


Training Epoch 4: 100%|██████████| 1015/1015 [06:28<00:00,  2.62it/s]


Epoch 4 Loss: 0.2739


Validating Epoch 4: 100%|██████████| 51/51 [00:05<00:00, 10.19it/s]


              precision    recall  f1-score   support

         0.0      0.870     0.735     0.797       200
         1.0      0.776     0.893     0.831       206

    accuracy                          0.815       406
   macro avg      0.823     0.814     0.814       406
weighted avg      0.822     0.815     0.814       406

AUC Score: 0.8696
  -> New best Val AUC for Fold 2: 0.8696


Training Epoch 5: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 5 Loss: 0.2190


Validating Epoch 5: 100%|██████████| 51/51 [00:04<00:00, 10.26it/s]


              precision    recall  f1-score   support

         0.0      0.862     0.750     0.802       200
         1.0      0.784     0.883     0.831       206

    accuracy                          0.818       406
   macro avg      0.823     0.817     0.817       406
weighted avg      0.823     0.818     0.817       406

AUC Score: 0.8652


Training Epoch 6: 100%|██████████| 1015/1015 [06:27<00:00,  2.62it/s]


Epoch 6 Loss: 0.2151


Validating Epoch 6: 100%|██████████| 51/51 [00:04<00:00, 10.25it/s]


              precision    recall  f1-score   support

         0.0      0.880     0.730     0.798       200
         1.0      0.775     0.903     0.834       206

    accuracy                          0.818       406
   macro avg      0.827     0.816     0.816       406
weighted avg      0.826     0.818     0.816       406

AUC Score: 0.8677


Training Epoch 7: 100%|██████████| 1015/1015 [06:27<00:00,  2.62it/s]


Epoch 7 Loss: 0.2122


Validating Epoch 7: 100%|██████████| 51/51 [00:04<00:00, 10.26it/s]


              precision    recall  f1-score   support

         0.0      0.869     0.765     0.814       200
         1.0      0.796     0.888     0.839       206

    accuracy                          0.828       406
   macro avg      0.832     0.827     0.827       406
weighted avg      0.832     0.828     0.827       406

AUC Score: 0.8759
  -> New best Val AUC for Fold 2: 0.8759


Training Epoch 8: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 8 Loss: 0.2071


Validating Epoch 8: 100%|██████████| 51/51 [00:05<00:00, 10.17it/s]


              precision    recall  f1-score   support

         0.0      0.873     0.755     0.810       200
         1.0      0.790     0.893     0.838       206

    accuracy                          0.825       406
   macro avg      0.831     0.824     0.824       406
weighted avg      0.831     0.825     0.824       406

AUC Score: 0.8787
  -> New best Val AUC for Fold 2: 0.8787
Fold 2 Best Val AUC: 0.8787


Fold 2 OOF Prediction: 100%|██████████| 51/51 [00:04<00:00, 10.21it/s]


Fold 2 OOF AUC Check: 0.8787 (This is the true validation AUC for this fold)


Fold 2 Test Prediction: 100%|██████████| 2/2 [00:00<00:00, 14.57it/s]



----- Fold 3 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at /kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 1 Loss: 0.6866


Validating Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 10.21it/s]


              precision    recall  f1-score   support

         0.0      0.775     0.430     0.553       200
         1.0      0.614     0.879     0.723       206

    accuracy                          0.658       406
   macro avg      0.694     0.654     0.638       406
weighted avg      0.693     0.658     0.639       406

AUC Score: 0.7661
  -> New best Val AUC for Fold 3: 0.7661


Training Epoch 2: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 2 Loss: 0.6046


Validating Epoch 2: 100%|██████████| 51/51 [00:05<00:00, 10.19it/s]


              precision    recall  f1-score   support

         0.0      0.840     0.605     0.703       200
         1.0      0.698     0.888     0.782       206

    accuracy                          0.749       406
   macro avg      0.769     0.747     0.743       406
weighted avg      0.768     0.749     0.743       406

AUC Score: 0.8613
  -> New best Val AUC for Fold 3: 0.8613


Training Epoch 3: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 3 Loss: 0.4655


Validating Epoch 3: 100%|██████████| 51/51 [00:05<00:00, 10.13it/s]


              precision    recall  f1-score   support

         0.0      0.881     0.665     0.758       200
         1.0      0.737     0.913     0.816       206

    accuracy                          0.791       406
   macro avg      0.809     0.789     0.787       406
weighted avg      0.808     0.791     0.787       406

AUC Score: 0.8657
  -> New best Val AUC for Fold 3: 0.8657


Training Epoch 4: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 4 Loss: 0.3457


Validating Epoch 4: 100%|██████████| 51/51 [00:05<00:00, 10.19it/s]


              precision    recall  f1-score   support

         0.0      0.842     0.825     0.833       200
         1.0      0.833     0.850     0.841       206

    accuracy                          0.837       406
   macro avg      0.838     0.837     0.837       406
weighted avg      0.838     0.837     0.837       406

AUC Score: 0.8642


Training Epoch 5: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 5 Loss: 0.2732


Validating Epoch 5: 100%|██████████| 51/51 [00:04<00:00, 10.21it/s]


              precision    recall  f1-score   support

         0.0      0.876     0.775     0.822       200
         1.0      0.803     0.893     0.846       206

    accuracy                          0.835       406
   macro avg      0.840     0.834     0.834       406
weighted avg      0.839     0.835     0.834       406

AUC Score: 0.8651


Training Epoch 6: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 6 Loss: 0.2302


Validating Epoch 6: 100%|██████████| 51/51 [00:04<00:00, 10.24it/s]


              precision    recall  f1-score   support

         0.0      0.861     0.835     0.848       200
         1.0      0.844     0.869     0.856       206

    accuracy                          0.852       406
   macro avg      0.853     0.852     0.852       406
weighted avg      0.852     0.852     0.852       406

AUC Score: 0.8662
  -> New best Val AUC for Fold 3: 0.8662


Training Epoch 7: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 7 Loss: 0.2181


Validating Epoch 7: 100%|██████████| 51/51 [00:04<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.859     0.820     0.839       200
         1.0      0.833     0.869     0.850       206

    accuracy                          0.845       406
   macro avg      0.846     0.844     0.845       406
weighted avg      0.845     0.845     0.845       406

AUC Score: 0.8827
  -> New best Val AUC for Fold 3: 0.8827


Training Epoch 8: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 8 Loss: 0.2090


Validating Epoch 8: 100%|██████████| 51/51 [00:04<00:00, 10.25it/s]


              precision    recall  f1-score   support

         0.0      0.866     0.805     0.834       200
         1.0      0.823     0.879     0.850       206

    accuracy                          0.842       406
   macro avg      0.844     0.842     0.842       406
weighted avg      0.844     0.842     0.842       406

AUC Score: 0.8772
Fold 3 Best Val AUC: 0.8827


Fold 3 OOF Prediction: 100%|██████████| 51/51 [00:04<00:00, 10.23it/s]


Fold 3 OOF AUC Check: 0.8772 (This is the true validation AUC for this fold)


Fold 3 Test Prediction: 100%|██████████| 2/2 [00:00<00:00, 14.66it/s]



----- Fold 4 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at /kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 1 Loss: 0.6881


Validating Epoch 1: 100%|██████████| 51/51 [00:05<00:00, 10.14it/s]


              precision    recall  f1-score   support

         0.0      0.667     0.593     0.628       199
         1.0      0.646     0.715     0.679       207

    accuracy                          0.655       406
   macro avg      0.656     0.654     0.653       406
weighted avg      0.656     0.655     0.654       406

AUC Score: 0.7517
  -> New best Val AUC for Fold 4: 0.7517


Training Epoch 2: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 2 Loss: 0.5442


Validating Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 10.21it/s]


              precision    recall  f1-score   support

         0.0      0.787     0.744     0.765       199
         1.0      0.766     0.807     0.786       207

    accuracy                          0.776       406
   macro avg      0.777     0.775     0.775       406
weighted avg      0.776     0.776     0.776       406

AUC Score: 0.8731
  -> New best Val AUC for Fold 4: 0.8731


Training Epoch 3: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 3 Loss: 0.3343


Validating Epoch 3: 100%|██████████| 51/51 [00:05<00:00, 10.13it/s]


              precision    recall  f1-score   support

         0.0      0.832     0.774     0.802       199
         1.0      0.796     0.850     0.822       207

    accuracy                          0.813       406
   macro avg      0.814     0.812     0.812       406
weighted avg      0.814     0.813     0.812       406

AUC Score: 0.8810
  -> New best Val AUC for Fold 4: 0.8810


Training Epoch 4: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 4 Loss: 0.2563


Validating Epoch 4: 100%|██████████| 51/51 [00:04<00:00, 10.24it/s]


              precision    recall  f1-score   support

         0.0      0.832     0.774     0.802       199
         1.0      0.796     0.850     0.822       207

    accuracy                          0.813       406
   macro avg      0.814     0.812     0.812       406
weighted avg      0.814     0.813     0.812       406

AUC Score: 0.8790


Training Epoch 5: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 5 Loss: 0.2410


Validating Epoch 5: 100%|██████████| 51/51 [00:04<00:00, 10.22it/s]


              precision    recall  f1-score   support

         0.0      0.833     0.804     0.818       199
         1.0      0.818     0.845     0.831       207

    accuracy                          0.825       406
   macro avg      0.826     0.825     0.825       406
weighted avg      0.825     0.825     0.825       406

AUC Score: 0.8910
  -> New best Val AUC for Fold 4: 0.8910


Training Epoch 6: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 6 Loss: 0.2292


Validating Epoch 6: 100%|██████████| 51/51 [00:05<00:00, 10.18it/s]


              precision    recall  f1-score   support

         0.0      0.832     0.824     0.828       199
         1.0      0.833     0.841     0.837       207

    accuracy                          0.833       406
   macro avg      0.833     0.832     0.832       406
weighted avg      0.833     0.833     0.832       406

AUC Score: 0.8949
  -> New best Val AUC for Fold 4: 0.8949


Training Epoch 7: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 7 Loss: 0.2195


Validating Epoch 7: 100%|██████████| 51/51 [00:05<00:00, 10.19it/s]


              precision    recall  f1-score   support

         0.0      0.840     0.819     0.830       199
         1.0      0.830     0.850     0.840       207

    accuracy                          0.835       406
   macro avg      0.835     0.835     0.835       406
weighted avg      0.835     0.835     0.835       406

AUC Score: 0.8964
  -> New best Val AUC for Fold 4: 0.8964


Training Epoch 8: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 8 Loss: 0.2164


Validating Epoch 8: 100%|██████████| 51/51 [00:04<00:00, 10.23it/s]


              precision    recall  f1-score   support

         0.0      0.836     0.819     0.827       199
         1.0      0.829     0.845     0.837       207

    accuracy                          0.833       406
   macro avg      0.833     0.832     0.832       406
weighted avg      0.833     0.833     0.832       406

AUC Score: 0.8972
  -> New best Val AUC for Fold 4: 0.8972
Fold 4 Best Val AUC: 0.8972


Fold 4 OOF Prediction: 100%|██████████| 51/51 [00:04<00:00, 10.24it/s]


Fold 4 OOF AUC Check: 0.8972 (This is the true validation AUC for this fold)


Fold 4 Test Prediction: 100%|██████████| 2/2 [00:00<00:00, 14.62it/s]



----- Fold 5 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at /kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training Epoch 1: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 1 Loss: 0.6786


Validating Epoch 1: 100%|██████████| 51/51 [00:04<00:00, 10.26it/s]


              precision    recall  f1-score   support

         0.0      0.724     0.422     0.533       199
         1.0      0.602     0.845     0.703       206

    accuracy                          0.637       405
   macro avg      0.663     0.633     0.618       405
weighted avg      0.662     0.637     0.620       405

AUC Score: 0.6905
  -> New best Val AUC for Fold 5: 0.6905


Training Epoch 2: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 2 Loss: 0.5977


Validating Epoch 2: 100%|██████████| 51/51 [00:04<00:00, 10.23it/s]


              precision    recall  f1-score   support

         0.0      0.799     0.658     0.722       199
         1.0      0.718     0.840     0.774       206

    accuracy                          0.751       405
   macro avg      0.758     0.749     0.748       405
weighted avg      0.758     0.751     0.748       405

AUC Score: 0.8243
  -> New best Val AUC for Fold 5: 0.8243


Training Epoch 3: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 3 Loss: 0.4665


Validating Epoch 3: 100%|██████████| 51/51 [00:04<00:00, 10.24it/s]


              precision    recall  f1-score   support

         0.0      0.857     0.663     0.748       199
         1.0      0.733     0.893     0.805       206

    accuracy                          0.780       405
   macro avg      0.795     0.778     0.777       405
weighted avg      0.794     0.780     0.777       405

AUC Score: 0.8475
  -> New best Val AUC for Fold 5: 0.8475


Training Epoch 4: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 4 Loss: 0.3603


Validating Epoch 4: 100%|██████████| 51/51 [00:04<00:00, 10.20it/s]


              precision    recall  f1-score   support

         0.0      0.890     0.693     0.780       199
         1.0      0.756     0.917     0.829       206

    accuracy                          0.807       405
   macro avg      0.823     0.805     0.804       405
weighted avg      0.822     0.807     0.805       405

AUC Score: 0.8485
  -> New best Val AUC for Fold 5: 0.8485


Training Epoch 5: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 5 Loss: 0.2894


Validating Epoch 5: 100%|██████████| 51/51 [00:04<00:00, 10.23it/s]


              precision    recall  f1-score   support

         0.0      0.871     0.744     0.802       199
         1.0      0.783     0.893     0.834       206

    accuracy                          0.820       405
   macro avg      0.827     0.818     0.818       405
weighted avg      0.826     0.820     0.819       405

AUC Score: 0.8551
  -> New best Val AUC for Fold 5: 0.8551


Training Epoch 6: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 6 Loss: 0.2401


Validating Epoch 6: 100%|██████████| 51/51 [00:04<00:00, 10.27it/s]


              precision    recall  f1-score   support

         0.0      0.861     0.749     0.801       199
         1.0      0.784     0.883     0.831       206

    accuracy                          0.817       405
   macro avg      0.823     0.816     0.816       405
weighted avg      0.822     0.817     0.816       405

AUC Score: 0.8368


Training Epoch 7: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 7 Loss: 0.2124


Validating Epoch 7: 100%|██████████| 51/51 [00:04<00:00, 10.24it/s]


              precision    recall  f1-score   support

         0.0      0.855     0.769     0.810       199
         1.0      0.796     0.874     0.833       206

    accuracy                          0.822       405
   macro avg      0.826     0.821     0.821       405
weighted avg      0.825     0.822     0.822       405

AUC Score: 0.8390


Training Epoch 8: 100%|██████████| 1015/1015 [06:28<00:00,  2.61it/s]


Epoch 8 Loss: 0.2106


Validating Epoch 8: 100%|██████████| 51/51 [00:04<00:00, 10.25it/s]


              precision    recall  f1-score   support

         0.0      0.865     0.774     0.817       199
         1.0      0.802     0.883     0.841       206

    accuracy                          0.830       405
   macro avg      0.833     0.829     0.829       405
weighted avg      0.833     0.830     0.829       405

AUC Score: 0.8411
Fold 5 Best Val AUC: 0.8551


Fold 5 OOF Prediction: 100%|██████████| 51/51 [00:04<00:00, 10.26it/s]


Fold 5 OOF AUC Check: 0.8411 (This is the true validation AUC for this fold)


Fold 5 Test Prediction: 100%|██████████| 2/2 [00:00<00:00, 14.58it/s]


--- Overall 5-Fold OOF AUC: 0.8571 ---
K-Fold multi-input submission.csv created successfully!
   row_id  rule_violation
0    2029        0.027804
1    2030        0.980489
2    2031        0.984540
3    2032        0.983370
4    2033        0.984117
5    2034        0.015482
6    2035        0.983249
7    2036        0.016281
8    2037        0.019867
9    2038        0.978963



