In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
for dirname, _, filenames in os.walk('.'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

.\Jigsaw.ipynb
.\Jigsaw.py
.\Jigsaw_v14.py
.\jigsaw_v15.ipynb
.\jigsaw_v17.ipynb
.\jigsaw_v17_contrastive.ipynb
.\output.log
.\README.md
.\sample_submission.csv
.\StackTrace
.\submission.csv
.\test.csv
.\train.csv
.\.git\COMMIT_EDITMSG
.\.git\config
.\.git\description
.\.git\FETCH_HEAD
.\.git\HEAD
.\.git\index
.\.git\ORIG_HEAD
.\.git\packed-refs
.\.git\hooks\applypatch-msg.sample
.\.git\hooks\commit-msg.sample
.\.git\hooks\fsmonitor-watchman.sample
.\.git\hooks\post-update.sample
.\.git\hooks\pre-applypatch.sample
.\.git\hooks\pre-commit.sample
.\.git\hooks\pre-merge-commit.sample
.\.git\hooks\pre-push.sample
.\.git\hooks\pre-rebase.sample
.\.git\hooks\pre-receive.sample
.\.git\hooks\prepare-commit-msg.sample
.\.git\hooks\push-to-checkout.sample
.\.git\hooks\sendemail-validate.sample
.\.git\hooks\update.sample
.\.git\info\exclude
.\.git\logs\HEAD
.\.git\logs\refs\heads\main
.\.git\logs\refs\remotes\origin\HEAD
.\.git\logs\refs\remotes\origin\main
.\.git\objects\01\d3562f054ec88ad3f5709

In [2]:
import numpy as np
import pandas as pd
import torch
import torch_directml
import torch.nn as nn
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import StratifiedKFold
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel, get_linear_schedule_with_warmup
from pytorch_metric_learning.losses import SupConLoss

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_device():
    # Try to detect NVIDIA CUDA GPU first
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"Using NVIDIA CUDA GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
        return device

    # If no NVIDIA CUDA GPU, try to detect DirectML GPU
    try:
        if torch_directml.is_available():
            device = torch_directml.device()
            print(f"Using DirectML GPU: {device}")
            # Add a small test to ensure it's truly usable
            try:
                _ = torch.tensor([1], device=device)
            except Exception as e:
                print(f"Warning: DirectML device found but not usable ({e}). Falling back to CPU.")
                return torch.device("cpu")
            return device
        else:
            print("DirectML is NOT available.")
    except ImportError:
        print("torch_directml not installed.")
    except Exception as e:
        print(f"Error checking DirectML: {e}. Falling back to CPU.")

    # If neither GPU is found, fall back to CPU
    device = torch.device("cpu")
    print("No GPU (NVIDIA CUDA or DirectML) found. Using CPU.")
    return device

In [4]:
# -----------------------------
# Load and preprocess data
# -----------------------------
# Use Kaggle paths when running on Kaggle
# MODEL_PATH = "/kaggle/input/xlm-roberta-base-offline/xlm_roberta_base_offline"
MODEL_PATH = "C:/Users/satra/Downloads/xlm_roberta_base_offline"

# trn = "/kaggle/input/jigsaw-agile-community-rules/train.csv"
# tst = "/kaggle/input/jigsaw-agile-community-rules/test.csv"
trn = "C:/Users/satra/Downloads/jigsaw-agile-community-rules/train.csv"
tst = "C:/Users/satra/Downloads/jigsaw-agile-community-rules/test.csv"
df_trn = pd.read_csv(trn)
df_trn = df_trn.sample(frac=.01, random_state=42).reset_index(drop=True)

df_tst = pd.read_csv(tst)


def fill_empty_examples_pandas(df):
    example_cols = ['positive_example_1', 'positive_example_2', 'negative_example_1', 'negative_example_2']
    for col in example_cols:
        df[col] = df[col].fillna('').astype(str)

    df['positive_example_1'] = df['positive_example_1'].mask(df['positive_example_1'] == '', df['positive_example_2'])
    df['positive_example_2'] = df['positive_example_2'].mask(df['positive_example_2'] == '', df['positive_example_1'])

    df['negative_example_1'] = df['negative_example_1'].mask(df['negative_example_1'] == '', df['negative_example_2'])
    df['negative_example_2'] = df['negative_example_2'].mask(df['negative_example_2'] == '', df['negative_example_1'])

    return df


def getText(value):
    return str(value) if pd.notna(value) else ''


def extract_texts(row):
    return {
        "body": getText(row["body"]),
        "rule": getText(row["rule"]),
        "subreddit": getText(row["subreddit"]),
        "pos1": f"{getText(row['positive_example_1'])}",
        "pos2": f"{getText(row['positive_example_2'])}",
        "neg1": f"{getText(row['negative_example_1'])}",
        "neg2": f"{getText(row['negative_example_2'])}",
    }

df_trn = fill_empty_examples_pandas(df_trn)
df_tst = fill_empty_examples_pandas(df_tst)

df_trn["inputs"] = df_trn.apply(extract_texts, axis=1)

df_tst['text_to_classify'] = df_tst['body'].apply(getText)
df_tst["inputs"] = df_tst.apply(extract_texts, axis=1)

text_feature_cols = [
    'body',
    'rule',
    'subreddit',
    'positive_example_1',
    'positive_example_2',
    'negative_example_1',
    'negative_example_2'
]

print("--- Comprehensive NaN Inspection for All Text Feature Columns ---")

# Count NaNs for each text feature column
print("NaN Counts per Text Feature Column ---")
print(df_trn[text_feature_cols].isnull().sum())

# Analyze rows with NaNs in 'body' (most critical)
print("Analysis for 'body' column NaNs ---")
body_nan_rows = df_trn[df_trn['body'].isnull()]
if not body_nan_rows.empty:
    print(f"Number of rows with NaN in 'body': {len(body_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'body':")
    print(body_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in 'body' column.")

# Analyze rows with NaNs in 'rule'
print("Analysis for 'rule' column NaNs ---")
rule_nan_rows = df_trn[df_trn['rule'].isnull()]
if not rule_nan_rows.empty:
    print(f"Number of rows with NaN in 'rule': {len(rule_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'rule':")
    print(rule_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in 'rule' column.")

# Analyze rows with NaNs in 'subreddit'
print("Analysis for 'subreddit' column NaNs ---")
subreddit_nan_rows = df_trn[df_trn['subreddit'].isnull()]
if not subreddit_nan_rows.empty:
    print(f"Number of rows with NaN in 'subreddit': {len(subreddit_nan_rows)}")
    print("Rule violation distribution for rows with NaN in 'subreddit':")
    print(subreddit_nan_rows['rule_violation'].value_counts(normalize=True))
else:
    print("No NaN values found in any example column.")

# Overall rule_violation distribution (for comparison)
print(f"Overall rule_violation distribution: ---")
print(df_trn['rule_violation'].value_counts(normalize=True))

--- Comprehensive NaN Inspection for All Text Feature Columns ---
NaN Counts per Text Feature Column ---
body                  0
rule                  0
subreddit             0
positive_example_1    0
positive_example_2    0
negative_example_1    0
negative_example_2    0
dtype: int64
Analysis for 'body' column NaNs ---
No NaN values found in 'body' column.
Analysis for 'rule' column NaNs ---
No NaN values found in 'rule' column.
Analysis for 'subreddit' column NaNs ---
No NaN values found in any example column.
Overall rule_violation distribution: ---
rule_violation
0    0.55
1    0.45
Name: proportion, dtype: float64


In [5]:
N_EPOCHS = 2
k_folds = 5
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# -----------------------------
# Dataset
# -----------------------------
class MultiInputDataset(Dataset):
    def __init__(self, df, tokenizer, max_len=128, is_test=False): # Renamed df_trn to df for generality
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.is_test = is_test

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        item = {}
        for field in ["text_to_classify", "rule", "subreddit"]:
            encoded = self.tokenizer(
                row[field],
                truncation=True,
                padding='max_length',
                max_length=self.max_len,
                return_tensors="pt"
            )

            for key in encoded:
                item[f"{field}_{key}"] = encoded[key].squeeze(0)
        if not self.is_test:
          item["label"] = torch.tensor(row["rule_violation"], dtype=torch.float32)
        return item

# -----------------------------
# Model
# -----------------------------
class MultiInputBERT(nn.Module):
    def __init__(self, model_name=MODEL_PATH, embedding_dim=256): # Added embedding_dim
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        
        # Projection head for contrastive learning
        self.projection_head = nn.Sequential(
            nn.Linear(768 * 3, 512), # Input is concatenated CLS tokens
            nn.ReLU(),
            nn.Linear(512, embedding_dim) # Output embedding for contrastive loss
        )
        
        # Original classifier for downstream task (can be re-attached or fine-tuned)
        self.classifier = nn.Sequential(
            nn.Linear(embedding_dim, 1) # Classifier takes the projected embedding
        )

    def forward(self, inputs):
        cls_outputs = []
        for field in ["text_to_classify", "rule", "subreddit"]:
            out = self.bert(
                input_ids=inputs[f"{field}_input_ids"],
                attention_mask=inputs[f"{field}_attention_mask"]
            )
            cls_outputs.append(out.last_hidden_state[:, 0])  # CLS token
        
        x = torch.cat(cls_outputs, dim=1)
        x = self.dropout(x)
        
        # Get embeddings from projection head
        embeddings = self.projection_head(x)
        
        # Get logits from classifier (for downstream task)
        logits = self.classifier(embeddings)
        
        return logits, embeddings # Return both

In [6]:
# -----------------------------
# Training and Evaluation
# -----------------------------
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = get_device()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

oof_preds = np.zeros(len(df_trn))
test_preds_folds = [] # This is correct

for fold, (train_idx_orig, val_idx_orig) in enumerate(skf.split(df_trn, df_trn["rule_violation"])):
    print(f"----- Fold {fold+1} -----")
    # train_df = df_trn.iloc[train_idx].reset_index(drop=True)

    # Create original train and validation DataFrames for this fold
    # These are the original body, rules, subreddits, and examples
    fold_train_df_orig = df_trn.iloc[train_idx_orig].reset_index(drop=True)
    fold_val_df_orig = df_trn.iloc[val_idx_orig].reset_index(drop=True)

    # Expand the tarining data for this fold
    expanded_train_data = []
    for idx, row in fold_train_df_orig.iterrows():
        rule_text = getText(row['rule'])
        subreddit_text = getText(row['subreddit'])
        # Add original body as a training sample
        expanded_train_data.append({
            'text_to_classify': getText(row['body']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': row['rule_violation']
        })
        # Add positive examples
        expanded_train_data.append({
            'text_to_classify': getText(row['positive_example_1']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 1.0
        })
        expanded_train_data.append({
            'text_to_classify': getText(row['positive_example_2']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 1.0
        })
        # Add negative examples
        expanded_train_data.append({
            'text_to_classify': getText(row['negative_example_1']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 0.0
        })
        expanded_train_data.append({
            'text_to_classify': getText(row['negative_example_2']),
            'rule': rule_text,
            'subreddit': subreddit_text,
            'rule_violation': 0.0
        })

    # Create the expanded training DataFrame for this fold
    fold_train_df_expanded = pd.DataFrame(expanded_train_data)
    fold_train_df_expanded = fold_train_df_expanded[fold_train_df_expanded['text_to_classify'] != ''].reset_index(drop=True)

    # 3. Prepare the VALIDATION data for this fold (using original body)
    # Map 'body' to 'text_to_classify' for the validation set
    fold_val_df_for_model = fold_val_df_orig.copy()
    fold_val_df_for_model['text_to_classify'] = fold_val_df_for_model['body']

    # 4. Create Datasets and DataLoaders
    train_dataset = MultiInputDataset(fold_train_df_expanded, tokenizer) # Train on expanded data
    val_dataset = MultiInputDataset(fold_val_df_for_model, tokenizer) # Validate on original body

    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=8) # Use a consistent batch size

    test_loader = DataLoader(MultiInputDataset(df_tst, tokenizer, is_test=True), batch_size=8, shuffle=False)


    model = MultiInputBERT().to(device)
    optimizer = AdamW(model.parameters(), lr=1e-6)
    
    # Change criterion to SupervisedContrastiveLoss
    criterion = SupConLoss(temperature=0.07) # Common temperature value
    
    num_training_steps_per_fold = len(train_loader) * N_EPOCHS
    num_warmup_steps_per_fold = int(num_training_steps_per_fold * 0.05)

    # Initialize the scheduler
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps_per_fold,
        num_training_steps=num_training_steps_per_fold
    )

    # Training Loop for this fold
    best_auc = -1.0 # Track best AUC for this fold
    best_model_state = None # To save the best model for this fold

    for epoch in range(N_EPOCHS):
        model.train()
        total_loss = 0
        for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
            inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
            labels = batch["label"].to(device)

            optimizer.zero_grad()
            
            # Get both logits and embeddings
            logits, embeddings = model(inputs) 
            
            # Calculate contrastive loss
            loss = criterion(embeddings, labels) 
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1} Loss: {total_loss / len(train_loader):.4f}")

        # Eval (for monitoring, still using AUC on classification task)
        model.eval()
        preds_raw, labels_all = [], []
        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Validating Epoch {epoch+1}"):
                inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
                labels = batch["label"].to(device)
                
                # Get logits for evaluation
                logits, _ = model(inputs) 
                logits = logits.squeeze(-1) # Squeeze to [batch_size]

                probs = torch.sigmoid(logits).detach().cpu().tolist()
                preds_raw.extend(probs)
                labels_all.extend(labels.cpu().tolist())

            # Hard labels (for classification report, optional)
            preds = [int(p > 0.5) for p in preds_raw]

        # Print metrics
        print(classification_report(labels_all, preds, digits=3, zero_division=0))

        curr_auc = roc_auc_score(labels_all, preds_raw)
        print(f"AUC Score: {curr_auc:.4f}")

        # Save the best model for this fold based on validation AUC
        if curr_auc > best_auc:
            best_auc = curr_auc
            best_model_state = model.state_dict() # Save model weights
            print(f"  -> New best Val AUC for Fold {fold+1}: {best_auc:.4f}")

    # 6. Load best model state for this fold
    model.load_state_dict(best_model_state) # Use best_model_state
    print(f"Fold {fold+1} Best Val AUC: {best_auc:.4f}")

    # Make OOF predictions for this fold's validation set
    model.eval()
    fold_val_preds_list = []
    fold_val_true_list = []
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Fold {fold + 1} OOF Prediction"):
            inputs = {k: v.to(device) for k, v in batch.items() if k != "label"}
            labels = batch["label"].to(device)
            logits, _ = model(inputs)
            logits = logits.squeeze(-1)  # Squeeze to [batch_size]
            probs = torch.sigmoid(logits).detach().cpu().tolist()
            fold_val_preds_list.extend(probs)
            fold_val_true_list.extend(labels.cpu().tolist())

    # Sanity check: Calculate AUC for this fold's OOF predictions
    oof_fold_auc_check = roc_auc_score(fold_val_true_list, fold_val_preds_list)
    print(f"Fold {fold + 1} OOF AUC Check: {oof_fold_auc_check:.4f} (This is the true validation AUC for this fold)")

    # Assign predictions to the correct indices in the global oof_preds array
    oof_preds[val_idx_orig] = np.array(fold_val_preds_list)  # Use val_idx_orig from kf.split
    # Make predictions on the TEST set using this fold's best model
    test_fold_preds = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc=f"Fold {fold + 1} Test Prediction"):
            inputs = {k: v.to(device) for k, v in batch.items()}
            logits, _ = model(inputs)
            logits = logits.squeeze(-1)  # Squeeze to [batch_size]
            probs = torch.sigmoid(logits).detach().cpu().tolist()
            test_fold_preds.extend(probs)

    test_preds_folds.append(test_fold_preds)  # Store test predictions from this fold

# -----------------------------
# Final Calculation and Submission
# -----------------------------
overall_oof_auc = roc_auc_score(df_trn["rule_violation"], oof_preds)
print(f"Overall {k_folds}-Fold OOF AUC: {overall_oof_auc:.4f} ---")

# Average test predictions across all folds
final_test_predictions = np.mean(test_preds_folds, axis=0)

# Create final submission file
submission = pd.DataFrame({
    "row_id": df_tst["row_id"],
    "rule_violation": final_test_predictions
})
submission.to_csv("submission.csv", index=False) # Save with a distinct name
print("K-Fold multi-input submission.csv created successfully!")
print(submission.head(10))

Using DirectML GPU: privateuseone:0
----- Fold 1 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at C:/Users/satra/Downloads/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)
  torch._foreach_lerp_(device_exp_avgs, device_grads, 1 - beta1)
Training Epoch 1: 100%|██████████| 10/10 [02:47<00:00, 16.75s/it]


Epoch 1 Loss: 2.9567


Validating Epoch 1: 100%|██████████| 1/1 [00:03<00:00,  3.21s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.5000
  -> New best Val AUC for Fold 1: 0.5000


  return forward_call(*args, **kwargs)
Training Epoch 2: 100%|██████████| 10/10 [02:48<00:00, 16.86s/it]


Epoch 2 Loss: 3.1502


Validating Epoch 2: 100%|██████████| 1/1 [00:03<00:00,  3.36s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.0000
Fold 1 Best Val AUC: 0.5000


  return forward_call(*args, **kwargs)
Fold 1 OOF Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.32s/it]


Fold 1 OOF AUC Check: 0.0000 (This is the true validation AUC for this fold)


  return forward_call(*args, **kwargs)
Fold 1 Test Prediction: 100%|██████████| 2/2 [00:07<00:00,  3.71s/it]


----- Fold 2 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at C:/Users/satra/Downloads/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)
Training Epoch 1: 100%|██████████| 10/10 [03:08<00:00, 18.83s/it]


Epoch 1 Loss: 3.2391


Validating Epoch 1: 100%|██████████| 1/1 [00:03<00:00,  3.18s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.5000
  -> New best Val AUC for Fold 2: 0.5000


  return forward_call(*args, **kwargs)
Training Epoch 2: 100%|██████████| 10/10 [03:21<00:00, 20.12s/it]


Epoch 2 Loss: 2.9362


Validating Epoch 2: 100%|██████████| 1/1 [00:03<00:00,  3.22s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.5000
Fold 2 Best Val AUC: 0.5000


  return forward_call(*args, **kwargs)
Fold 2 OOF Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.17s/it]


Fold 2 OOF AUC Check: 0.5000 (This is the true validation AUC for this fold)


  return forward_call(*args, **kwargs)
Fold 2 Test Prediction: 100%|██████████| 2/2 [00:07<00:00,  3.82s/it]


----- Fold 3 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at C:/Users/satra/Downloads/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)
Training Epoch 1: 100%|██████████| 10/10 [02:55<00:00, 17.57s/it]


Epoch 1 Loss: 3.2165


Validating Epoch 1: 100%|██████████| 1/1 [00:03<00:00,  3.15s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.5000
  -> New best Val AUC for Fold 3: 0.5000


  return forward_call(*args, **kwargs)
Training Epoch 2: 100%|██████████| 10/10 [03:05<00:00, 18.54s/it]


Epoch 2 Loss: 3.3106


Validating Epoch 2: 100%|██████████| 1/1 [00:03<00:00,  3.29s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         2
         1.0      0.500     1.000     0.667         2

    accuracy                          0.500         4
   macro avg      0.250     0.500     0.333         4
weighted avg      0.250     0.500     0.333         4

AUC Score: 0.5000
Fold 3 Best Val AUC: 0.5000


  return forward_call(*args, **kwargs)
Fold 3 OOF Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.16s/it]


Fold 3 OOF AUC Check: 0.5000 (This is the true validation AUC for this fold)


  return forward_call(*args, **kwargs)
Fold 3 Test Prediction: 100%|██████████| 2/2 [00:07<00:00,  3.98s/it]


----- Fold 4 -----


Some weights of XLMRobertaModel were not initialized from the model checkpoint at C:/Users/satra/Downloads/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  return forward_call(*args, **kwargs)
Training Epoch 1: 100%|██████████| 10/10 [02:42<00:00, 16.21s/it]


Epoch 1 Loss: 3.1020


Validating Epoch 1: 100%|██████████| 1/1 [00:03<00:00,  3.23s/it]


              precision    recall  f1-score   support

         0.0      1.000     0.500     0.667         2
         1.0      0.667     1.000     0.800         2

    accuracy                          0.750         4
   macro avg      0.833     0.750     0.733         4
weighted avg      0.833     0.750     0.733         4

AUC Score: 0.5000
  -> New best Val AUC for Fold 4: 0.5000


  return forward_call(*args, **kwargs)
Training Epoch 2: 100%|██████████| 10/10 [02:41<00:00, 16.14s/it]


Epoch 2 Loss: 3.1916


Validating Epoch 2: 100%|██████████| 1/1 [00:03<00:00,  3.08s/it]


              precision    recall  f1-score   support

         0.0      1.000     0.500     0.667         2
         1.0      0.667     1.000     0.800         2

    accuracy                          0.750         4
   macro avg      0.833     0.750     0.733         4
weighted avg      0.833     0.750     0.733         4

AUC Score: 0.5000
Fold 4 Best Val AUC: 0.5000


  return forward_call(*args, **kwargs)
Fold 4 OOF Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.09s/it]


Fold 4 OOF AUC Check: 0.5000 (This is the true validation AUC for this fold)


  return forward_call(*args, **kwargs)
Fold 4 Test Prediction: 100%|██████████| 2/2 [00:07<00:00,  3.76s/it]
Some weights of XLMRobertaModel were not initialized from the model checkpoint at C:/Users/satra/Downloads/xlm_roberta_base_offline and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


----- Fold 5 -----


  return forward_call(*args, **kwargs)
Training Epoch 1: 100%|██████████| 10/10 [02:39<00:00, 15.97s/it]


Epoch 1 Loss: 2.6613


Validating Epoch 1: 100%|██████████| 1/1 [00:03<00:00,  3.20s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         3
         1.0      0.250     1.000     0.400         1

    accuracy                          0.250         4
   macro avg      0.125     0.500     0.200         4
weighted avg      0.062     0.250     0.100         4

AUC Score: 1.0000
  -> New best Val AUC for Fold 5: 1.0000


  return forward_call(*args, **kwargs)
Training Epoch 2: 100%|██████████| 10/10 [03:00<00:00, 18.10s/it]


Epoch 2 Loss: 3.3046


Validating Epoch 2: 100%|██████████| 1/1 [00:03<00:00,  3.18s/it]


              precision    recall  f1-score   support

         0.0      0.000     0.000     0.000         3
         1.0      0.250     1.000     0.400         1

    accuracy                          0.250         4
   macro avg      0.125     0.500     0.200         4
weighted avg      0.062     0.250     0.100         4

AUC Score: 1.0000
Fold 5 Best Val AUC: 1.0000


  return forward_call(*args, **kwargs)
Fold 5 OOF Prediction: 100%|██████████| 1/1 [00:03<00:00,  3.16s/it]


Fold 5 OOF AUC Check: 1.0000 (This is the true validation AUC for this fold)


  return forward_call(*args, **kwargs)
Fold 5 Test Prediction: 100%|██████████| 2/2 [00:07<00:00,  3.89s/it]

Overall 5-Fold OOF AUC: 0.4141 ---
K-Fold multi-input submission.csv created successfully!
   row_id  rule_violation
0    2029        0.536665
1    2030        0.533946
2    2031        0.536456
3    2032        0.536439
4    2033        0.536737
5    2034        0.536973
6    2035        0.536562
7    2036        0.536686
8    2037        0.536687
9    2038        0.536781



