# Speech-based Grammar Score Prediction

## Importing Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
import transformers
from transformers import AutoFeatureExtractor, AutoModel, Wav2Vec2FeatureExtractor
from tqdm import tqdm
import warnings
import random
from torch.cuda.amp import autocast, GradScaler
import gc

# Suppress all Python warnings for the rest of the session to keep the
# notebook output readable.
# NOTE(review): this also hides deprecation warnings raised by the imported
# libraries; consider narrowing the filter when debugging.
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive (for Google Colab)
# The `google.colab` import is attempted first; if it or the mount fails the
# script falls back to local paths. `Exception` is caught instead of using a
# bare `except:` so that KeyboardInterrupt / SystemExit are not swallowed.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    IN_COLAB = True
except Exception:
    IN_COLAB = False
    print("Not running in Colab")

# Reproducibility: seed the Python, NumPy and PyTorch random number generators
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Pick the compute device: GPU when one is visible, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Configuration parameters
# Every dataset/output path hangs off a single root: the Drive dataset folder
# when running in Colab, the current directory otherwise.
# Update the root to point to your dataset location.
_DATA_ROOT = "/content/drive/MyDrive/dataset" if IN_COLAB else "."
_OUTPUT_DIR = f"{_DATA_ROOT}/output"

CONFIG = {
    # Input locations
    "train_audio_dir": f"{_DATA_ROOT}/audios_train/",
    "test_audio_dir": f"{_DATA_ROOT}/audios_test/",
    "train_csv": f"{_DATA_ROOT}/train.csv",
    "test_csv": f"{_DATA_ROOT}/test.csv",
    "sample_submission": f"{_DATA_ROOT}/sample_submission.csv",

    # Output locations
    "output_dir": _OUTPUT_DIR,
    "model_save_path": f"{_OUTPUT_DIR}/grammar_model.pt",
    "submission_path": f"{_OUTPUT_DIR}/submission.csv",

    # Model paths for ensemble (one checkpoint per fold)
    "model_save_paths": [
        f"{_OUTPUT_DIR}/model_fold_{fold_number}.pt" for fold_number in (1, 2, 3)
    ],

    # Audio processing parameters
    "target_sample_rate": 16000,
    "max_audio_length": 10,  # 10 seconds

    # Model parameters - using WavLM base instead of plus for stability
    "base_model": "microsoft/wavlm-base",

    # Training parameters
    "batch_size": 2,  # Small batch size
    "accumulation_steps": 4,
    "epochs": 10,
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "validation_size": 0.2,
    "use_augmentation": True,
    "use_mixed_precision": True,

    # K-fold parameters
    "n_folds": 3,
    "ensemble_weights": [0.4, 0.3, 0.3],
}

# Make sure the output directory exists before anything is written to it
os.makedirs(CONFIG["output_dir"], exist_ok=True)

# 1. Audio Processing

In [None]:
def load_and_process_audio(file_path, target_sr=16000, max_len=10, augment=False):
    """Read an audio file and return a fixed-length, peak-normalised mono waveform.

    Pipeline: load with torchaudio, resample to ``target_sr`` when the file's
    rate differs, average the channels down to one, divide by the peak
    amplitude, optionally augment, then crop or zero-pad to exactly
    ``target_sr * max_len`` samples.

    Args:
        file_path: Path of the audio file to read.
        target_sr: Sample rate of the returned waveform.
        max_len: Length of the returned waveform in seconds.
        augment: When True, add Gaussian noise and/or apply a time shift
            (each with probability 0.3), and crop over-long audio at a random
            offset instead of around the centre.

    Returns:
        Tensor of shape ``(1, target_sr * max_len)``. If any step raises, the
        error is printed and an all-zero tensor of that shape is returned.
    """
    try:
        audio, source_sr = torchaudio.load(file_path)

        # Bring the signal to the requested sample rate
        if source_sr != target_sr:
            to_target_rate = torchaudio.transforms.Resample(orig_freq=source_sr, new_freq=target_sr)
            audio = to_target_rate(audio)

        # Collapse multi-channel audio into a single channel
        if audio.shape[0] > 1:
            audio = torch.mean(audio, dim=0, keepdim=True)

        # Peak-normalise; an all-zero signal is left alone to avoid dividing by 0
        max_amplitude = torch.abs(audio).max()
        if max_amplitude > 0:
            audio = audio / max_amplitude

        if augment:
            # Additive Gaussian noise with a randomly drawn level
            if random.random() < 0.3:
                sigma = random.uniform(0.001, 0.005)
                audio = audio + torch.randn_like(audio) * sigma

            # Shift in time by up to 10% of the clip, zero-filling the vacated end
            if random.random() < 0.3:
                offset = int(random.uniform(-0.1, 0.1) * audio.shape[1])
                if offset > 0:
                    lead_in = torch.zeros(1, offset)
                    audio = torch.cat([lead_in, audio[:, :-offset]], dim=1)
                elif offset < 0:
                    offset = abs(offset)
                    tail = torch.zeros(1, offset)
                    audio = torch.cat([audio[:, offset:], tail], dim=1)

        # Force the exact target length (target_sr * max_len samples)
        wanted = target_sr * max_len
        available = audio.shape[1]
        if available > wanted:
            surplus = available - wanted
            # Random window when augmenting, centred window otherwise
            if augment:
                begin = random.randint(0, surplus)
            else:
                begin = surplus // 2
            audio = audio[:, begin:begin + wanted]
        elif available < wanted:
            silence = torch.zeros(1, wanted - available)
            audio = torch.cat([audio, silence], dim=1)

        return audio

    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        # Fall back to silence so one unreadable file does not abort the run
        return torch.zeros(1, target_sr * max_len)


# 2. Dataset Class

In [None]:
class AudioDataset(Dataset):
    """Dataset that turns rows of a filename/label table into model inputs.

    Each item is a dict with the feature-extracted waveform under
    ``'input_values'`` and the row position under ``'idx'``, plus either
    ``'labels'`` (training/validation) or ``'filename'`` (test).
    """

    def __init__(self, csv_data, audio_dir, feature_extractor, is_test=False, use_augmentation=False):
        """Store the table and processing settings.

        Args:
            csv_data: Path to a CSV file, or an object with ``.copy()`` such
                as a DataFrame (it is copied, not referenced).
            audio_dir: Directory that the ``filename`` column is relative to.
            feature_extractor: Callable applied to each waveform.
            is_test: When True, items carry ``'filename'`` instead of ``'labels'``.
            use_augmentation: Forwarded to ``load_and_process_audio``.
        """
        self.data = pd.read_csv(csv_data) if isinstance(csv_data, str) else csv_data.copy()

        self.audio_dir = audio_dir
        self.feature_extractor = feature_extractor
        self.is_test = is_test
        self.use_augmentation = use_augmentation

        # Audio geometry comes from the global configuration
        self.target_sr = CONFIG["target_sample_rate"]
        self.max_length = CONFIG["max_audio_length"]

        print(f"Dataset initialized with {len(self.data)} samples. Augmentation: {use_augmentation}")

    def __len__(self):
        """Number of rows in the underlying table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, preprocess and feature-extract the audio for row ``idx``."""
        row = self.data.iloc[idx]
        filename = row['filename']

        # Waveform of fixed length, optionally augmented
        waveform = load_and_process_audio(
            os.path.join(self.audio_dir, filename),
            self.target_sr,
            self.max_length,
            self.use_augmentation
        )

        # The extractor receives a NumPy array with singleton dims removed
        extracted = self.feature_extractor(
            waveform.squeeze().numpy(),
            sampling_rate=self.target_sr,
            return_tensors="pt"
        )

        sample = {'input_values': extracted.input_values.squeeze()}
        if self.is_test:
            sample['filename'] = filename
        else:
            sample['labels'] = torch.tensor(row['label'], dtype=torch.float)
        sample['idx'] = idx
        return sample




# 3. Custom Collate Function

In [None]:
def collate_fn(batch):
    """Combine per-sample dicts into one right-padded batch dict.

    Args:
        batch: List of dicts, each holding a 1-D ``'input_values'`` tensor,
            an ``'idx'`` entry, and either ``'labels'`` or ``'filename'``.

    Returns:
        Dict with ``'input_values'`` and ``'attention_mask'`` tensors of shape
        ``(len(batch), longest_sequence)`` (mask is 1 on real samples, 0 on
        padding), the list ``'idx'``, and either a stacked ``'labels'`` tensor
        or a ``'filenames'`` list, decided by the first sample's keys.
    """
    sequences = [sample['input_values'] for sample in batch]
    lengths = [seq.shape[0] for seq in sequences]
    longest = max(lengths)

    # Zero-initialised buffers: anything not overwritten below is padding
    n_samples = len(batch)
    padded = torch.zeros(n_samples, longest)
    mask = torch.zeros(n_samples, longest)
    for row, (seq, n_valid) in enumerate(zip(sequences, lengths)):
        padded[row, :n_valid] = seq
        mask[row, :n_valid] = 1

    collated = {
        'input_values': padded,
        'attention_mask': mask,
        'idx': [sample['idx'] for sample in batch]
    }

    # Labelled batches carry targets; unlabelled ones carry filenames
    if 'labels' in batch[0]:
        collated['labels'] = torch.stack([sample['labels'] for sample in batch])
    else:
        collated['filenames'] = [sample['filename'] for sample in batch]

    return collated

# 4. Model Architecture

In [None]:

class GrammarScoreModel(nn.Module):
    """Pretrained speech encoder followed by a small regression head.

    The encoder is loaded with ``AutoModel.from_pretrained``. Only parameters
    whose names contain ``encoder.layers.9``, ``encoder.layers.10`` or
    ``encoder.layers.11`` remain trainable. Encoder outputs are mean-pooled
    over time, passed through a three-layer MLP and mapped into the score
    range [1, 5] with a scaled sigmoid.
    """

    def __init__(self, base_model_name=CONFIG["base_model"]):
        """Build the encoder and the regression head.

        Args:
            base_model_name: Model identifier passed to
                ``AutoModel.from_pretrained``.
        """
        super().__init__()

        # Load base model
        self.base_model = AutoModel.from_pretrained(base_model_name)

        # Freeze early layers
        self._freeze_layers()

        # Width of the encoder output, read from its config
        hidden_size = self.base_model.config.hidden_size

        # Regression head
        self.regression_head = nn.Sequential(
            nn.Linear(hidden_size, 256),
            nn.GELU(),
            nn.Dropout(0.2),

            nn.Linear(256, 64),
            nn.GELU(),
            nn.Dropout(0.2),

            nn.Linear(64, 1)
        )

        # Initialize weights properly
        self._init_weights()

    def _freeze_layers(self):
        """Freeze every encoder parameter except those of layers 9, 10 and 11."""
        trainable_layers = ['encoder.layers.9', 'encoder.layers.10', 'encoder.layers.11']

        for name, param in self.base_model.named_parameters():
            param.requires_grad = any(layer in name for layer in trainable_layers)

    def _init_weights(self):
        """Xavier-initialise the head's linear weights and zero their biases."""
        for m in self.regression_head.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def _conv_output_lengths(self, lengths):
        """Map sample counts to frame counts using the encoder's conv geometry.

        Reads ``conv_kernel`` / ``conv_stride`` from the encoder config and
        applies ``floor((n - kernel) / stride) + 1`` per layer.

        Args:
            lengths: Integer tensor of input lengths in samples.

        Returns:
            Tensor of frame counts, or ``None`` when the config does not
            expose the convolution geometry.
        """
        config = self.base_model.config
        kernels = getattr(config, "conv_kernel", None)
        strides = getattr(config, "conv_stride", None)
        if not kernels or not strides:
            return None
        for kernel, stride in zip(kernels, strides):
            lengths = torch.div(lengths - kernel, stride, rounding_mode="floor") + 1
        return lengths

    def _frame_level_mask(self, attention_mask, seq_len):
        """Convert a sample-level mask into one aligned with the hidden states.

        The mask handed to ``forward`` has one entry per audio sample, while
        the hidden states have one entry per encoder frame. Simply cutting the
        mask to its first ``seq_len`` entries would mark padded frames as
        valid, so the number of valid frames is derived from the number of
        valid samples instead. Assumes right-padding (valid samples first).

        Args:
            attention_mask: ``(batch, n_samples)`` mask, 1 = real, 0 = padding.
            seq_len: Time dimension of the encoder's hidden states.

        Returns:
            ``(batch, seq_len)`` mask with the same dtype as ``attention_mask``.
        """
        mask_len = attention_mask.shape[1]
        if mask_len == seq_len:
            return attention_mask

        # Only trust the conv geometry if it reproduces the observed seq_len
        # for a fully valid input of this length.
        full_frames = self._conv_output_lengths(torch.tensor(mask_len))
        if full_frames is not None and int(full_frames) == seq_len:
            valid_samples = attention_mask.sum(dim=1).long()
            valid_frames = self._conv_output_lengths(valid_samples).clamp(min=0, max=seq_len)
            positions = torch.arange(seq_len, device=attention_mask.device)
            frame_mask = positions.unsqueeze(0) < valid_frames.unsqueeze(1)
            return frame_mask.to(attention_mask.dtype)

        # Fallback when the geometry is unknown: truncate a longer mask,
        # or pad a shorter one with zeros.
        if mask_len > seq_len:
            return attention_mask[:, :seq_len]
        padding = torch.zeros(
            attention_mask.shape[0],
            seq_len - mask_len,
            device=attention_mask.device,
            dtype=attention_mask.dtype
        )
        return torch.cat([attention_mask, padding], dim=1)

    def forward(self, input_values, attention_mask=None):
        """Predict one score per input.

        Args:
            input_values: Batch of inputs forwarded to the encoder.
            attention_mask: Optional ``(batch, n_samples)`` mask; when given
                it is forwarded to the encoder and used for masked mean
                pooling, otherwise all frames are averaged.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in the range [1, 5].
        """
        # Pass through base model
        outputs = self.base_model(
            input_values=input_values,
            attention_mask=attention_mask
        )
        hidden_states = outputs.last_hidden_state

        if attention_mask is not None:
            # Masked mean: padded frames contribute neither to the sum nor
            # to the count.
            frame_mask = self._frame_level_mask(attention_mask, hidden_states.shape[1])
            expanded_mask = frame_mask.unsqueeze(-1).float()
            hidden_states = hidden_states * expanded_mask
            # clamp guards against division by zero for an all-padding row
            pooled = hidden_states.sum(dim=1) / expanded_mask.sum(dim=1).clamp(min=1e-9)
        else:
            # Simple mean pooling
            pooled = hidden_states.mean(dim=1)

        # Get score through regression head
        score = self.regression_head(pooled)

        # Scale to range [1, 5]
        score = torch.sigmoid(score) * 4.0 + 1.0

        return score

# 5. Training Function for a Single Fold

In [None]:
def train_fold(fold, train_data, val_data, feature_extractor, model_path, device):
    """Fit one GrammarScoreModel on a single cross-validation split.

    Trains for ``CONFIG["epochs"]`` epochs with gradient accumulation and
    optional mixed precision, validates after every epoch, and writes the
    model weights to ``model_path`` whenever the validation Pearson
    correlation improves. Loss/correlation curves are saved as a PNG in
    ``CONFIG["output_dir"]``.

    Args:
        fold: Zero-based fold index (used for messages and the plot name).
        train_data: Training rows handed to ``AudioDataset``.
        val_data: Validation rows handed to ``AudioDataset``.
        feature_extractor: Feature extractor shared by both datasets.
        model_path: Destination of the best checkpoint's ``state_dict``.
        device: Device the model and batches are moved to.

    Returns:
        The best validation Pearson correlation observed (-1.0 if no epoch
        improved on the initial value).
    """
    print(f"\n{'='*50}\nTraining Fold {fold+1}\n{'='*50}")

    accum_steps = CONFIG["accumulation_steps"]
    use_amp = CONFIG["use_mixed_precision"]

    # Datasets: augmentation only on the training side
    train_dataset = AudioDataset(
        train_data,
        CONFIG["train_audio_dir"],
        feature_extractor,
        use_augmentation=CONFIG["use_augmentation"]
    )
    val_dataset = AudioDataset(
        val_data,
        CONFIG["train_audio_dir"],
        feature_extractor
    )

    # Loaders share everything except shuffling; num_workers=0 avoids
    # multiprocessing issues
    loader_options = dict(
        batch_size=CONFIG["batch_size"],
        collate_fn=collate_fn,
        num_workers=0,
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

    model = GrammarScoreModel().to(device)

    # Report model size
    total_params = 0
    trainable_params = 0
    for p in model.parameters():
        total_params += p.numel()
        if p.requires_grad:
            trainable_params += p.numel()
    print(f"Model has {total_params:,} total parameters, with {trainable_params:,} trainable")

    optimizer = optim.AdamW(
        model.parameters(),
        lr=CONFIG["learning_rate"],
        weight_decay=CONFIG["weight_decay"]
    )

    # Cosine annealing, stepped once per epoch
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0=CONFIG["epochs"],
        eta_min=CONFIG["learning_rate"] / 10
    )

    criterion = nn.MSELoss()

    # History and best-score tracking
    best_val_corr = -1.0
    train_losses, val_losses, val_correlations = [], [], []
    scaler = GradScaler() if use_amp else None

    for epoch in range(CONFIG["epochs"]):
        # ---- training pass ----
        model.train()
        train_loss = 0
        optimizer.zero_grad()

        n_train_batches = len(train_loader)
        progress_bar = tqdm(enumerate(train_loader), total=n_train_batches, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Train]")

        for step, batch in progress_bar:
            input_values = batch['input_values'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device).view(-1, 1)

            # Loss is divided so accumulated gradients average over the group
            if use_amp:
                with autocast():
                    outputs = model(input_values, attention_mask)
                    loss = criterion(outputs, labels) / accum_steps
                scaler.scale(loss).backward()
            else:
                outputs = model(input_values, attention_mask)
                loss = criterion(outputs, labels) / accum_steps
                loss.backward()

            # Apply the optimizer once per full group, and on the last batch
            group_complete = (step + 1) % accum_steps == 0 or (step + 1) == n_train_batches
            if group_complete:
                if use_amp:
                    # Gradients must be unscaled before clipping
                    scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                if use_amp:
                    scaler.step(optimizer)
                    scaler.update()
                else:
                    optimizer.step()
                optimizer.zero_grad()

            # Undo the accumulation scaling for reporting
            batch_loss = loss.item() * accum_steps
            train_loss += batch_loss
            progress_bar.set_postfix({'loss': batch_loss})

        scheduler.step()

        avg_train_loss = train_loss / n_train_batches
        train_losses.append(avg_train_loss)

        # ---- validation pass ----
        model.eval()
        val_loss = 0
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for batch in tqdm(val_loader, desc="Validating"):
                input_values = batch['input_values'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device).view(-1, 1)

                outputs = model(input_values, attention_mask)
                val_loss += criterion(outputs, labels).item()

                all_preds.extend(outputs.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)

        # Pearson correlation between predictions and targets
        all_preds = np.array(all_preds).flatten()
        all_labels = np.array(all_labels).flatten()
        val_corr, _ = pearsonr(all_preds, all_labels)
        val_correlations.append(val_corr)

        print(f"Epoch {epoch+1}/{CONFIG['epochs']}, Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {avg_val_loss:.4f}, Val Pearson: {val_corr:.4f}")

        # Checkpoint only on improvement
        if val_corr > best_val_corr:
            best_val_corr = val_corr
            torch.save(model.state_dict(), model_path)
            print(f"✓ Saved best model with validation correlation: {val_corr:.4f}")

        # Release cached memory between epochs
        torch.cuda.empty_cache()
        gc.collect()

    # ---- training curves ----
    plt.figure(figsize=(15, 5))

    # Left panel: losses
    plt.subplot(1, 2, 1)
    plt.plot(range(1, len(train_losses) + 1), train_losses, 'b-', label='Train Loss')
    plt.plot(range(1, len(val_losses) + 1), val_losses, 'r-', label='Val Loss')
    plt.title(f'Fold {fold+1} - Loss Curves')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: validation correlation
    plt.subplot(1, 2, 2)
    plt.plot(range(1, len(val_correlations) + 1), val_correlations, 'g-')
    plt.title(f'Fold {fold+1} - Validation Correlation')
    plt.xlabel('Epoch')
    plt.ylabel('Pearson r')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    curves_path = os.path.join(CONFIG["output_dir"], f'fold_{fold+1}_curves.png')
    plt.savefig(curves_path)
    plt.close()

    return best_val_corr


# 6. Ensemble Prediction Function

In [None]:
def generate_ensemble_predictions(model_paths, test_loader, device):
    """Average the predictions of several saved models over a test loader.

    Args:
        model_paths: Checkpoint paths (``state_dict`` files). Paths that do
            not exist are skipped with a printed warning.
        test_loader: Iterable of batches providing ``'input_values'``,
            ``'attention_mask'`` and ``'filenames'``.
        device: Device used for the models and the batches.

    Returns:
        DataFrame with columns ``'filename'`` and ``'label'`` (the unweighted
        mean of the models' outputs), in loader order.

    Raises:
        ValueError: If none of ``model_paths`` exists.
    """
    models = []
    for path in model_paths:
        if not os.path.exists(path):
            print(f"Warning: checkpoint not found, skipping: {path}")
            continue
        model = GrammarScoreModel().to(device)
        # map_location remaps stored tensors to the requested device so a
        # checkpoint saved on GPU can also be restored on a CPU-only machine
        model.load_state_dict(torch.load(path, map_location=device))
        model.eval()
        models.append(model)

    if not models:
        raise ValueError("No models found for ensemble prediction")

    all_predictions = []
    all_filenames = []

    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Generating predictions"):
            input_values = batch['input_values'].to(device)
            attention_mask = batch['attention_mask'].to(device)

            # One prediction array per model, stacked on a new leading axis
            per_model = np.stack([
                model(input_values, attention_mask).cpu().numpy()
                for model in models
            ])

            # Unweighted mean across models
            all_predictions.extend(per_model.mean(axis=0))
            all_filenames.extend(batch['filenames'])

    # Create submission DataFrame
    submission = pd.DataFrame({
        'filename': all_filenames,
        'label': np.array(all_predictions).flatten()
    })

    return submission

# 7. Main Execution

In [2]:
def main():
    """Run the whole pipeline: K-fold training, ensemble inference, submission.

    Reads the train/test CSVs named in ``CONFIG``, trains one model per fold,
    reports the per-fold and mean validation correlation, predicts the test
    set with the saved fold models and writes the submission CSV.
    """
    print("Starting Grammar Score Prediction...")

    # Data tables
    print("Loading data...")
    train_df = pd.read_csv(CONFIG["train_csv"])
    test_df = pd.read_csv(CONFIG["test_csv"])

    print(f"Training data shape: {train_df.shape}")
    print(f"Test data shape: {test_df.shape}")

    print("\nTraining label distribution:")
    print(train_df['label'].describe())

    # Feature extractor matching the base model
    print("\nInitializing feature extractor...")
    feature_extractor = AutoFeatureExtractor.from_pretrained(CONFIG["base_model"])

    # Shuffled K-fold split with a fixed seed
    splitter = KFold(n_splits=CONFIG["n_folds"], shuffle=True, random_state=SEED)

    # One training run per fold; each returns its best validation correlation
    fold_scores = []
    for fold, (train_idx, val_idx) in enumerate(splitter.split(train_df)):
        fold_train = train_df.iloc[train_idx].reset_index(drop=True)
        fold_val = train_df.iloc[val_idx].reset_index(drop=True)

        fold_scores.append(
            train_fold(
                fold,
                fold_train,
                fold_val,
                feature_extractor,
                CONFIG["model_save_paths"][fold],
                device
            )
        )

    # Cross-validation summary
    print("\nCross-validation results:")
    for i, score in enumerate(fold_scores):
        print(f"Fold {i+1}: {score:.4f}")
    print(f"Mean validation correlation: {np.mean(fold_scores):.4f}")

    # Test data, served in file order without augmentation
    test_dataset = AudioDataset(
        test_df,
        CONFIG["test_audio_dir"],
        feature_extractor,
        is_test=True
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=CONFIG["batch_size"],
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=0
    )

    # Predict with the checkpoints of the folds that were trained
    print("\nGenerating ensemble predictions...")
    checkpoint_paths = CONFIG["model_save_paths"][:CONFIG["n_folds"]]
    submission = generate_ensemble_predictions(checkpoint_paths, test_loader, device)

    # Persist and summarise
    submission.to_csv(CONFIG["submission_path"], index=False)
    print(f"\nSubmission saved to {CONFIG['submission_path']}")

    print("\nPrediction statistics:")
    print(submission['label'].describe())
    print("\nDone!")

if __name__ == "__main__":
    main()

Mounted at /content/drive
Using device: cuda
Starting Grammar Score Prediction...
Loading data...
Training data shape: (444, 2)
Test data shape: (195, 1)

Training label distribution:
count    444.000000
mean       3.617117
std        1.114151
min        1.000000
25%        2.500000
50%        3.500000
75%        4.500000
max        5.000000
Name: label, dtype: float64

Initializing feature extractor...


preprocessor_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]


Training Fold 1
Dataset initialized with 296 samples. Augmentation: True
Dataset initialized with 148 samples. Augmentation: False


config.json:   0%|          | 0.00/2.24k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/378M [00:00<?, ?B/s]

Model has 94,595,313 total parameters, with 21,478,589 trainable


Epoch 1/10 [Train]:   0%|          | 0/148 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/378M [00:00<?, ?B/s]

Epoch 1/10 [Train]: 100%|██████████| 148/148 [03:58<00:00,  1.61s/it, loss=0.37]
Validating: 100%|██████████| 74/74 [01:50<00:00,  1.49s/it]


Epoch 1/10, Train Loss: 1.3287, Val Loss: 1.3120, Val Pearson: 0.1807
✓ Saved best model with validation correlation: 0.1807


Epoch 2/10 [Train]: 100%|██████████| 148/148 [00:44<00:00,  3.35it/s, loss=1.48]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.61it/s]


Epoch 2/10, Train Loss: 1.0461, Val Loss: 1.1231, Val Pearson: 0.5069
✓ Saved best model with validation correlation: 0.5069


Epoch 3/10 [Train]: 100%|██████████| 148/148 [00:44<00:00,  3.36it/s, loss=2.93]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.57it/s]


Epoch 3/10, Train Loss: 0.8860, Val Loss: 0.8572, Val Pearson: 0.6401
✓ Saved best model with validation correlation: 0.6401


Epoch 4/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.38it/s, loss=0.358]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.61it/s]


Epoch 4/10, Train Loss: 0.7306, Val Loss: 0.7644, Val Pearson: 0.6619
✓ Saved best model with validation correlation: 0.6619


Epoch 5/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.38it/s, loss=0.205]
Validating: 100%|██████████| 74/74 [00:15<00:00,  4.64it/s]


Epoch 5/10, Train Loss: 0.7249, Val Loss: 0.7081, Val Pearson: 0.6895
✓ Saved best model with validation correlation: 0.6895


Epoch 6/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.42it/s, loss=1.19]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.55it/s]


Epoch 6/10, Train Loss: 0.6541, Val Loss: 0.6905, Val Pearson: 0.6995
✓ Saved best model with validation correlation: 0.6995


Epoch 7/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=0.0963]
Validating: 100%|██████████| 74/74 [00:15<00:00,  4.63it/s]


Epoch 7/10, Train Loss: 0.6052, Val Loss: 0.6762, Val Pearson: 0.7054
✓ Saved best model with validation correlation: 0.7054


Epoch 8/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.37it/s, loss=1.84]
Validating: 100%|██████████| 74/74 [00:15<00:00,  4.65it/s]


Epoch 8/10, Train Loss: 0.6319, Val Loss: 0.6699, Val Pearson: 0.7087
✓ Saved best model with validation correlation: 0.7087


Epoch 9/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=0.573]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.52it/s]


Epoch 9/10, Train Loss: 0.6294, Val Loss: 0.6700, Val Pearson: 0.7092
✓ Saved best model with validation correlation: 0.7092


Epoch 10/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=0.108]
Validating: 100%|██████████| 74/74 [00:15<00:00,  4.65it/s]


Epoch 10/10, Train Loss: 0.5928, Val Loss: 0.6671, Val Pearson: 0.7103
✓ Saved best model with validation correlation: 0.7103

Training Fold 2
Dataset initialized with 296 samples. Augmentation: True
Dataset initialized with 148 samples. Augmentation: False
Model has 94,595,313 total parameters, with 21,478,589 trainable


Epoch 1/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.41it/s, loss=1.44]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.58it/s]


Epoch 1/10, Train Loss: 1.4656, Val Loss: 1.0639, Val Pearson: 0.4124
✓ Saved best model with validation correlation: 0.4124


Epoch 2/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.39it/s, loss=1.94]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.58it/s]


Epoch 2/10, Train Loss: 1.2099, Val Loss: 0.8526, Val Pearson: 0.5633
✓ Saved best model with validation correlation: 0.5633


Epoch 3/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.38it/s, loss=1.1]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.55it/s]


Epoch 3/10, Train Loss: 0.9748, Val Loss: 0.7317, Val Pearson: 0.6417
✓ Saved best model with validation correlation: 0.6417


Epoch 4/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.43it/s, loss=0.135]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.51it/s]


Epoch 4/10, Train Loss: 0.8344, Val Loss: 0.6525, Val Pearson: 0.6598
✓ Saved best model with validation correlation: 0.6598


Epoch 5/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.42it/s, loss=0.252]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.52it/s]


Epoch 5/10, Train Loss: 0.8042, Val Loss: 0.6740, Val Pearson: 0.6792
✓ Saved best model with validation correlation: 0.6792


Epoch 6/10 [Train]: 100%|██████████| 148/148 [00:44<00:00,  3.35it/s, loss=0.117]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.55it/s]


Epoch 6/10, Train Loss: 0.7951, Val Loss: 0.6262, Val Pearson: 0.6902
✓ Saved best model with validation correlation: 0.6902


Epoch 7/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.42it/s, loss=0.266]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.47it/s]


Epoch 7/10, Train Loss: 0.6574, Val Loss: 0.6409, Val Pearson: 0.6944
✓ Saved best model with validation correlation: 0.6944


Epoch 8/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=1.34]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.56it/s]


Epoch 8/10, Train Loss: 0.7101, Val Loss: 0.6598, Val Pearson: 0.6992
✓ Saved best model with validation correlation: 0.6992


Epoch 9/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.37it/s, loss=0.34]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.54it/s]


Epoch 9/10, Train Loss: 0.6795, Val Loss: 0.6563, Val Pearson: 0.6973


Epoch 10/10 [Train]: 100%|██████████| 148/148 [00:42<00:00,  3.45it/s, loss=2.26]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.48it/s]


Epoch 10/10, Train Loss: 0.7255, Val Loss: 0.6462, Val Pearson: 0.6985

Training Fold 3
Dataset initialized with 296 samples. Augmentation: True
Dataset initialized with 148 samples. Augmentation: False
Model has 94,595,313 total parameters, with 21,478,589 trainable


Epoch 1/10 [Train]: 100%|██████████| 148/148 [00:42<00:00,  3.46it/s, loss=1.26]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.53it/s]


Epoch 1/10, Train Loss: 1.3222, Val Loss: 1.1353, Val Pearson: 0.4693
✓ Saved best model with validation correlation: 0.4693


Epoch 2/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.41it/s, loss=1.7]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.51it/s]


Epoch 2/10, Train Loss: 1.0708, Val Loss: 0.9362, Val Pearson: 0.6348
✓ Saved best model with validation correlation: 0.6348


Epoch 3/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.42it/s, loss=0.16]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.48it/s]


Epoch 3/10, Train Loss: 0.8752, Val Loss: 0.7758, Val Pearson: 0.6853
✓ Saved best model with validation correlation: 0.6853


Epoch 4/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=1.45]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.51it/s]


Epoch 4/10, Train Loss: 0.8010, Val Loss: 0.6265, Val Pearson: 0.7138
✓ Saved best model with validation correlation: 0.7138


Epoch 5/10 [Train]: 100%|██████████| 148/148 [00:42<00:00,  3.45it/s, loss=3.27]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.42it/s]


Epoch 5/10, Train Loss: 0.8712, Val Loss: 0.5980, Val Pearson: 0.7359
✓ Saved best model with validation correlation: 0.7359


Epoch 6/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=0.00215]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.52it/s]


Epoch 6/10, Train Loss: 0.7146, Val Loss: 0.5667, Val Pearson: 0.7478
✓ Saved best model with validation correlation: 0.7478


Epoch 7/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.40it/s, loss=1.79]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.49it/s]


Epoch 7/10, Train Loss: 0.6634, Val Loss: 0.5257, Val Pearson: 0.7619
✓ Saved best model with validation correlation: 0.7619


Epoch 8/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.44it/s, loss=1.87]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.42it/s]


Epoch 8/10, Train Loss: 0.6608, Val Loss: 0.5284, Val Pearson: 0.7627
✓ Saved best model with validation correlation: 0.7627


Epoch 9/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.41it/s, loss=0.423]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.49it/s]


Epoch 9/10, Train Loss: 0.7514, Val Loss: 0.5213, Val Pearson: 0.7668
✓ Saved best model with validation correlation: 0.7668


Epoch 10/10 [Train]: 100%|██████████| 148/148 [00:43<00:00,  3.39it/s, loss=0.0532]
Validating: 100%|██████████| 74/74 [00:16<00:00,  4.49it/s]


Epoch 10/10, Train Loss: 0.6761, Val Loss: 0.5222, Val Pearson: 0.7644

Cross-validation results:
Fold 1: 0.7103
Fold 2: 0.6992
Fold 3: 0.7668
Mean validation correlation: 0.7255
Dataset initialized with 195 samples. Augmentation: False

Generating ensemble predictions...


Generating predictions: 100%|██████████| 98/98 [03:01<00:00,  1.85s/it]



Submission saved to /content/drive/MyDrive/dataset/output/submission.csv

Prediction statistics:
count    195.000000
mean       3.577443
std        0.809760
min        2.382479
25%        2.825603
50%        3.370092
75%        4.371732
max        4.930118
Name: label, dtype: float64

Done!


# For a more comprehensive explanation, check out the following docs

# The above documentation and code were written by Belo Abhigyan
## For any clarifications, please reach out via the following:
### LinkedIn
### Portfolio