In [8]:
# Environment sanity check: report which interpreter backs this kernel and
# which python/conda executables the shell escapes resolve to.
import sys
print(sys.executable)  # Shows current Python path
# NOTE(review): `!python` is resolved by the shell, so it is not guaranteed to
# be the same interpreter as sys.executable — compare the two outputs.
!python --version
!conda --version  # If conda is installed

/anaconda/envs/jupyter_env/bin/python3.10
Python 3.10.11


conda 24.7.1


In [9]:
# Create a new conda environment (if needed)
%conda create -n audio_ml python=3.9 -y

# Activate it (may not work in all notebook environments)
# NOTE(review): the running kernel stays bound to the interpreter printed by
# sys.executable, so activation here likely does not change where later cells
# run; selecting a kernel created from audio_ml is the reliable route — confirm.
%conda activate audio_ml

# Install PyTorch and dependencies
# Versions are pinned so the environment can be recreated exactly.
# NOTE(review): if the activation above had no effect, these installs target
# the kernel's current environment rather than audio_ml — verify.
%conda install pytorch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 cudatoolkit=11.7 -c pytorch -y
%conda install numpy==1.23.5 librosa==0.10.0 scikit-learn==1.2.2 pandas==1.5.3 -y

Channels:
 - defaults
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /anaconda/envs/audio_ml

  added / updated specs:
    - python=3.9


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    _libgcc_mutex-0.1          |             main           3 KB
    _openmp_mutex-5.1          |            1_gnu          21 KB
    ld_impl_linux-64-2.40      |       h12ee557_0         710 KB
    libffi-3.4.4               |       h6a678d5_1         141 KB
    libgcc-ng-11.2.0           |       h1234567_1         5.3 MB
    libgomp-11.2.0             |       h1234567_1         474 KB
    libstdcxx-ng-11.2.0        |       h1234567_1         4.7 MB
    ncurses-6.4                |       h6a678d5_0         914 KB
    openssl-3.0.16             |       h5eee18b_0         5.2 MB
    pip-25.0                   |   py39h

In [2]:
# Install librosa with the pip module of the interpreter that runs this kernel
# (sys.executable), so the package lands in the kernel's own environment.
# NOTE(review): the version is not pinned here, unlike the conda cell.
import sys
!{sys.executable} -m pip install librosa



In [2]:
# Installs librosa through IPython's automagic form of `%pip`; the kernel may
# need a restart before the package is importable (see the note printed below).
# NOTE(review): duplicates the install in the previous cell and is unpinned.
pip install librosa

Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torch.utils.data import Dataset, DataLoader
import glob
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
import torchaudio.transforms as T
from torchvision.models import resnet18
import librosa
import warnings
warnings.filterwarnings('ignore')

# Dataset Analysis Function
def analyze_dataset(protocols_dir, audio_dir):
    """Summarise the training folds: file availability, class balance, durations.

    Every ``train_fold*.csv`` in ``protocols_dir`` is read; column 0 is taken
    as the clip name (``<audio_dir>/<clip>.wav``) and column 1 as the label.
    A label maps to class 0 when its lower-cased text contains ``'truth'`` or
    ``'0'``, otherwise to class 1.

    Args:
        protocols_dir: Directory holding the ``train_fold*.csv`` protocol files.
        audio_dir: Directory holding the ``.wav`` files.

    Returns:
        dict with keys ``class_counts`` (``[n_class0, n_class1]``),
        ``duration_stats`` (min/max/mean/median/std in seconds; all NaN when no
        file could be read), ``sample_rates`` (set of ints) and ``valid_ratio``
        (readable files / listed files, ``0.0`` when nothing is listed).
    """
    train_files = glob.glob(f"{protocols_dir}/train_fold*.csv")

    class_counts = [0, 0]
    durations = []
    sample_rates = set()
    valid_files = 0
    total_files = 0

    for fold_file in train_files:
        annos = pd.read_csv(fold_file)
        for idx in range(len(annos)):
            total_files += 1
            clip_name = annos.iloc[idx, 0]
            audio_path = os.path.join(audio_dir, f"{clip_name}.wav")

            if not os.path.exists(audio_path):
                continue

            try:
                metadata = torchaudio.info(audio_path)
                duration = metadata.num_frames / metadata.sample_rate
                durations.append(duration)
                sample_rates.add(metadata.sample_rate)

                label = 0 if any(k in str(annos.iloc[idx, 1]).lower() for k in ['truth', '0']) else 1
                class_counts[label] += 1
                valid_files += 1
            except Exception as e:
                # Best effort: one unreadable file must not abort the analysis.
                print(f"Error analyzing {audio_path}: {e}")

    if durations:
        duration_stats = {
            'min': min(durations),
            'max': max(durations),
            'mean': np.mean(durations),
            'median': np.median(durations),
            'std': np.std(durations)
        }
    else:
        # min()/max() raise on an empty list; report NaN instead of crashing.
        nan = float('nan')
        duration_stats = {'min': nan, 'max': nan, 'mean': nan, 'median': nan, 'std': nan}

    # Guard both ratios against a zero denominator.
    valid_ratio = valid_files / total_files if total_files else 0.0
    if class_counts[0]:
        class_ratio = f"{class_counts[1]/class_counts[0]:.2f}:1"
    else:
        class_ratio = "n/a (no class-0 samples)"

    print("\nDataset Analysis Results:")
    print(f"Total files: {total_files}")
    print(f"Valid files: {valid_files} ({valid_ratio:.1%})")
    print(f"Class distribution: {class_counts} (Real: {class_counts[0]}, Fake: {class_counts[1]})")
    print(f"Class ratio: {class_ratio}")
    print(f"Sample rates found: {sample_rates}")
    print("Duration statistics (seconds):")
    for k, v in duration_stats.items():
        print(f"  {k}: {v:.2f}")

    return {
        'class_counts': class_counts,
        'duration_stats': duration_stats,
        'sample_rates': sample_rates,
        'valid_ratio': valid_ratio
    }

# Enhanced AudioDataset Class
class AudioDataset(Dataset):
    """Dataset of fixed-length 16 kHz mono clips listed in a protocol CSV.

    Column 0 of the CSV is the clip name (``<audio_dir>/<clip>.wav``) and
    column 1 the label. Rows whose audio file does not exist are dropped at
    construction time.

    Each item is ``(features, label)``. ``features`` is a 1-D waveform of
    ``target_length`` samples, or, when ``use_spectrogram`` is set, a log-mel
    spectrogram with a leading channel axis. If loading or processing fails,
    the item is ``(None, None)`` so that the collate function can filter it.

    Args:
        annotations_file: Path to the protocol CSV.
        audio_dir: Directory containing the ``.wav`` files.
        target_length: Number of samples (at 16 kHz) every clip is cropped or
            zero-padded to.
        augmentations: When true, each item is augmented with probability 0.5.
        use_spectrogram: Return a log-mel spectrogram instead of the waveform.
        spec_params: Optional overrides for the default mel parameters.
    """

    def __init__(self, annotations_file, audio_dir, target_length=16000, augmentations=False, 
                 use_spectrogram=False, spec_params=None):
        self.annos = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.target_length = target_length
        self.augmentations = augmentations
        self.use_spectrogram = use_spectrogram

        self.spec_params = {
            'n_mels': 64,
            'n_fft': 1024,
            'hop_length': 512,
            'f_min': 20,
            'f_max': 8000
        }
        if spec_params:
            self.spec_params.update(spec_params)

        # Keep only the rows whose audio file is actually present on disk.
        self.valid_data = []
        for idx in range(len(self.annos)):
            clip_name = self.annos.iloc[idx, 0]
            audio_path = os.path.join(self.audio_dir, f"{clip_name}.wav")
            if os.path.exists(audio_path):
                label = self._process_label(self.annos.iloc[idx, 1])
                self.valid_data.append((audio_path, label))

        print(f"Found {len(self.valid_data)}/{len(self.annos)} valid files.")

    def __len__(self):
        return len(self.valid_data)

    def __getitem__(self, idx):
        audio_path, label = self.valid_data[idx]
        try:
            waveform, sample_rate = torchaudio.load(audio_path)
            waveform = waveform.mean(dim=0)  # down-mix channels to mono
            waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
            waveform = self._fit_length(waveform)

            if self.augmentations and torch.rand(1) < 0.5:
                waveform = self._apply_augmentations(waveform, sample_rate=16000)
                # Time stretching changes the sample count; restore the fixed
                # length so that items can still be stacked into a batch.
                waveform = self._fit_length(waveform)

            if self.use_spectrogram:
                spectrogram = self._create_spectrogram(waveform)
                return spectrogram.float(), torch.tensor(label)
            else:
                return waveform.float(), torch.tensor(label)
        except Exception as e:
            # Best effort: a broken file yields a placeholder that the collate
            # function filters out instead of aborting the whole epoch.
            print(f"[ERROR] Error loading {audio_path}: {e}")
            return None, None

    def _fit_length(self, waveform):
        """Randomly crop or right-pad a 1-D waveform to ``self.target_length``."""
        n_samples = waveform.shape[0]
        if n_samples > self.target_length:
            # +1 because randint's upper bound is exclusive and the last valid
            # start offset is n_samples - target_length.
            start = torch.randint(0, n_samples - self.target_length + 1, (1,)).item()
            return waveform[start:start + self.target_length]
        return torch.nn.functional.pad(waveform, (0, self.target_length - n_samples))

    @staticmethod
    def time_stretch(waveform, sample_rate, rate=1.0):
        """Time-stretch a waveform with librosa and return it as a float tensor.

        ``sample_rate`` is accepted for call-site symmetry but is not used by
        the stretch itself. The returned tensor generally has a different
        number of samples than the input.
        """
        # Convert tensor to numpy array
        waveform_np = waveform.detach().cpu().numpy()

        # Apply time stretching
        stretched = librosa.effects.time_stretch(waveform_np, rate=rate)

        # Convert back to tensor
        return torch.from_numpy(stretched).float()

    def _create_spectrogram(self, waveform):
        """Return the log-mel spectrogram of ``waveform`` with a leading channel axis."""
        mel_spec = T.MelSpectrogram(
            sample_rate=16000,
            n_mels=self.spec_params['n_mels'],
            n_fft=self.spec_params['n_fft'],
            hop_length=self.spec_params['hop_length'],
            f_min=self.spec_params['f_min'],
            f_max=self.spec_params['f_max']
        )(waveform)
        db_spec = T.AmplitudeToDB()(mel_spec)
        return db_spec.unsqueeze(0)

    def _process_label(self, label_str):
        """Map a raw label to 0 (contains 'truth' or '0') or 1 (anything else)."""
        # NOTE(review): this is a substring test, so any label text containing
        # the character '0' maps to class 0 — confirm against the label vocabulary.
        str_label = str(label_str).strip().lower()
        return 0 if any(k in str_label for k in ['truth', '0']) else 1

    def _apply_augmentations(self, waveform, sample_rate):
        """Apply each augmentation independently with probability 0.3.

        The result may differ in length from the input when the time stretch
        fires; the caller is responsible for re-fitting the length.
        """
        # Zero out a random contiguous span (time masking).
        if torch.rand(1) < 0.3:
            mask_len = torch.randint(100, 1000, (1,)).item()
            start = torch.randint(0, max(1, waveform.shape[0] - mask_len), (1,)).item()
            waveform[start:start+mask_len] = 0

        # Additive Gaussian noise.
        if torch.rand(1) < 0.3:
            noise = torch.randn(waveform.shape) * 0.01
            waveform = waveform + noise

        # Pitch shift by a whole number of semitones in [-3, 2].
        if torch.rand(1) < 0.3:
            n_steps = torch.randint(-3, 3, (1,)).item()
            waveform = torchaudio.functional.pitch_shift(waveform, sample_rate, n_steps)

        # Random gain between 0.5 and 1.5.
        if torch.rand(1) < 0.3:
            gain = 0.5 + torch.rand(1).item()
            waveform = T.Vol(gain=gain)(waveform)

        # Time stretch. The former call to torchaudio.functional.time_stretch
        # was removed: that function does not exist, so the AttributeError
        # escaped to __getitem__ and the sample was dropped.
        if torch.rand(1) < 0.3:
            rate = 0.8 + torch.rand(1).item() * 0.4  # Rate between 0.8 and 1.2
            try:
                waveform = self.time_stretch(waveform, sample_rate, rate)
            except Exception as e:
                print(f"Time stretching failed: {e}")

        return waveform

# Collate Function
def audio_collate_fn(batch):
    """Stack a list of ``(features, label)`` items into batch tensors.

    Items whose features are ``None`` (failed loads) are discarded first. If
    nothing survives, a placeholder batch of one all-zero 16000-sample
    waveform with label 0 is returned.
    """
    usable = [sample for sample in batch if sample[0] is not None]
    if len(usable) == 0:
        return torch.zeros(1, 16000), torch.tensor([0])

    features, targets = zip(*usable)
    # Waveforms and spectrograms are stacked the same way: along a new dim 0.
    return torch.stack(features), torch.stack(targets)

# Compute Class Weights
def compute_class_weights(protocols_dir, audio_dir):
    """Compute balanced weights for the two classes over all training folds.

    Only rows whose ``<audio_dir>/<clip>.wav`` exists are counted. A label is
    class 0 when its lower-cased text contains ``'truth'`` or ``'0'``, else 1.
    The weight of class ``c`` is ``total / (2 * count_c)``.

    Args:
        protocols_dir: Directory holding the ``train_fold*.csv`` files.
        audio_dir: Directory holding the ``.wav`` files.

    Returns:
        Float tensor of shape ``(2,)``. When no sample is found at all the
        weights are ``[1, 1]``; a class with no samples is counted as one
        sample so that its weight stays finite.
    """
    train_files = glob.glob(f"{protocols_dir}/train_fold*.csv")
    labels = []
    for fold_file in train_files:
        annos = pd.read_csv(fold_file)
        for idx in range(len(annos)):
            clip_name = annos.iloc[idx, 0]
            audio_path = os.path.join(audio_dir, f"{clip_name}.wav")
            if os.path.exists(audio_path):
                label = 0 if any(k in str(annos.iloc[idx, 1]).lower() for k in ['truth', '0']) else 1
                labels.append(label)

    total = len(labels)
    if total == 0:
        # Nothing to balance; neutral weights keep the loss well defined.
        return torch.ones(2, dtype=torch.float)

    # An explicit int dtype plus minlength=2 guarantees a two-entry vector
    # even when one class is missing from the data.
    class_counts = np.bincount(np.asarray(labels, dtype=np.int64), minlength=2)
    safe_counts = np.maximum(class_counts, 1)  # avoid division by zero
    weights = total / (2.0 * safe_counts)
    return torch.tensor(weights, dtype=torch.float)

# Compute Metrics
def compute_metrics(preds, labels):
    """Compute accuracy, F1, recall and ROC-AUC for a binary classifier.

    Args:
        preds: Tensor of raw class scores (logits), one row per sample and one
            column per class; column 1 is treated as the positive class.
        labels: Tensor of integer class labels.

    Returns:
        Tuple ``(acc, f1, recall, auc)``. ``auc`` is NaN when ``labels``
        contains a single class, because ROC-AUC is undefined in that case.
    """
    preds = preds.detach()
    # AUC needs a continuous score; hard argmax labels collapse the ROC curve
    # to a single operating point.
    pos_scores = torch.softmax(preds.float(), dim=1)[:, 1].cpu().numpy()
    pred_classes = preds.argmax(dim=1).cpu().numpy()
    labels = labels.cpu().numpy()

    acc = accuracy_score(labels, pred_classes)
    # zero_division=0 keeps the previous numeric result (0.0) without warnings.
    f1 = f1_score(labels, pred_classes, zero_division=0)
    recall = recall_score(labels, pred_classes, zero_division=0)

    if np.unique(labels).size < 2:
        auc = float('nan')
    else:
        auc = roc_auc_score(labels, pos_scores)
    return acc, f1, recall, auc

# PANN CNN6 Model
class PANNCnn6(nn.Module):
    """Four-block CNN classifier for single-channel spectrogram input.

    Each block is Conv2d(3x3) -> BatchNorm -> ReLU -> MaxPool(2). The feature
    map is globally average-pooled and passed through two linear layers.

    Args:
        num_classes: Size of the output layer.
        pretrained: When true, try to copy matching tensors from the PANN
            CNN6 checkpoint; any failure is reported and training continues
            from random initialisation.
    """

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        # CNN6 architecture
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block4 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Adaptive pooling to handle variable input sizes
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(512, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)

        if pretrained:
            self._load_pretrained_weights()

    def _load_pretrained_weights(self):
        """Download the checkpoint and copy every compatible tensor.

        Reports how many tensors were actually copied instead of claiming
        success unconditionally.
        """
        try:
            # Load to CPU; the caller moves the whole module to its device.
            checkpoint = torch.hub.load_state_dict_from_url(
                'https://zenodo.org/record/3987831/files/Cnn6_mAP=0.343.pth',
                map_location='cpu'
            )
            state_dict = checkpoint['model']

            # Load weights for each block
            loaded, expected = 0, 0
            for prefix in ('conv_block1', 'conv_block2', 'conv_block3', 'conv_block4'):
                block = getattr(self, prefix)
                loaded += self._load_block_weights(block, state_dict, prefix)
                expected += sum(1 for _ in block.parameters())

            if loaded == expected:
                print("Successfully loaded pretrained PANN CNN6 weights")
            elif loaded > 0:
                print(f"Partially loaded pretrained PANN CNN6 weights ({loaded}/{expected} tensors)")
            else:
                # NOTE(review): the checkpoint's key names/kernel shapes
                # probably differ from this nn.Sequential layout — verify
                # against the keys of checkpoint['model'].
                print("Warning: no pretrained PANN CNN6 tensors matched this "
                      "architecture; using random initialisation")
        except Exception as e:
            print(f"Error loading pretrained weights: {e}")

    def _load_block_weights(self, block, state_dict, block_prefix):
        """Copy tensors named ``<block_prefix>.<param>`` into ``block``.

        A tensor is copied only when the key exists and its shape equals the
        parameter's shape.

        Returns:
            Number of parameters that were overwritten.
        """
        copied = 0
        with torch.no_grad():
            for name, param in block.named_parameters():
                source = state_dict.get(f'{block_prefix}.{name}')
                if source is None or tuple(source.shape) != tuple(param.shape):
                    continue
                param.copy_(source)
                copied += 1
        return copied

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.conv_block4(x)

        # Use adaptive pooling to handle different input sizes
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)  # Flatten

        x = self.fc1(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
# AST Model
class ASTModel(nn.Module):
    """Transformer classifier over non-overlapping spectrogram patches.

    The input ``(batch, 1, height, width)`` is cut into patches by a strided
    convolution, a class token is prepended, learned positional embeddings
    are added, and the class token's final state is mapped to two logits.

    Args:
        patch_size: Patch edge length, or a ``(height, width)`` pair.
        num_layers: Number of transformer encoder layers.
        num_heads: Attention heads per layer.
        embed_dim: Token embedding size.
        spectrogram_height: Input height (number of mel bins).
        spectrogram_width: Input width (number of frames). Required, because
            it fixes the size of the positional embedding.

    Raises:
        ValueError: If ``spectrogram_width`` is not given.
    """

    def __init__(self, patch_size=16, num_layers=6, num_heads=8, embed_dim=768, 
                 spectrogram_height=64, spectrogram_width=None):
        super().__init__()

        if spectrogram_width is None:
            raise ValueError(
                "spectrogram_width must be provided: it determines the number "
                "of patches and therefore the positional embedding size"
            )

        if isinstance(patch_size, int):
            patch_size = (patch_size, patch_size)

        self.patch_embed = nn.Conv2d(
            in_channels=1,
            out_channels=embed_dim,
            kernel_size=patch_size,
            stride=patch_size
        )

        # Output grid of a convolution with stride == kernel and no padding.
        output_height = (spectrogram_height - patch_size[0]) // patch_size[0] + 1
        output_width = (spectrogram_width - patch_size[1]) // patch_size[1] + 1
        num_patches = output_height * output_width

        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.transformer = nn.TransformerEncoder(
            # forward() feeds (batch, tokens, dim); without batch_first=True
            # the encoder reads dim 0 as the sequence axis and attends across
            # the samples of a batch instead of across patches.
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True),
            num_layers=num_layers
        )
        self.classifier = nn.Linear(embed_dim, 2)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        x = self.patch_embed(x)   # (batch, dim, grid_h, grid_w)
        x = x.flatten(2)          # (batch, dim, patches)
        x = x.transpose(1, 2)     # (batch, patches, dim)

        cls_tokens = self.cls_token.expand(x.shape[0], -1, -1)
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.dropout(x)
        x = self.transformer(x)
        # Classify from the class token (position 0 of every sequence).
        return self.classifier(x[:, 0])

# ResNet Model with Dropout
class ResNetModel(nn.Module):
    """ResNet-18 adapted to single-channel input with a two-class head.

    The stock first convolution is replaced by a freshly initialised
    one-channel convolution, and the final layer by dropout followed by a
    512 -> 2 linear layer.

    Args:
        dropout: Dropout probability applied before the classification layer.
    """

    def __init__(self, dropout=0.5):
        super().__init__()
        backbone = resnet18(pretrained=True)
        # Swap the 3-channel stem for a 1-channel one (spectrogram input).
        backbone.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        # Replace the stock head with dropout + binary classifier.
        backbone.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(512, 2)
        )
        self.resnet = backbone

    def forward(self, x):
        logits = self.resnet(x)
        return logits

# Training Function with Model Saving
def train_model(config):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using {device}")
    
    # Compute class weights if needed
    class_weights = None
    if config['class_weighting']:
        class_weights = compute_class_weights(config['protocols_dir'], config['audio_dir'])
        class_weights = class_weights.to(device)
        print(f"Class weights: {class_weights}")
    
    # Initialize model
    if config['model_type'] == 'pann':
        model = PANNCnn6(num_classes=2, pretrained=True).to(device)
    elif config['model_type'] == 'ast':
        target_length = config['target_length']
        spectrogram_width = ((target_length - config['spec_params']['n_fft']) // config['spec_params']['hop_length']) + 1
        model = ASTModel(
            patch_size=config.get('patch_size', 16),
            num_layers=config.get('num_layers', 6),
            num_heads=config.get('num_heads', 8),
            embed_dim=config.get('embed_dim', 768),
            spectrogram_height=config['spec_params']['n_mels'],
            spectrogram_width=spectrogram_width
        ).to(device)
    elif config['model_type'] == 'resnet':
        model = ResNetModel(dropout=config.get('dropout', 0.5)).to(device)
    
    # Create checkpoint directory
    os.makedirs(config['checkpoint_dir'], exist_ok=True)
    metrics_file = os.path.join(config['checkpoint_dir'], 'metrics.csv')
    if not os.path.exists(metrics_file):
        pd.DataFrame(columns=['epoch', 'fold', 'train_loss', 'train_acc', 'train_f1', 
                            'train_recall', 'train_auc', 'val_loss', 'val_acc', 
                            'val_f1', 'val_recall', 'val_auc']).to_csv(metrics_file, index=False)
    
    # Get fold numbers
    train_files = glob.glob(f"{config['protocols_dir']}/train_fold*.csv")   
    fold_numbers = sorted([f.split("train_fold")[1].split(".csv")[0] for f in train_files])
    
    for fold in fold_numbers:
        print(f"\n=== Training Fold {fold} ===")
        
        # Initialize fresh model for each fold
        if config['model_type'] == 'pann':
            model = PANNCnn6(num_classes=2, pretrained=True).to(device)
        elif config['model_type'] == 'ast':
            model = ASTModel(
                patch_size=config.get('patch_size', 16),
                num_layers=config.get('num_layers', 6),
                num_heads=config.get('num_heads', 8),
                embed_dim=config.get('embed_dim', 768),
                spectrogram_height=config['spec_params']['n_mels'],
                spectrogram_width=((config['target_length'] - config['spec_params']['n_fft']) // 
                                 config['spec_params']['hop_length']) + 1
            ).to(device)
        elif config['model_type'] == 'resnet':
            model = ResNetModel(dropout=config.get('dropout', 0.5)).to(device)
        
        optimizer = optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=1e-5)
        criterion = nn.CrossEntropyLoss(weight=class_weights) if config['class_weighting'] else nn.CrossEntropyLoss()
        
        scheduler = None
        if config['lr_scheduler']:
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
        
        # Load data
        train_file = f"{config['protocols_dir']}/train_fold{fold}.csv"
        val_file = f"{config['protocols_dir']}/test_fold{fold}.csv"
        
        train_dataset = AudioDataset(
            train_file, config['audio_dir'], config['target_length'],
            config['augmentations'], config['use_spectrogram'], config.get('spec_params')
        )
        val_dataset = AudioDataset(
            val_file, config['audio_dir'], config['target_length'],
            False, config['use_spectrogram'], config.get('spec_params')
        )
        
        train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], 
                                collate_fn=audio_collate_fn, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=config['batch_size'],
                              collate_fn=audio_collate_fn)
        
        best_val_loss = float('inf')
        patience_counter = 0
        
        for epoch in range(config['num_epochs']):
            model.train()
            epoch_loss = 0
            train_preds, train_labels = [], []
            
            # Training loop
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                
                epoch_loss += loss.item()
                train_preds.append(outputs.detach())
                train_labels.append(labels)
            
            # Validation loop
            model.eval()
            val_loss = 0
            val_preds, val_labels = [], []
            with torch.no_grad():
                for inputs, labels in val_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    outputs = model(inputs)
                    val_loss += criterion(outputs, labels).item()
                    val_preds.append(outputs)
                    val_labels.append(labels)
            
            # Compute metrics
            train_preds = torch.cat(train_preds)
            train_labels = torch.cat(train_labels)
            train_acc, train_f1, train_recall, train_auc = compute_metrics(train_preds, train_labels)
            train_loss = epoch_loss / len(train_loader)
            
            val_preds = torch.cat(val_preds)
            val_labels = torch.cat(val_labels)
            val_acc, val_f1, val_recall, val_auc = compute_metrics(val_preds, val_labels)
            val_loss = val_loss / len(val_loader)
            
            # Update learning rate
            if scheduler:
                scheduler.step(val_loss)
            
            # Save metrics
            metrics = {
                'epoch': epoch + 1,
                'fold': fold,
                'train_loss': train_loss,
                'train_acc': train_acc,
                'train_f1': train_f1,
                'train_recall': train_recall,
                'train_auc': train_auc,
                'val_loss': val_loss,
                'val_acc': val_acc,
                'val_f1': val_f1,
                'val_recall': val_recall,
                'val_auc': val_auc
            }
            pd.DataFrame([metrics]).to_csv(metrics_file, mode='a', header=False, index=False)
            
            # Early stopping and model saving
            if val_loss < best_val_loss - config['min_delta']:
                best_val_loss = val_loss
                patience_counter = 0
                
                # Save best model for this fold
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_loss': val_loss,
                    'val_acc': val_acc,
                    'config': config
                }, os.path.join(config['checkpoint_dir'], f'best_model_fold{fold}.pth'))
                
                print(f"Fold {fold} Epoch {epoch+1}: New best model saved (val_loss: {val_loss:.4f})")
            else:
                patience_counter += 1
                if patience_counter >= config['patience']:
                    print(f"Early stopping triggered for fold {fold} at epoch {epoch + 1}")
                    break
            
            print(f"Fold {fold} Epoch {epoch+1}/{config['num_epochs']}: "
                  f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
                  f"Val Acc: {val_acc:.4f}")

# Main Execution
# Main Execution: analyse the dataset once, then run every experiment with a
# configuration derived from the shared base settings.
if __name__ == "__main__":
    # Locations shared by the analysis step and every experiment.
    protocols_dir = '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/train_protocol'
    audio_dir = '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/audio_files'

    # First analyze the dataset
    dataset_stats = analyze_dataset(protocols_dir=protocols_dir, audio_dir=audio_dir)

    # Clip length in samples at 16 kHz: the median duration, capped at 88200.
    median_duration = dataset_stats['duration_stats']['median']
    target_length = min(int(median_duration * 16000), 88200)  # Max 5.5 seconds

    # Weight the loss only when the two classes differ in size.
    real_count, fake_count = dataset_stats['class_counts'][0], dataset_stats['class_counts'][1]

    base_config = {
        'batch_size': 32,
        'num_epochs': 50,
        'learning_rate': 1e-4,
        'audio_dir': audio_dir,
        'protocols_dir': protocols_dir,
        'base_checkpoint_dir': '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/train_new/experiment2/',
        'target_length': target_length,
        'augmentations': True,
        'lr_scheduler': True,
        'class_weighting': real_count != fake_count,
        'use_spectrogram': True,
        'early_stopping': True,
        'patience': 7,
        'min_delta': 0.001,
        'spec_params': {
            'n_mels': 64,
            'n_fft': 1024,
            'hop_length': 512,
            'f_min': 20,
            'f_max': 8000
        }
    }

    # Each experiment overrides a few base settings.
    experiments = [
        {'name': 'pann_cnn6', 'model_type': 'pann', 'learning_rate': 5e-5},
        {'name': 'ast_deep', 'model_type': 'ast', 'num_layers': 12, 'learning_rate': 1e-4},
        {'name': 'resnet_dropout', 'model_type': 'resnet', 'dropout': 0.5},
    ]

    for exp in experiments:
        # Shallow merge: experiment values win, base key order is preserved.
        config = {**base_config, **exp}
        config['checkpoint_dir'] = os.path.join(base_config['base_checkpoint_dir'], exp['name'])
        print(f"\nRunning experiment: {exp['name']}")
        print(f"Configuration: {config}")
        train_model(config)


Dataset Analysis Results:
Total files: 3291
Valid files: 2523 (76.7%)
Class distribution: [1186, 1337] (Real: 1186, Fake: 1337)
Class ratio: 1.13:1
Sample rates found: {44100}
Duration statistics (seconds):
  min: 2.00
  max: 1569.00
  mean: 7.84
  median: 5.00
  std: 48.78

Running experiment: pann_cnn6
Configuration: {'batch_size': 32, 'num_epochs': 50, 'learning_rate': 5e-05, 'audio_dir': '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/audio_files', 'protocols_dir': '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/train_protocol', 'base_checkpoint_dir': '/home/azureuser/cloudfiles/code/Users/yashika22csu235/research/train_new/experiment2/', 'target_length': 80000, 'augmentations': True, 'lr_scheduler': True, 'class_weighting': True, 'use_spectrogram': True, 'early_stopping': True, 'patience': 7, 'min_delta': 0.001, 'spec_params': {'n_mels': 64, 'n_fft': 1024, 'hop_length': 512, 'f_min': 20, 'f_max': 8000}, 'name': 'pann_cnn6', 'model_type': 'pann', 'c