In [2]:
# Work from /app; presumably this is the project root that holds the `src`
# package imported below and the dataset directory — TODO confirm.
%cd /app

/app


In [8]:
import os
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from transformers import AutoFeatureExtractor
from src.config import CFG
from src.data_loader import create_dataloader
from src.model import load_model
from src.train import train  # Importing train function from train.py


def main(base_path="/app/birdclef-2024", seed=42):
    """Prepare the data split, build the model and loaders, and start training.

    Pipeline:
      1. Read ``filtered_data_with_labels.csv``, normalise ``filename`` to its
         last two path components, and down-sample every ``primary_label``
         group to the size of the smallest group.
      2. Make a stratified 80/20 train/test split and look the selected
         filenames up in ``master.csv``.
      3. Derive the class list, balanced class weights and ``num_labels`` from
         the training rows.
      4. Load the feature extractor and model, build both dataloaders and hand
         everything to ``train``.

    Args:
        base_path: Directory that contains ``master.csv`` and
            ``filtered_data_with_labels.csv``; also forwarded to
            ``create_dataloader``.
        seed: Seed used for the per-class sampling, the train/test split and
            torch's global RNG.

    Returns:
        None. Progress is printed; training itself is delegated to ``train``.

    Raises:
        KeyError: If a sampled filename is missing from ``master.csv``, or the
            filtered CSV has no ``y`` column to drop.
    """
    # Set paths
    master_path = os.path.join(base_path, "master.csv")
    csv_file = os.path.join(base_path, "filtered_data_with_labels.csv")

    # Seed torch's global RNG so that anything downstream drawing from it is
    # repeatable between runs (pandas/sklearn get the seed explicitly below).
    torch.manual_seed(seed)

    # Load the feature extractor for AST model
    print("Loading feature extractor...")
    extractor = AutoFeatureExtractor.from_pretrained(CFG.feature_extractor_name)

    # Load and prepare filtered data
    print("Loading filtered data...")
    df = pd.read_csv(csv_file).drop(columns='y')
    # Keep only the last two path components so the value can be used as a
    # lookup key into master.csv further down.
    df["filename"] = df["filename"].str.split('/').apply(lambda x: '/'.join(x[-2:]))
    df.index = df["filename"]

    # Balance the number of samples for each class by down-sampling every
    # class to the size of the rarest one.
    samples_per_class = df["primary_label"].value_counts().min()
    df_sampled = df.groupby('primary_label').sample(n=samples_per_class, random_state=seed)

    # Split into train and test datasets
    print("Splitting data into train and test sets...")
    df_train, df_test = train_test_split(
        df_sampled,
        test_size=0.2,
        stratify=df_sampled['primary_label'],
        random_state=seed,
    )

    # Load and prepare master data (the first column is discarded)
    print("Loading master data...")
    master = pd.read_csv(master_path).iloc[:, 1:]
    master.index = master["filename"]

    # Align train and test sets with master data using their index
    df_train = master.loc[df_train.index].reset_index(drop=True)
    df_test = master.loc[df_test.index].reset_index(drop=True)

    # NOTE(review): `unique()` returns labels in order of first appearance, and
    # compute_class_weight returns one weight per entry of `classes` in that
    # same order. Confirm this order matches the integer label ids produced by
    # the dataloader; otherwise the weights are applied to the wrong classes.
    classes = df_train['primary_label'].unique()
    class_weights = compute_class_weight(class_weight='balanced', classes=classes, y=df_train['primary_label'])

    # Number of output classes follows from the labels present in the train split
    num_labels = len(classes)
    print(f"Number of unique labels: {num_labels}")

    # Load the model with the correct num_labels
    print("Loading model...")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = load_model(CFG.model_name, num_labels=num_labels, device=device)

    # Make sure the model lives on the selected device. No dtype conversion
    # happens here: the parameters keep whatever precision load_model produced.
    model = model.to(device)
    print(f"Model is running on: {device}")

    # Check train and test sizes and unique labels
    print(f"Train set size: {len(df_train)} samples")
    print(f"Test set size: {len(df_test)} samples")
    print(f"Unique labels in train set: {num_labels}")
    print(f"Unique labels in test set: {len(df_test['primary_label'].unique())}")

    # NOTE(review): both loaders are built from `csv_file`, not from the
    # df_train / df_test frames computed above, so within this function the
    # split only feeds the class weights, num_labels and the printed sizes.
    # Presumably create_dataloader repeats the same sampling/split internally
    # (with test=True selecting the held-out part) — confirm the two stay in
    # sync, or pass the frames in directly.

    # Create DataLoader for training
    print("Creating DataLoader for training data...")
    train_loader = create_dataloader(
        csv_file=csv_file,
        base_path=base_path,
        batch_size=CFG.batch_size,
        extractor=extractor,
    )

    # Create DataLoader for testing
    print("Creating DataLoader for testing data...")
    test_loader = create_dataloader(
        csv_file=csv_file,
        base_path=base_path,
        batch_size=CFG.batch_size,
        extractor=extractor,
        shuffle=False,
        test=True,
    )

    # Convert class weights to a float32 tensor on the same device as the model
    class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

    # Start training by calling train function from train.py
    train(model, train_loader, test_loader, device, CFG, class_weights_tensor)


In [9]:
# Run the whole pipeline defined in main(): data preparation, model loading and training.
main()

Loading feature extractor...
Loading filtered data...
Splitting data into train and test sets...
Loading master data...
Number of unique labels: 89
Loading model...
Loading model with num_labels = 89
Original classifier structure: ASTMLPHead(
  (layernorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
  (dense): Linear(in_features=768, out_features=527, bias=True)
)
Classifier input features: 768
Model is running on: cuda
Train set size: 1922 samples
Test set size: 481 samples
Unique labels in train set: 89
Unique labels in test set: 89
Creating DataLoader for training data...
Creating DataLoader for testing data...


Epoch 1/30: 100%|██████████| 481/481 [03:51<00:00,  2.07batch/s]


Epoch 1/30 - Training Loss: 4.5019


Evaluating: 100%|██████████| 121/121 [00:16<00:00,  7.54batch/s, accuracy=0.0229, f1_macro=0.0127, f1_weighted=0.02]    


Epoch 1/30
Validation Loss: 4.4529
Validation Accuracy: 0.0229
Validation Macro F1: 0.0127
Validation Weighted F1: 0.0200
New best model found at epoch 1. Saving model...
Model saved to ./saved_models/best_model_epoch_1.pth


Epoch 2/30: 100%|██████████| 481/481 [03:52<00:00,  2.07batch/s]


Epoch 2/30 - Training Loss: 4.4065


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.61batch/s, accuracy=0.0478, f1_macro=0.0293, f1_weighted=0.0455]  


Epoch 2/30
Validation Loss: 4.3788
Validation Accuracy: 0.0478
Validation Macro F1: 0.0293
Validation Weighted F1: 0.0455
New best model found at epoch 2. Saving model...
Model saved to ./saved_models/best_model_epoch_2.pth


Epoch 3/30: 100%|██████████| 481/481 [03:50<00:00,  2.09batch/s]


Epoch 3/30 - Training Loss: 4.3178


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.61batch/s, accuracy=0.0894, f1_macro=0.0537, f1_weighted=0.0861]


Epoch 3/30
Validation Loss: 4.3179
Validation Accuracy: 0.0894
Validation Macro F1: 0.0537
Validation Weighted F1: 0.0861
New best model found at epoch 3. Saving model...
Model saved to ./saved_models/best_model_epoch_3.pth


Epoch 4/30: 100%|██████████| 481/481 [03:49<00:00,  2.09batch/s]


Epoch 4/30 - Training Loss: 4.2139


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.62batch/s, accuracy=0.143, f1_macro=0.0896, f1_weighted=0.138] 


Epoch 4/30
Validation Loss: 4.2388
Validation Accuracy: 0.1435
Validation Macro F1: 0.0896
Validation Weighted F1: 0.1384
New best model found at epoch 4. Saving model...
Model saved to ./saved_models/best_model_epoch_4.pth


Epoch 5/30: 100%|██████████| 481/481 [03:50<00:00,  2.09batch/s]


Epoch 5/30 - Training Loss: 4.1082


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.59batch/s, accuracy=0.208, f1_macro=0.129, f1_weighted=0.201]


Epoch 5/30
Validation Loss: 4.1627
Validation Accuracy: 0.2079
Validation Macro F1: 0.1288
Validation Weighted F1: 0.2011
New best model found at epoch 5. Saving model...
Model saved to ./saved_models/best_model_epoch_5.pth


Epoch 6/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 6/30 - Training Loss: 4.0052


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.73batch/s, accuracy=0.239, f1_macro=0.161, f1_weighted=0.235]


Epoch 6/30
Validation Loss: 4.0763
Validation Accuracy: 0.2391
Validation Macro F1: 0.1612
Validation Weighted F1: 0.2348
New best model found at epoch 6. Saving model...
Model saved to ./saved_models/best_model_epoch_6.pth


Epoch 7/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 7/30 - Training Loss: 3.9032


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.262, f1_macro=0.173, f1_weighted=0.255]


Epoch 7/30
Validation Loss: 4.0052
Validation Accuracy: 0.2620
Validation Macro F1: 0.1729
Validation Weighted F1: 0.2548
New best model found at epoch 7. Saving model...
Model saved to ./saved_models/best_model_epoch_7.pth


Epoch 8/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 8/30 - Training Loss: 3.7948


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.72batch/s, accuracy=0.264, f1_macro=0.177, f1_weighted=0.258]


Epoch 8/30
Validation Loss: 3.9391
Validation Accuracy: 0.2640
Validation Macro F1: 0.1770
Validation Weighted F1: 0.2583
New best model found at epoch 8. Saving model...
Model saved to ./saved_models/best_model_epoch_8.pth


Epoch 9/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 9/30 - Training Loss: 3.6913


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.318, f1_macro=0.218, f1_weighted=0.311]


Epoch 9/30
Validation Loss: 3.8442
Validation Accuracy: 0.3181
Validation Macro F1: 0.2175
Validation Weighted F1: 0.3110
New best model found at epoch 9. Saving model...
Model saved to ./saved_models/best_model_epoch_9.pth


Epoch 10/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 10/30 - Training Loss: 3.5790


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.72batch/s, accuracy=0.356, f1_macro=0.242, f1_weighted=0.348]


Epoch 10/30
Validation Loss: 3.7736
Validation Accuracy: 0.3555
Validation Macro F1: 0.2420
Validation Weighted F1: 0.3481
New best model found at epoch 10. Saving model...
Model saved to ./saved_models/best_model_epoch_10.pth


Epoch 11/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 11/30 - Training Loss: 3.5106


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.358, f1_macro=0.251, f1_weighted=0.35] 


Epoch 11/30
Validation Loss: 3.7769
Validation Accuracy: 0.3576
Validation Macro F1: 0.2511
Validation Weighted F1: 0.3499


Epoch 12/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 12/30 - Training Loss: 3.5010


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.349, f1_macro=0.237, f1_weighted=0.341]


Epoch 12/30
Validation Loss: 3.7599
Validation Accuracy: 0.3493
Validation Macro F1: 0.2375
Validation Weighted F1: 0.3409
New best model found at epoch 12. Saving model...
Model saved to ./saved_models/best_model_epoch_12.pth


Epoch 13/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 13/30 - Training Loss: 3.4853


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.76batch/s, accuracy=0.343, f1_macro=0.23, f1_weighted=0.335] 


Epoch 13/30
Validation Loss: 3.7582
Validation Accuracy: 0.3430
Validation Macro F1: 0.2304
Validation Weighted F1: 0.3354
New best model found at epoch 13. Saving model...
Model saved to ./saved_models/best_model_epoch_13.pth


Epoch 14/30: 100%|██████████| 481/481 [03:47<00:00,  2.12batch/s]


Epoch 14/30 - Training Loss: 3.4604


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.37, f1_macro=0.258, f1_weighted=0.361] 


Epoch 14/30
Validation Loss: 3.7420
Validation Accuracy: 0.3701
Validation Macro F1: 0.2577
Validation Weighted F1: 0.3609
New best model found at epoch 14. Saving model...
Model saved to ./saved_models/best_model_epoch_14.pth


Epoch 15/30: 100%|██████████| 481/481 [03:47<00:00,  2.12batch/s]


Epoch 15/30 - Training Loss: 3.4576


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.73batch/s, accuracy=0.366, f1_macro=0.252, f1_weighted=0.359]


Epoch 15/30
Validation Loss: 3.7367
Validation Accuracy: 0.3659
Validation Macro F1: 0.2524
Validation Weighted F1: 0.3588
New best model found at epoch 15. Saving model...
Model saved to ./saved_models/best_model_epoch_15.pth


Epoch 16/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 16/30 - Training Loss: 3.4447


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.76batch/s, accuracy=0.364, f1_macro=0.252, f1_weighted=0.356]


Epoch 16/30
Validation Loss: 3.7250
Validation Accuracy: 0.3638
Validation Macro F1: 0.2521
Validation Weighted F1: 0.3557
New best model found at epoch 16. Saving model...
Model saved to ./saved_models/best_model_epoch_16.pth


Epoch 17/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 17/30 - Training Loss: 3.4283


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.337, f1_macro=0.229, f1_weighted=0.326]


Epoch 17/30
Validation Loss: 3.7275
Validation Accuracy: 0.3368
Validation Macro F1: 0.2287
Validation Weighted F1: 0.3264


Epoch 18/30: 100%|██████████| 481/481 [03:49<00:00,  2.10batch/s]


Epoch 18/30 - Training Loss: 3.4182


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.345, f1_macro=0.234, f1_weighted=0.337]


Epoch 18/30
Validation Loss: 3.7225
Validation Accuracy: 0.3451
Validation Macro F1: 0.2337
Validation Weighted F1: 0.3375
New best model found at epoch 18. Saving model...
Model saved to ./saved_models/best_model_epoch_18.pth


Epoch 19/30: 100%|██████████| 481/481 [03:49<00:00,  2.10batch/s]


Epoch 19/30 - Training Loss: 3.4166


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.347, f1_macro=0.233, f1_weighted=0.34] 


Epoch 19/30
Validation Loss: 3.7225
Validation Accuracy: 0.3472
Validation Macro F1: 0.2332
Validation Weighted F1: 0.3395
New best model found at epoch 19. Saving model...
Model saved to ./saved_models/best_model_epoch_19.pth


Epoch 20/30: 100%|██████████| 481/481 [03:48<00:00,  2.10batch/s]


Epoch 20/30 - Training Loss: 3.4113


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.353, f1_macro=0.24, f1_weighted=0.344] 


Epoch 20/30
Validation Loss: 3.6962
Validation Accuracy: 0.3534
Validation Macro F1: 0.2399
Validation Weighted F1: 0.3444
New best model found at epoch 20. Saving model...
Model saved to ./saved_models/best_model_epoch_20.pth


Epoch 21/30: 100%|██████████| 481/481 [03:47<00:00,  2.12batch/s]


Epoch 21/30 - Training Loss: 3.4103


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.73batch/s, accuracy=0.349, f1_macro=0.237, f1_weighted=0.341]


Epoch 21/30
Validation Loss: 3.6976
Validation Accuracy: 0.3493
Validation Macro F1: 0.2367
Validation Weighted F1: 0.3409


Epoch 22/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 22/30 - Training Loss: 3.4049


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.76batch/s, accuracy=0.347, f1_macro=0.234, f1_weighted=0.34] 


Epoch 22/30
Validation Loss: 3.6955
Validation Accuracy: 0.3472
Validation Macro F1: 0.2342
Validation Weighted F1: 0.3395
New best model found at epoch 22. Saving model...
Model saved to ./saved_models/best_model_epoch_22.pth


Epoch 23/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 23/30 - Training Loss: 3.3884


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.37, f1_macro=0.257, f1_weighted=0.362] 


Epoch 23/30
Validation Loss: 3.6891
Validation Accuracy: 0.3701
Validation Macro F1: 0.2571
Validation Weighted F1: 0.3623
New best model found at epoch 23. Saving model...
Model saved to ./saved_models/best_model_epoch_23.pth


Epoch 24/30: 100%|██████████| 481/481 [03:48<00:00,  2.11batch/s]


Epoch 24/30 - Training Loss: 3.4002


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.343, f1_macro=0.235, f1_weighted=0.337]


Epoch 24/30
Validation Loss: 3.6930
Validation Accuracy: 0.3430
Validation Macro F1: 0.2352
Validation Weighted F1: 0.3375


Epoch 25/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 25/30 - Training Loss: 3.3932


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.362, f1_macro=0.247, f1_weighted=0.352]


Epoch 25/30
Validation Loss: 3.6933
Validation Accuracy: 0.3617
Validation Macro F1: 0.2468
Validation Weighted F1: 0.3518


Epoch 26/30: 100%|██████████| 481/481 [03:48<00:00,  2.10batch/s]


Epoch 26/30 - Training Loss: 3.3960


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.75batch/s, accuracy=0.368, f1_macro=0.255, f1_weighted=0.36] 


Epoch 26/30
Validation Loss: 3.6928
Validation Accuracy: 0.3680
Validation Macro F1: 0.2548
Validation Weighted F1: 0.3602


Epoch 27/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 27/30 - Training Loss: 3.3954


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.37, f1_macro=0.257, f1_weighted=0.364] 


Epoch 27/30
Validation Loss: 3.6861
Validation Accuracy: 0.3701
Validation Macro F1: 0.2565
Validation Weighted F1: 0.3636
New best model found at epoch 27. Saving model...
Model saved to ./saved_models/best_model_epoch_27.pth


Epoch 28/30: 100%|██████████| 481/481 [03:47<00:00,  2.11batch/s]


Epoch 28/30 - Training Loss: 3.3927


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.362, f1_macro=0.25, f1_weighted=0.354] 


Epoch 28/30
Validation Loss: 3.7000
Validation Accuracy: 0.3617
Validation Macro F1: 0.2501
Validation Weighted F1: 0.3540


Epoch 29/30: 100%|██████████| 481/481 [03:47<00:00,  2.12batch/s]


Epoch 29/30 - Training Loss: 3.4014


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.74batch/s, accuracy=0.341, f1_macro=0.235, f1_weighted=0.334]


Epoch 29/30
Validation Loss: 3.6975
Validation Accuracy: 0.3410
Validation Macro F1: 0.2346
Validation Weighted F1: 0.3340


Epoch 30/30: 100%|██████████| 481/481 [03:48<00:00,  2.10batch/s]


Epoch 30/30 - Training Loss: 3.3811


Evaluating: 100%|██████████| 121/121 [00:15<00:00,  7.76batch/s, accuracy=0.362, f1_macro=0.243, f1_weighted=0.353]

Epoch 30/30
Validation Loss: 3.6889
Validation Accuracy: 0.3617
Validation Macro F1: 0.2432
Validation Weighted F1: 0.3533



