In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import math
import os
import cv2
import IPython.display as ipd
import librosa
import librosa.display
import torch
import numpy as np
import torch.nn.functional as F
import torchvision
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms, models
import torch.nn as nn
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

In [2]:
esc_10=True
pretext=False
proposed=True

In [3]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print(device)

cuda:0


In [4]:
trainPath = './environmental-sound-classification-50/audio/audio/44100'
trainData = pd.read_csv('./environmental-sound-classification-50/esc50.csv')

if esc_10:
    new_df = trainData[trainData["esc10"]==True].reset_index()
    categories = list(set(list(new_df.category.values)))
    for i in range(len(new_df)):
        cat = new_df.loc[i, "category"]
        new_df.loc[i, "target"] = categories.index(cat)
    trainData = new_df

print(len(trainData))
trainData.head()

400


Unnamed: 0,index,filename,fold,target,category,esc10,src_file,take
0,0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,14,1-110389-A-0.wav,1,0,dog,True,110389,A
2,24,1-116765-A-41.wav,1,9,chainsaw,True,116765,A
3,54,1-17150-A-12.wav,1,8,crackling_fire,True,17150,A
4,55,1-172649-A-40.wav,1,7,helicopter,True,172649,A


In [5]:
# From the image provided, we know there are 5 parent categories.
# We need to map the 'category' column to these new parent categories with values from 0 to 4.

# Define the mappings based on the parent categories provided in the image.
if esc_10:
    parent_category_mappings = {
        'crackling_fire': 0,
        'rain': 0,
        'clock_tick': 1,
        'sneezing': 2,
        'sea_waves': 0,
        'crying_baby': 2,
        'rooster': 0,
        'chainsaw': 1,
        'helicopter': 1,
        'dog': 0
    }
else:
    parent_category_mappings = {
        'dog': 0,
        'rooster': 0,
        'pig': 0,
        'cow': 0,
        'frog': 0,
        'cat': 0,
        'hen': 0,
        'insects': 0,
        'sheep': 0,
        'crow': 0,
        'rain': 1,
        'sea_waves': 1,
        'crackling_fire': 1,
        'crickets': 1,
        'chirping_birds': 1,
        'water_drops': 1,
        'wind': 1,
        'pouring_water': 1,
        'toilet_flush': 1,
        'thunderstorm': 1,
        'crying_baby': 2,
        'sneezing': 2,
        'clapping': 2,
        'breathing': 2,
        'coughing': 2,
        'footsteps': 2,
        'laughing': 2,
        'brushing_teeth': 2,
        'snoring': 2,
        'drinking_sipping': 2,
        'door_wood_knock': 3,
        'mouse_click': 3,
        'keyboard_typing': 3,
        'door_wood_creaks': 3,
        'can_opening': 3,
        'washing_machine': 3,
        'vacuum_cleaner': 3,
        'clock_alarm': 3,
        'clock_tick': 3,
        'glass_breaking': 3,
        'helicopter': 4,
        'chainsaw': 4,
        'siren': 4,
        'car_horn': 4,
        'engine': 4,
        'train': 4,
        'church_bells': 4,
        'airplane': 4,
        'fireworks': 4,
        'hand_saw': 4,
    }
print(len(parent_category_mappings))

10


In [6]:
import pandas as pd

# Define a function to perform the entire process in one cell
def update_target_values(data, mappings):
    # Load the dataset

    # Map the 'category' in the dataframe to the new parent category values
    data['parent_category'] = data['category'].map(mappings)

    # Replace the 'target' values with the new 'parent_category' values
    data['target'] = data['parent_category'].astype(int)

    # Drop the 'parent_category' column as it is no longer needed
    data.drop('parent_category', axis=1, inplace=True)

    # Save the updated dataframe to a new csv file
    
    return data

# Call the function with the original file path and category mappings
if pretext==True:
    trainData = update_target_values(trainData, parent_category_mappings)

In [7]:
trainData.head()

Unnamed: 0,index,filename,fold,target,category,esc10,src_file,take
0,0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,14,1-110389-A-0.wav,1,0,dog,True,110389,A
2,24,1-116765-A-41.wav,1,9,chainsaw,True,116765,A
3,54,1-17150-A-12.wav,1,8,crackling_fire,True,17150,A
4,55,1-172649-A-40.wav,1,7,helicopter,True,172649,A


In [8]:
class Dataset(Dataset):
    def __init__(self, dataframe, fold=None, val=False, test=False):
        
        self.fold = fold  
        all_folds = [1, 2, 3, 4, 5]
        test_fold = 5-fold
        all_folds.remove(test_fold)
        if test==False:
            df = dataframe[dataframe['fold'].isin(all_folds)]
            train_df, val_df = train_test_split(df, test_size=0.1, random_state=42)
            if val ==True:
                self.dataframe = val_df
            else:
                self.dataframe = train_df
               
        elif test==True:
            self.dataframe = dataframe[dataframe['fold'] == test_fold]
    def __getitem__(self, index):
        path_to_file = self.get_path_to_file(index)
        signal = self.preprocess_signal(path_to_file)

        x = np.stack([cv2.resize(signal, (224, 224)) for _ in range(3)])
        y = self.dataframe.target.values[index]
        return torch.tensor(x, dtype=torch.float), int(y)
    
    def get_path_to_file(self, index):
        return f'./environmental-sound-classification-50//audio/audio/16000/{self.dataframe.filename.values[index]}'
    def preprocess_signal(self, path_to_file):
        signal, _ = librosa.load(path_to_file, sr=16000)
        signal = librosa.feature.melspectrogram(y=signal)
        signal = librosa.power_to_db(signal)
        return signal

    def __len__(self):
        return self.dataframe.shape[0]

In [9]:
counts = trainData.target.value_counts()
num_classes = len(counts)
counts

target
0    40
9    40
8    40
7    40
2    40
6    40
3    40
5    40
4    40
1    40
Name: count, dtype: int64

In [10]:
num_classes = len(set(trainData['target'].values))
num_classes

10

In [11]:
batch_size = 32
fold = 0
trainSet = Dataset(trainData, fold=fold)
valSet = Dataset(trainData, fold=fold, val=True)
testSet = Dataset(trainData, fold=fold, test=True)
trainLoader = DataLoader(trainSet, batch_size=batch_size, shuffle=True)
valLoader = DataLoader(valSet , batch_size=batch_size)
testLoader = DataLoader(testSet , batch_size=batch_size)

print('Training set: {}, Validation set: {}, Test Set: {}'.format(len(trainSet), len(valSet), len(testSet)))

print("Folds of training set:", set(list(trainSet.dataframe["fold"].values)))
print("Folds of validation set:", set(list(valSet.dataframe["fold"].values)))
print("Folds of test set:", set
(list(testSet.dataframe["fold"].values)))


Training set: 288, Validation set: 32, Test Set: 80
Folds of training set: {1, 2, 3, 4}
Folds of validation set: {1, 2, 3, 4}
Folds of test set: {5}


In [12]:
print(trainSet[0][0].shape)
print(valSet[0][0].shape)
print(testSet[0][0].shape)

torch.Size([3, 224, 224])
torch.Size([3, 224, 224])
torch.Size([3, 224, 224])


In [13]:
class CustomEffNet(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomEffNet, self).__init__()
        # Load a pre-trained EfficientNet
        self.effnet = models.efficientnet_b1(pretrained=True)
        # Freeze all layers in EfficientNet
        for param in self.effnet.parameters():
            param.requires_grad = True #Set true to unfreeze

        # Get the input features of the original classifier
        in_features = self.effnet.classifier[1].in_features

        # Replace the classifier with custom layers
        self.effnet.classifier = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes),
        )

        # Unfreeze the classifier layers
        for param in self.effnet.classifier.parameters():
            param.requires_grad = True

    def forward(self, x):
        return self.effnet(x)

# Create the model
#model = CustomEffNet(num_classes=50)
#model.to(device)
def get_pretext_loaded(model):
    backbone_weights = torch.load('./effnetb1_pretext_3_esc10.pth', map_location='cpu')
    model_dict = dict(model.effnet.state_dict())
    for key, weights in backbone_weights["model_state"].items():
        a = key[7:]
        if a.find('classifier') == -1:
            model_dict[a] = weights
    
    model.effnet.load_state_dict(model_dict)
    a = model_dict['features.0.0.weight'][0]
    b = backbone_weights["model_state"]["effnet.features.0.0.weight"][0]
    if np.array_equal(np.array(a), np.array(b)):
        print("Backbone Loaded")
    else:
        print("Backbone not loaded")
    return model


In [14]:
if esc_10:
    
    model = CustomEffNet(num_classes=num_classes)
else:
    model = CustomEffNet(num_classes=num_classes)
model



CustomEffNet(
  (effnet): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (scale_a

In [17]:
from time import time 
start_time = time()
folds = 5
for fold in range(4, folds):
    batch_size = 32
    trainSet = Dataset(trainData, fold=fold)
    valSet = Dataset(trainData, fold=fold, val=True)
    testSet = Dataset(trainData, fold=fold, test=True)
    trainLoader = DataLoader(trainSet, batch_size=batch_size, shuffle=True)
    valLoader = DataLoader(valSet , batch_size=batch_size)
    testLoader = DataLoader(testSet , batch_size=batch_size)

    print('Training set: {}, Validation set: {}, Test Set: {}'.format(len(trainSet), len(valSet), len(testSet)))
    print("Folds of training set:", set(list(trainSet.dataframe["fold"].values)))
    print("Folds of validation set:", set(list(valSet.dataframe["fold"].values)))
    print("Folds of test set:", set(list(testSet.dataframe["fold"].values)))
    print("Classfying", num_classes, "classes.")
    
    model = get_pretext_loaded(CustomEffNet(num_classes=num_classes))

    if torch.cuda.device_count() >= 2:
        print(f"Using {torch.cuda.device_count()} GPUs!")
        device = torch.device("cuda")
        model = torch.nn.DataParallel(model)  # Wrap the model for multi-GPU use
        model.to(device)
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        model.to(device)
        print("Using single GPU or CPU")
    epochs = 50
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    cost = torch.nn.CrossEntropyLoss()
    best_val_accuracy = 0.0
    best_val_loss = 10000
    checkpoint_path = f"effnetb1_baseline_model_esc50{fold+1}.pth"
    

    if os.path.isfile(checkpoint_path):
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state'])
        optimizer.load_state_dict(checkpoint['optimizer_state'])
        best_val_accuracy = checkpoint['best_val_accuracy']
        start_epoch = checkpoint['epoch'] + 1
        print("Resuming training from epoch: "+str(start_epoch) +" with best_val_accuracy: "+str(best_val_accuracy))
        
    else:
        start_epoch = 0
    
    for epoch in range(start_epoch, epochs):
        train_loss = 0
        val_loss = 0
        train_correct = 0
        val_correct = 0
        
        model.train()
        for x, y in tqdm(trainLoader):
            optimizer.zero_grad()
            x,y = x.to(device),y.to(device)
            pred = model(x)
            loss = cost(pred, y)
            train_loss += cost(pred, y).item()
            train_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            for x, y in tqdm(valLoader):
                x,y = x.to(device),y.to(device)
                pred = model(x)
                loss = cost(pred, y)
                val_loss += cost(pred, y).item()
                val_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss = train_loss/len(trainLoader)
        val_loss = val_loss/len(valLoader)
        train_accuracy = train_correct / len(trainData)
        val_accuracy = val_correct / len(valSet)
        
        if val_loss < best_val_loss:
            print(f"Validation loss has improved from {best_val_loss} to {val_loss}")
            best_val_loss = val_loss
            
        print("epoch = %d, train_loss = %.5f, val_loss = %.5f, train_accuracy = %.5f, val_accuracy = %.5f" % (epoch, train_loss, val_loss, train_accuracy, val_accuracy))
        
        #saving the model when the val_accuracy improves
        if val_accuracy >= best_val_accuracy:
            if val_accuracy == best_val_accuracy and val_loss > best_val_loss:
                continue
            print(f"Validation Accuracy improved from {best_val_accuracy:.5f} to {val_accuracy:.5f}. Saving checkpoint.")
            best_val_accuracy = val_accuracy
            torch.save({
                'epoch': epoch,
                'model_state': model.state_dict(),
                'optimizer_state': optimizer.state_dict(),
                'best_val_accuracy': best_val_accuracy
            }, f"effnetb1_baseline_model_esc50{fold+1}.pth")
            
            
    #getting final best validation accuracy here
    
    checkpoint_path =  f"effnetb1_baseline_model_esc50{fold+1}.pth"
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state'])
    model.eval()
    test_correct = 0
    for x, y in tqdm(testLoader):
        x,y = x.to(device),y.to(device)
        pred = model(x)
        pred = torch.softmax(pred, axis=1)       
        test_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_accuracy = test_correct / len(testSet)
    print(f"Test accuracy for fold: {fold+1} is: {test_accuracy*100}")
        
end_time = time()
total_time = end_time - start_time
print(f'Total Training Time: {total_time:.2f} seconds')

Training set: 288, Validation set: 32, Test Set: 80
Folds of training set: {2, 3, 4, 5}
Folds of validation set: {2, 3, 4, 5}
Folds of test set: {1}
Classfying 10 classes.


Backbone Loaded
Using single GPU or CPU
Resuming training from epoch: 12 with best_val_accuracy: 1.0


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 10000 to 0.10018189996480942
epoch = 12, train_loss = 0.04672, val_loss = 0.10018, train_accuracy = 0.71750, val_accuracy = 0.96875


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 13, train_loss = 0.04425, val_loss = 0.12805, train_accuracy = 0.71000, val_accuracy = 0.96875


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 14, train_loss = 0.05883, val_loss = 0.16547, train_accuracy = 0.71250, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 15, train_loss = 0.03260, val_loss = 0.18160, train_accuracy = 0.71750, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 16, train_loss = 0.04320, val_loss = 0.18350, train_accuracy = 0.71500, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 17, train_loss = 0.02807, val_loss = 0.12683, train_accuracy = 0.71750, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 18, train_loss = 0.02576, val_loss = 0.10401, train_accuracy = 0.71500, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 19, train_loss = 0.01418, val_loss = 0.12223, train_accuracy = 0.71750, val_accuracy = 0.96875


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.10018189996480942 to 0.06588531285524368
epoch = 20, train_loss = 0.03562, val_loss = 0.06589, train_accuracy = 0.71500, val_accuracy = 0.96875


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.06588531285524368 to 0.062379609793424606
epoch = 21, train_loss = 0.01626, val_loss = 0.06238, train_accuracy = 0.71750, val_accuracy = 0.96875


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.062379609793424606 to 0.04496088624000549
epoch = 22, train_loss = 0.04170, val_loss = 0.04496, train_accuracy = 0.71500, val_accuracy = 1.00000
Validation Accuracy improved from 1.00000 to 1.00000. Saving checkpoint.


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.04496088624000549 to 0.04269970953464508
epoch = 23, train_loss = 0.00761, val_loss = 0.04270, train_accuracy = 0.72000, val_accuracy = 1.00000
Validation Accuracy improved from 1.00000 to 1.00000. Saving checkpoint.


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.04269970953464508 to 0.042496711015701294
epoch = 24, train_loss = 0.01184, val_loss = 0.04250, train_accuracy = 0.71750, val_accuracy = 1.00000
Validation Accuracy improved from 1.00000 to 1.00000. Saving checkpoint.


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 25, train_loss = 0.00907, val_loss = 0.05138, train_accuracy = 0.72000, val_accuracy = 1.00000


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Validation loss has improved from 0.042496711015701294 to 0.031719762831926346
epoch = 26, train_loss = 0.01240, val_loss = 0.03172, train_accuracy = 0.71750, val_accuracy = 1.00000
Validation Accuracy improved from 1.00000 to 1.00000. Saving checkpoint.


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 27, train_loss = 0.00684, val_loss = 0.03676, train_accuracy = 0.72000, val_accuracy = 1.00000


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 28, train_loss = 0.01635, val_loss = 0.04001, train_accuracy = 0.71750, val_accuracy = 1.00000


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 29, train_loss = 0.00932, val_loss = 0.06272, train_accuracy = 0.72000, val_accuracy = 1.00000


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 30, train_loss = 0.02194, val_loss = 0.10191, train_accuracy = 0.71750, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 31, train_loss = 0.00526, val_loss = 0.12716, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 32, train_loss = 0.01904, val_loss = 0.11850, train_accuracy = 0.71500, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 33, train_loss = 0.01833, val_loss = 0.16449, train_accuracy = 0.71750, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 34, train_loss = 0.02205, val_loss = 0.22774, train_accuracy = 0.71750, val_accuracy = 0.87500


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 35, train_loss = 0.01704, val_loss = 0.21755, train_accuracy = 0.71750, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 36, train_loss = 0.00744, val_loss = 0.19049, train_accuracy = 0.72000, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 37, train_loss = 0.00390, val_loss = 0.19171, train_accuracy = 0.72000, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 38, train_loss = 0.00384, val_loss = 0.12617, train_accuracy = 0.72000, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 39, train_loss = 0.00294, val_loss = 0.10371, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 40, train_loss = 0.00357, val_loss = 0.17363, train_accuracy = 0.72000, val_accuracy = 0.87500


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 41, train_loss = 0.00621, val_loss = 0.19920, train_accuracy = 0.72000, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 42, train_loss = 0.00462, val_loss = 0.09918, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 43, train_loss = 0.00440, val_loss = 0.13479, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 44, train_loss = 0.00790, val_loss = 0.12949, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 45, train_loss = 0.00374, val_loss = 0.13140, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 46, train_loss = 0.00435, val_loss = 0.18511, train_accuracy = 0.72000, val_accuracy = 0.90625


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 47, train_loss = 0.00454, val_loss = 0.17409, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 48, train_loss = 0.00272, val_loss = 0.16833, train_accuracy = 0.72000, val_accuracy = 0.93750


  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

epoch = 49, train_loss = 0.00753, val_loss = 0.16756, train_accuracy = 0.71750, val_accuracy = 0.93750


  0%|          | 0/3 [00:00<?, ?it/s]

Test accuracy for fold: 5 is: 98.75
Total Training Time: 99.72 seconds


In [20]:
checkpoint_path = "efficientnet_baseline_model_fold5.pth"
checkpoint = torch.load(checkpoint_path)
model.load_state_dict(checkpoint['model_state'])
model.eval()
test_correct = 0
for x, y in tqdm(valLoader):
    x,y = x.to(device),y.to(device)
    pred = model(x)
    #print(x.shape, y.shape, pred.shape)
   # m = nn.Softmax(dim=1)
    
    #output = m(pred)
    pred = torch.softmax(pred, axis=1)
    #print(pred.shape, y.shape)
    #print(pred[31], y[31])
    #print(pred.argmax(1))
    test_correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_accuracy = test_correct / len(valSet)
test_accuracy

  0%|          | 0/13 [00:00<?, ?it/s]

0.7225

In [43]:
output.shape

torch.Size([16, 50])

In [50]:
print(pred[0])

tensor([1.0268e-04, 3.3234e-06, 5.6489e-06, 4.4848e-07, 9.9799e-01, 7.1323e-07,
        1.7499e-04, 1.4712e-06, 2.0768e-06, 9.8565e-05, 2.7042e-07, 1.1238e-08,
        1.1068e-06, 2.2016e-05, 7.2095e-06, 3.4180e-05, 4.5126e-07, 3.0162e-05,
        3.1720e-08, 5.1297e-08, 1.7508e-06, 1.3165e-06, 1.4295e-05, 7.0202e-06,
        1.4579e-06, 2.9428e-06, 4.3808e-04, 7.4460e-04, 2.7552e-06, 1.0887e-04,
        1.1663e-07, 6.7473e-06, 8.4361e-05, 3.2064e-07, 2.0250e-06, 4.8093e-07,
        4.3509e-08, 7.8027e-07, 1.2158e-05, 2.5891e-07, 1.3009e-08, 2.9017e-08,
        1.5319e-06, 9.9189e-08, 2.0839e-07, 3.5299e-08, 9.7745e-08, 3.3408e-08,
        1.4835e-07, 9.3706e-05], device='cuda:0', grad_fn=<SelectBackward0>)


In [53]:
print(y.shape)

torch.Size([16])
