In [None]:
# Mount Google Drive at /content/gdrive so that the dataset, helper modules and
# checkpoints referenced by the following cells are reachable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
cd /content/gdrive/MyDrive/PhanLoaiAmThanhDongVat

/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat


In [None]:
import sys
# Add the Drive root to the module search path.
# NOTE(review): every other cell uses '/content/gdrive/MyDrive' (no space) —
# confirm that '/content/gdrive/My Drive' is the intended directory.
sys.path.append('/content/gdrive/My Drive')

In [None]:
import pandas as pd
import torch
import copy
import time
from pathlib import Path
from torch import nn
import tensorflow as tf
import torchvision.transforms as transforms
import torchvision.models as models
import torchaudio
import torch.nn.functional as F
from torch.utils.data import random_split
from torch.utils.tensorboard import SummaryWriter
from audio_util_load import AudioUtil
from sound_classification_dataset import SoundDS
import sklearn
from sklearn.model_selection import KFold
import numpy as np
import gc


In [None]:
import os

In [None]:

# Read the metadata CSV into a DataFrame and preview its first rows.
metadata_file = '/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/animalnewlabelled.csv'
df = pd.read_csv(metadata_file)
# Last expression of the cell: rendered as the cell output.
df.head()

Unnamed: 0,Filename,Label,ClassID,Path
0,Dolphin_17.wav,Dolphin,6,/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat...
1,Dolphin_20.wav,Dolphin,6,/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat...
2,Dolphin_16.wav,Dolphin,6,/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat...
3,Dolphin_13.wav,Dolphin,6,/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat...
4,Dolphin_39.wav,Dolphin,6,/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat...


In [None]:
# (rows, columns) of the metadata table, shown as a quick sanity check.
df.shape

(2177, 4)

In [None]:
# TensorBoard writer created with default arguments; training() logs its
# per-epoch loss and accuracy scalars through it.
writer = SummaryWriter()
# Wrap the metadata DataFrame in the project's dataset class.
myds = SoundDS(df)
# For fold results
# NOTE(review): none of the visible cells reads or fills `results`.
results = {}

In [None]:
# Random split of 80:20 between training and validation
num_items = len(myds)
num_train = round(num_items * 0.8)
num_val = num_items - num_train
# Seed the split so the same items land in the validation set on every run.
# Without a fixed generator, a checkpoint reloaded in a later session would be
# evaluated on a different split that can contain its own training samples.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(myds, [num_train, num_val], generator=split_generator)

# Create training and validation data loaders
# Training batches are reshuffled every epoch; validation order is kept fixed.
train_dl = torch.utils.data.DataLoader(train_ds, batch_size=64, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size=64, shuffle=False)

In [None]:
# Load the pre-trained DenseNet-161 model from torchvision (pretrained=True).
model = models.densenet161(pretrained=True)



In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# Freeze all existing weights first; the head created below is built afterwards,
# so its parameters keep the default requires_grad=True and stay trainable.
for param in model.parameters():
    param.requires_grad = False

# New classification head: 2208 input features -> 128 hidden units -> 13 outputs.
head_layers = [
    nn.Linear(2208, 128),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(128, 13),
]
model.classifier = nn.Sequential(*head_layers)

model = model.to(device)
# Display the device the model's first parameter lives on.
next(model.parameters()).device

device(type='cpu')

In [None]:
# ----------------------------
# Inference
# ----------------------------
def inference (model, test_dl):
    """Evaluate ``model`` on ``test_dl`` and print its accuracy.

    Each batch is standardised with its own mean and standard deviation before
    the forward pass. The predicted class is the index of the highest output
    score.

    Args:
        model: a ``torch.nn.Module`` returning one score per class for every item.
        test_dl: an iterable of batches where ``batch[0]`` holds the inputs and
            ``batch[1]`` the integer class labels.

    Returns:
        None. Prints ``Test Accuracy: <acc>, Total items: <n>``; the accuracy is
        reported as ``nan`` when ``test_dl`` yields no items.
    """
    # Run on the device the model already lives on rather than on a notebook
    # global, so the function also works when `device` is stale or undefined.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    correct_prediction = 0
    total_prediction = 0

    # Dropout must be disabled while evaluating; the previous mode is restored
    # afterwards so the caller's model state is left untouched.
    was_training = model.training
    model.eval()
    try:
        # Disable gradient updates
        with torch.no_grad():
            for data in test_dl:
                # Get the input features and target labels on the model's device
                inputs, labels = data[0].to(run_device), data[1].to(run_device)

                # Normalize the inputs; the clamp avoids a division by zero
                # (NaN inputs) when a batch is constant.
                inputs_m, inputs_s = inputs.mean(), inputs.std()
                inputs = (inputs - inputs_m) / torch.clamp(inputs_s, min=1e-8)

                # Get predictions
                outputs = model(inputs)

                # Get the predicted class with the highest score
                _, prediction = torch.max(outputs, 1)
                # Count of predictions that matched the target label
                correct_prediction += (prediction == labels).sum().item()
                total_prediction += prediction.shape[0]
    finally:
        model.train(was_training)

    # An empty loader has no defined accuracy; report nan instead of raising.
    acc = correct_prediction / total_prediction if total_prediction else float('nan')
    print(f'Test Accuracy: {acc:.2f}, Total items: {total_prediction}')

In [None]:
def training(model, train_dl, num_epochs):
    """Train ``model`` on ``train_dl`` for ``num_epochs`` epochs.

    Uses cross-entropy loss, Adam and a linear one-cycle learning-rate schedule
    that is stepped once per batch. Each batch is standardised with its own mean
    and standard deviation before the forward pass. After every epoch the average
    loss and accuracy are printed and logged to the module-level ``writer`` under
    ``Loss/train`` and ``Acc/train``.

    Args:
        model: the ``torch.nn.Module`` to optimise; it is updated in place.
        train_dl: a sized iterable of batches where ``batch[0]`` holds the inputs
            and ``batch[1]`` the integer class labels.
        num_epochs: number of passes over ``train_dl``.

    Returns:
        None.
    """
    # Run on the device the model already lives on rather than on a notebook global.
    run_device = next(model.parameters()).device
    # Make sure dropout is active even if an earlier cell left the model in eval().
    model.train()

    # Loss Function, Optimizer and Scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),lr=0.0001)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001,
                                                steps_per_epoch=int(len(train_dl)),
                                                epochs=num_epochs,
                                                anneal_strategy='linear')

    # Repeat for each epoch
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_prediction = 0
        total_prediction = 0

        # Repeat for each batch in the training set
        for data in train_dl:
            # Get the input features and target labels on the model's device
            inputs, labels = data[0].to(run_device), data[1].to(run_device)

            # Normalize the inputs; the clamp avoids a division by zero
            # (NaN inputs) when a batch is constant.
            inputs_m, inputs_s = inputs.mean(), inputs.std()
            inputs = (inputs - inputs_m) / torch.clamp(inputs_s, min=1e-8)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # The one-cycle schedule is defined per batch, not per epoch.
            scheduler.step()

            # Keep stats for Loss and Accuracy
            running_loss += loss.item()

            # Get the predicted class with the highest score
            _, prediction = torch.max(outputs,1)
            # Count of predictions that matched the target label
            correct_prediction += (prediction == labels).sum().item()
            total_prediction += prediction.shape[0]

            # Drop the references to this batch before the next one is loaded so
            # that two batches are not kept alive at the same time.
            del inputs_m
            del inputs_s
            del inputs,labels
            gc.collect()
        # Print stats at the end of the epoch
        num_batches = len(train_dl)
        avg_loss = running_loss / num_batches
        avg_acc = correct_prediction/total_prediction
        writer.add_scalar("Loss/train", avg_loss, epoch)
        writer.add_scalar("Acc/train", avg_acc, epoch)
        print(f'Epoch: {epoch}, Loss: {avg_loss:.2f}, Accuracy: {avg_acc:.2f}')

    print('Finished Training')


In [None]:
# Run the training loop on the training loader for 50 epochs.
num_epochs=50
training(model, train_dl, num_epochs)

Epoch: 0, Loss: 2.53, Accuracy: 0.12


KeyboardInterrupt: 

In [None]:
# Save only the model's state_dict (not the whole module) to Drive; the
# following cells rebuild the architecture and reload this file.
torch.save(model.state_dict(), '/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/densenet161foranimalNew.pt')

In [None]:
# Run inference on the validation set with the weights saved by the previous cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Rebuild the same architecture as the trained model. pretrained=False: the
# checkpoint below holds the full state_dict (strict loading), so every weight
# is overwritten anyway and the pretrained download is not needed.
model_inf = models.densenet161(pretrained=False)
for param in model_inf.parameters():
    param.requires_grad = False
model_inf.classifier = nn.Sequential(
    nn.Linear(2208, 128),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(128, 13),
)
model_inf = model_inf.to(device)

# map_location lets a checkpoint saved on a GPU runtime load on a CPU-only one.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust
# (or pass weights_only=True where the installed torch version supports it).
model_inf.load_state_dict(
    torch.load('/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/densenet161foranimalNew.pt',
               map_location=device))
# Evaluation mode disables dropout.
model_inf.eval()
inference(model_inf, val_dl)

  model_inf.load_state_dict(torch.load('/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/densenet161foranimalNew.pt'))


Test Accuracy: 0.82, Total items: 435


In [None]:
# Load the saved state dictionary
# map_location="cpu": this cell runs the model on the CPU, so the checkpoint
# must load there even if it was saved from a GPU runtime.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load('/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/densenet161foranimalNew.pt',
                        map_location="cpu")

# Create an instance of the model. pretrained=False: the checkpoint holds the
# full state_dict (strict loading), so every weight is overwritten anyway.
# NOTE: this rebinds the notebook-global `model` to a frozen, eval-mode copy.
model = models.densenet161(pretrained=False)
for param in model.parameters():
    param.requires_grad = False
model.classifier = nn.Sequential(
    nn.Linear(2208, 128),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(128, 13),
)
# Load the state dictionary into the model
model.load_state_dict(state_dict)
model.eval()

# Turn the audio file into a spectrogram with the project's AudioUtil helpers:
# open -> 2 channels -> resample to 44100 -> pad/truncate to 4000 -> spectrogram.
# (units of the 4000 argument are defined by AudioUtil.pad_trunc — TODO confirm)
inputs = AudioUtil.open('/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/DuLieuKiemTra/lion-roar-sound-effect.wav')
inputs = AudioUtil.rechannel(inputs, 2)
inputs = AudioUtil.resample(inputs, 44100)
inputs = AudioUtil.pad_trunc(inputs, 4000)
inputs = AudioUtil.spectro_gram(inputs, n_mels=224, n_fft=4096, hop_len=None)

# Resize the spectrogram to 224x224.
inputs = F.interpolate(inputs.unsqueeze(0), size=(224, 224), mode='bilinear').squeeze(0)
# Display names indexed by predicted class id (13 entries, one per model output).
class_labels=["sư tử","gấu","Mèo","gà","bò","chó","cá heo","lừa","voi","ếch","ngựa","khỉ","cừu"]

# Convert 2 channels to 3 channels by duplicating one of the channels
inputs = torch.cat([inputs, inputs[:1, :, :]], dim=0)
# Make prediction using the model
with torch.no_grad():
    # Add the batch dimension and keep the tensor on the CPU, like the model.
    inputs = inputs.unsqueeze(0).to("cpu")
    # Same standardisation as in training; the clamp avoids a division by zero
    # (NaN inputs) when the spectrogram is constant.
    inputs_m, inputs_s = inputs.mean(), inputs.std()
    inputs = (inputs - inputs_m) / torch.clamp(inputs_s, min=1e-8)
    outputs = model(inputs)
    _, prediction = torch.max(outputs, 1)
rs = prediction[0].item()
print(f'Dự đoán nhãn : {rs}')
print("có thể là con : "+class_labels[rs])

  state_dict = torch.load('/content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/densenet161foranimalNew.pt')


Dự đoán nhãn : 0
có thể là con : sư tử


In [None]:
cp /content/dolphin4_09.wav /content/gdrive/MyDrive/PhanLoaiAmThanhDongVat/DuLieuKiemTra/dolphin4_09.wav