## Модель классификации шум/ чистый звук

In [1]:
import numpy as np
import os
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch
from early_stopping import EarlyStopping 
import torchvision.models as models
import torchvision
from torchvision import datasets, models, transforms

In [2]:
def create_classification_dataset(dir_path):
    dir_path ="train"
    clean_path ="clean"
    noisy_path ="noisy"
    train_packages =os.listdir(os.path.join(dir_path, clean_path))
    train =[]
    for train_package in train_packages:
        clean_package =os.path.join(dir_path,clean_path,train_package)
        mel_files =os.listdir(clean_package)
        for mel_file in mel_files:
            mel_clean =os.path.join(clean_package,mel_file)
            mel_noisy =os.path.join(dir_path, noisy_path,train_package,mel_file)
            train.append({"path" :mel_clean, "label": 0})
            train.append({"path" :mel_noisy, "label": 1})
    return pd.DataFrame.from_dict(train)        

In [3]:
# train_dataset =create_classification_dataset("train")
# train_dataset =train_dataset.sample(frac=1)
# train_dataset=train_dataset.reset_index(drop=True)
# train_dataset.to_csv("train_dataset.csv")
# val_dataset =create_classification_dataset("val")
# val_dataset =val_dataset.sample(frac=1)
# val_dataset=val_dataset.reset_index(drop=True)
# val_dataset.to_csv("val_dataset.csv")

In [4]:
def preprocess_numpy(numpy_array):
        numpy_array =(numpy_array -numpy_array.min())/ (numpy_array.max()- numpy_array.min()) *255
        img =Image.fromarray(numpy_array.astype(np.uint8))
        img = img.convert("RGB")
        return img

In [5]:
class DenoisingClassificationDataset(Dataset):    
    """Sound denoising dataset."""

    def __init__(self, train_path, transform=None):
        self.train_path = train_path
        self.data =pd.read_csv(self.train_path)
        self.transform = transform

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, idx):
        data_row =self.data.iloc[idx]
        path =data_row["path"]
        label= data_row["label"]
        array =np.load(path)
        if(self.transform):
            img = self.transform(array)
        return img,label

In [6]:
def augmentation_simple(image_size):
    return torchvision.transforms.Compose([
        torchvision.transforms.Lambda(preprocess_numpy),
        torchvision.transforms.Resize(image_size),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])


def augmentation_standart(image_size):
    return torchvision.transforms.Compose([
        torchvision.transforms.Lambda(preprocess_numpy),
        torchvision.transforms.Resize(image_size),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.ColorJitter(0.3,0.3,0,0),
        torchvision.transforms.RandomVerticalFlip(),
        torchvision.transforms.RandomRotation(10),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])



In [7]:
train_data=DenoisingClassificationDataset("train_dataset.csv",augmentation_standart((756,80)))
val_data=DenoisingClassificationDataset("val_dataset.csv",augmentation_simple((756,80)))

In [8]:
train_loader =DataLoader(train_data, batch_size=64, shuffle=True)
val_loader =DataLoader(val_data, batch_size=64, shuffle=True)

In [9]:
num_epochs =50
learning_rate =0.001
early_stopping =EarlyStopping(checkpoint_name="classifier_noise.pth")

In [10]:
model =models.resnet18(pretrained=True)

In [11]:
for ind, param in enumerate(model.named_parameters()):
            layer = param[0].split('.')[0]
            if (layer == 'layer4'):
                param[1].requires_grad = True
            else:
                param[1].requires_grad = False

In [12]:
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, 1)
model=model.cuda()

In [13]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.1, last_epoch=-1)

In [14]:
def cross_validation(model,criterion, val_loader):
    epoch_loss =0
    epoch_accuracy =0
    count =0
    total_number =0
    with torch.no_grad():
        for images, labels in val_loader:
            model.eval()
            images = images.cuda()
            labels = labels.type(torch.FloatTensor).cuda()

            # ===================forward=====================
            output = model(images)
            output = output.squeeze()
            loss = criterion(output, labels)
            epoch_loss+=loss.cpu().detach().numpy()
            preds = np.where(output.cpu().detach().numpy() > 0.5, 1, 0)
            accuracy = np.sum(preds == labels.cpu().detach().numpy())
            total_number +=images.shape[0]
            epoch_accuracy +=accuracy
         

        # ===================log========================
        print('epoch val loss:{:.4f}, accuracy:{:.4f}'.format(epoch_loss/total_number, epoch_accuracy/total_number))
    return epoch_loss/total_number

In [15]:
for epoch in range(num_epochs):
    print("Epoch: {}/{}".format(epoch, num_epochs), 'LR:', scheduler.get_lr())
    epoch_train_loss =0
    epoch_accuracy =0
    model.train()
    total_number =0
    for images, labels in train_loader:
        images = images.cuda()
        labels = labels.type(torch.FloatTensor).cuda()
#         print(images.shape,labels.shape)
        # ===================forward=====================
        output = model(images)
        output = output.squeeze()
        loss = criterion(output, labels)
        epoch_train_loss+=loss.cpu().detach().numpy()

        preds = np.where(output.cpu().detach().numpy() > 0.5, 1, 0)
        accuracy = np.sum(preds == labels.cpu().detach().numpy())
        total_number +=images.shape[0]
        epoch_accuracy +=accuracy
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # ===================log========================
    print('epoch train loss:{:.4f}, accuracy:{:.4f}'.format(epoch_train_loss/total_number, epoch_accuracy/total_number))
    epoch_loss=cross_validation(model,criterion, val_loader)
    early_stopping(epoch_loss, model)
    scheduler.step()
    
    if early_stopping.early_stop:
        print("Early stopping")
        break
      


Epoch: 0/50 LR: [0.001]
epoch train loss:0.0024, accuracy:0.9407
epoch val loss:0.0022, accuracy:0.9600
Epoch: 1/50 LR: [0.001]
epoch train loss:0.0018, accuracy:0.9570
epoch val loss:0.0013, accuracy:0.9709
Epoch: 2/50 LR: [0.001]
epoch train loss:0.0015, accuracy:0.9628
epoch val loss:0.0013, accuracy:0.9709
EarlyStopping counter: 1 out of 7
Epoch: 3/50 LR: [0.001]
epoch train loss:0.0014, accuracy:0.9647
epoch val loss:0.0011, accuracy:0.9756
Epoch: 4/50 LR: [0.001]
epoch train loss:0.0014, accuracy:0.9659
epoch val loss:0.0013, accuracy:0.9654
EarlyStopping counter: 1 out of 7
Epoch: 5/50 LR: [0.001]
epoch train loss:0.0013, accuracy:0.9689
epoch val loss:0.0011, accuracy:0.9772
EarlyStopping counter: 2 out of 7
Epoch: 6/50 LR: [0.001]
epoch train loss:0.0013, accuracy:0.9697
epoch val loss:0.0016, accuracy:0.9677
EarlyStopping counter: 3 out of 7
Epoch: 7/50 LR: [0.001]
epoch train loss:0.0013, accuracy:0.9702
epoch val loss:0.0012, accuracy:0.9766
EarlyStopping counter: 4 out of 