<a href="https://colab.research.google.com/github/jsansao/s3nr/blob/main/GeracaoMascarasVozSintetica_UNET.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Segmentação de imagens espectrográficas com UNET

## Geração do dataset de treinamento

### Carregando bibliotecas para geração dos espectrogramas e máscaras

In [1]:
pip install librosa matplotlib




In [2]:
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

### Carregando o dataset de treinamento

In [3]:
!git clone https://github.com/jsansao/synthvoice_unet.git

Cloning into 'synthvoice_unet'...
remote: Enumerating objects: 5241, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Total 5241 (delta 0), reused 3 (delta 0), pack-reused 5238[K
Receiving objects: 100% (5241/5241), 372.10 MiB | 25.53 MiB/s, done.
Resolving deltas: 100% (127/127), done.
Updating files: 100% (6423/6423), done.


In [4]:
def calcular_espectrograma(file_path, n_fft=1024, remove_percentage=0.2):
    """Return the magnitude spectrogram of the central part of an audio file.

    The file is read with ``librosa.load`` (library defaults), transformed with an
    STFT whose hop is 10% of ``n_fft`` (i.e. 90% overlap), and a fraction
    ``remove_percentage`` of the time frames is then discarded from the start and
    again from the end.

    Args:
        file_path: Path of the audio file to read.
        n_fft: STFT window size, in samples.
        remove_percentage: Fraction of the time frames dropped from each end.
            Values of 0.5 or more leave no frames at all.

    Returns:
        2-D array with the absolute value of the trimmed STFT (linear magnitude,
        not decibels), indexed as (frequency bin, time frame).
    """
    # Load the audio file; the sample rate is not needed afterwards.
    audio_signal, _ = librosa.load(file_path)

    # 90% overlap between consecutive windows; never let the hop collapse to 0.
    hop_length = max(1, int(n_fft * 0.1))
    stft = librosa.stft(audio_signal, n_fft=n_fft, hop_length=hop_length)

    # The trim runs along the time axis, so it must be sized from the number of
    # frames (axis 1). len(stft) would give the number of frequency bins instead.
    num_frames = stft.shape[1]
    num_elements_to_remove = int(num_frames * remove_percentage)

    # The stop index is computed explicitly: a negative-zero stop ("-0") would
    # select nothing when there are no frames to remove.
    trimmed_stft = stft[:, num_elements_to_remove:num_frames - num_elements_to_remove]

    return np.abs(trimmed_stft)

In [5]:
def calcular_espectrograma_mascara(diretorio, diretorio_semruido, diretorio_saida, diretorio_mascara, limiar):
  """Write a PNG spectrogram and a PNG binary mask for every WAV file in a directory.

  For each ``.wav`` in ``diretorio`` the spectrogram of the file and of the file
  with the same name in ``diretorio_semruido`` are computed with
  ``calcular_espectrograma``. The first one is scaled by its own maximum to 0-255
  and saved as an RGB image; the second one is turned into a mask that is 255
  where the magnitude is at least ``limiar`` times its maximum and 0 elsewhere.

  Args:
    diretorio: Directory with the input WAV files.
    diretorio_semruido: Directory holding the counterpart of each input file
      under the same file name (used to build the mask).
    diretorio_saida: Directory receiving the spectrogram images.
    diretorio_mascara: Directory receiving the mask images.
    limiar: Threshold, relative to the maximum of the ``diretorio_semruido``
      spectrogram, above which a bin is marked in the mask.
  """
  # Make the function usable on its own: create the destinations when missing.
  os.makedirs(diretorio_saida, exist_ok=True)
  os.makedirs(diretorio_mascara, exist_ok=True)

  # os.listdir order is arbitrary; sort so every run processes files identically.
  arquivos_wav = sorted(f for f in os.listdir(diretorio) if f.endswith('.wav'))

  for arquivo in arquivos_wav:
    caminho_arquivo = os.path.join(diretorio, arquivo)
    caminho_arquivo_semruido = os.path.join(diretorio_semruido, arquivo)

    espectrograma = calcular_espectrograma(caminho_arquivo)
    espectrograma_semruido = calcular_espectrograma(caminho_arquivo_semruido)

    # Mask: 255 where the reference magnitude reaches the relative threshold.
    # An all-zero reference (silent file) keeps an empty mask instead of
    # dividing by zero.
    mascara = np.zeros_like(espectrograma_semruido)
    pico_semruido = np.max(espectrograma_semruido)
    if pico_semruido > 0:
      mascara[(espectrograma_semruido / pico_semruido) >= limiar] = 255

    # Scale the input spectrogram to 8 bits; an all-zero spectrogram becomes a
    # black image rather than NaNs cast to uint8.
    pico = np.max(espectrograma)
    if pico > 0:
      espectrograma_int = (espectrograma / pico * 255).astype(np.uint8)
    else:
      espectrograma_int = np.zeros(espectrograma.shape, dtype=np.uint8)

    img_espectrograma = Image.fromarray(espectrograma_int).convert('RGB')
    img_mascara = Image.fromarray(mascara.astype(np.uint8))

    # Both outputs share the WAV file's base name, with a .png extension.
    arquivo_semextensao, _ = os.path.splitext(arquivo)
    arquivo_saida = arquivo_semextensao + '.png'

    img_espectrograma.save(os.path.join(diretorio_saida, arquivo_saida))
    img_mascara.save(os.path.join(diretorio_mascara, arquivo_saida))




In [9]:
def calcular_espectrograma_npy_mascara(diretorio, diretorio_semruido, diretorio_saida, diretorio_mascara, limiar):
  """Write a .npy spectrogram and a .npy binary mask for every WAV file in a directory.

  For each ``.wav`` in ``diretorio`` the spectrogram of the file and of the file
  with the same name in ``diretorio_semruido`` are computed with
  ``calcular_espectrograma``. The first one is saved unchanged; the second one is
  turned into a mask holding 255 where the magnitude is at least ``limiar`` times
  its maximum and 0 elsewhere. The mask keeps the dtype of the spectrogram it is
  derived from.

  Args:
    diretorio: Directory with the input WAV files.
    diretorio_semruido: Directory holding the counterpart of each input file
      under the same file name (used to build the mask).
    diretorio_saida: Directory receiving the spectrogram arrays.
    diretorio_mascara: Directory receiving the mask arrays.
    limiar: Threshold, relative to the maximum of the ``diretorio_semruido``
      spectrogram, above which a bin is marked in the mask.
  """
  # Make the function usable on its own: create the destinations when missing.
  os.makedirs(diretorio_saida, exist_ok=True)
  os.makedirs(diretorio_mascara, exist_ok=True)

  # os.listdir order is arbitrary; sort so every run processes files identically.
  arquivos_wav = sorted(f for f in os.listdir(diretorio) if f.endswith('.wav'))

  for arquivo in arquivos_wav:
    caminho_arquivo = os.path.join(diretorio, arquivo)
    caminho_arquivo_semruido = os.path.join(diretorio_semruido, arquivo)

    espectrograma = calcular_espectrograma(caminho_arquivo)
    espectrograma_semruido = calcular_espectrograma(caminho_arquivo_semruido)

    # Mask: 255 where the reference magnitude reaches the relative threshold.
    # An all-zero reference (silent file) keeps an empty mask instead of
    # dividing by zero.
    mascara = np.zeros_like(espectrograma_semruido)
    pico_semruido = np.max(espectrograma_semruido)
    if pico_semruido > 0:
      mascara[(espectrograma_semruido / pico_semruido) >= limiar] = 255

    # Both outputs share the WAV file's base name, with a .npy extension.
    arquivo_semextensao, _ = os.path.splitext(arquivo)
    arquivo_saida = arquivo_semextensao + '.npy'

    np.save(os.path.join(diretorio_saida, arquivo_saida), espectrograma)
    np.save(os.path.join(diretorio_mascara, arquivo_saida), mascara)


### Geração de imagens

In [10]:
import os


!rm -fr train

diretorio = '/content/synthvoice_unet/amostras_jitter_SNR/'
diretorio_semruido = '/content/synthvoice_unet/amostras_jitter_SNR_semruido/'

diretorio_saida = '/content/train/imgs/'
diretorio_mascara = '/content/train/masks/'

diretorio_saida_png = '/content/train/imgs_png/'
diretorio_mascara_png = '/content/train/masks_png/'


if not os.path.exists(diretorio_saida):
        os.makedirs(diretorio_saida)

if not os.path.exists(diretorio_mascara):
        os.makedirs(diretorio_mascara)

if not os.path.exists(diretorio_saida_png):
        os.makedirs(diretorio_saida_png)

if not os.path.exists(diretorio_mascara_png):
        os.makedirs(diretorio_mascara_png)

limiar = 0.1


calcular_espectrograma_npy_mascara(diretorio, diretorio_semruido, diretorio_saida, diretorio_mascara, limiar)

calcular_espectrograma_mascara(diretorio, diretorio_semruido, diretorio_saida_png, diretorio_mascara_png, limiar)



## Treinamento UNET

In [11]:
!git clone https://github.com/jsansao/UNet-Pytorch-Customdataset.git

Cloning into 'UNet-Pytorch-Customdataset'...
remote: Enumerating objects: 65, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 65 (delta 23), reused 42 (delta 9), pack-reused 0[K
Receiving objects: 100% (65/65), 24.22 MiB | 13.66 MiB/s, done.
Resolving deltas: 100% (23/23), done.


In [12]:
# Remove the data/ directory of the cloned repository; a later cell moves the
# generated /content/train tree to this path.
!rm -rf /content/UNet-Pytorch-Customdataset/data/

In [13]:
# Work from the repository root: the following cells call train.py / predict.py and
# refer to ./checkpoints with relative paths.
cd /content/UNet-Pytorch-Customdataset/

/content/UNet-Pytorch-Customdataset


In [14]:
# Move the generated dataset into the repository as data/. This relies on data/ having
# been removed beforehand; if data/ still exists, mv would nest train/ inside it instead.
!mv /content/train/ /content/UNet-Pytorch-Customdataset/data/

In [15]:
# Train the U-Net with the repository's train.py (run from the repository root).
# NOTE(review): the flag semantics live in train.py, which is not visible here —
# presumably 5 epochs, batch size 16, no image rescaling, 20% validation split and
# 2 output classes; confirm against the script.
!python train.py --epochs 5 --batch-size 16 --scale 1.0 --validation 20.0 -c 2

INFO: Using device cuda
INFO: Creating dataset with 3211 examples
INFO: Scanning mask files to determine unique values
100% 3211/3211 [00:24<00:00, 132.10it/s]
INFO: Unique mask values: [0.0, 255.0]
Epoch 1/5:   1% 16/2569 [00:09<25:22,  1.68img/s]loss: 1.2111356258392334
Epoch 1/5:   1% 32/2569 [00:11<13:02,  3.24img/s]loss: 1.1095190048217773
Epoch 1/5:   2% 48/2569 [00:12<08:20,  5.03img/s]loss: 1.041003704071045
Epoch 1/5:   2% 64/2569 [00:14<06:54,  6.05img/s]loss: 0.9836644530296326
Epoch 1/5:   3% 80/2569 [00:16<06:06,  6.79img/s]loss: 0.9068325161933899
Epoch 1/5:   4% 96/2569 [00:17<05:37,  7.34img/s]loss: 0.8487563133239746
Epoch 1/5:   4% 112/2569 [00:19<05:17,  7.74img/s]loss: 0.8038882613182068
Epoch 1/5:   5% 128/2569 [00:21<05:04,  8.02img/s]loss: 0.7818914651870728
Epoch 1/5:   6% 144/2569 [00:23<04:55,  8.19img/s]loss: 0.6933959722518921
Epoch 1/5:   6% 160/2569 [00:25<04:49,  8.32img/s]loss: 0.6484963893890381
Epoch 1/5:   7% 176/2569 [00:27<04:43,  8.44img/s]loss: 0.

In [None]:
# Run predict.py on a single .npy spectrogram using checkpoint_epoch5.pth, writing the
# result to ./0_OUT.jpg and requesting a visualization (--viz).
# NOTE(review): the stored output of this cell reports checkpoint_epoch10.pth, so it was
# produced by a different run than this command — re-run the cell to refresh it.
!python predict.py --model ./checkpoints/checkpoint_epoch5.pth -s 1.0 -i /content/UNet-Pytorch-Customdataset/data/imgs/SNR0218J000S000VaF220_1.npy --viz --output ./0_OUT.jpg

INFO: Loading model ./checkpoints/checkpoint_epoch10.pth
INFO: Using device cuda
INFO: Model loaded!
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0218J000S000VaF220_1.npy ...
INFO: Mask saved to ./0_OUT.jpg
INFO: Visualizing results for image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0218J000S000VaF220_1.npy, close to continue...
Figure(640x480)


In [17]:
# Copy the training checkpoints to Google Drive.
# NOTE(review): no cell in this notebook mounts /content/drive; the copy presumably
# requires Drive to be mounted (and MyDrive/dataset/ to exist) beforehand — confirm.
!cp  -r /content/UNet-Pytorch-Customdataset/checkpoints/ /content/drive/MyDrive/dataset/

In [18]:
!mkdir /content/UNet-Pytorch-Customdataset/masks_predict/

In [19]:

# Run predict.py over every .npy spectrogram in data/imgs. According to the cell's log,
# each mask is written next to its input as <name>_OUT.png.
# NOTE(review): -t 0.95 is presumably the mask probability threshold — confirm in predict.py.
!python predict.py --model ./checkpoints/checkpoint_epoch5.pth -s 1.0 -t 0.95 -i /content/UNet-Pytorch-Customdataset/data/imgs/*.npy

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0181J100S000ViF220_1.npy ...
INFO: Mask saved to /content/UNet-Pytorch-Customdataset/data/imgs/SNR0181J100S000ViF220_1_OUT.png
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J000S000VaF120_1.npy ...
INFO: Mask saved to /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J000S000VaF120_1_OUT.png
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J000S000VuF220_1.npy ...
INFO: Mask saved to /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J000S000VuF220_1_OUT.png
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J040S000VaF220_1.npy ...
INFO: Mask saved to /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J040S000VaF220_1_OUT.png
INFO: Predicting image /content/UNet-Pytorch-Customdataset/data/imgs/SNR0182J060S000ViF220_1.npy ...
INFO: Mask saved to /content/U

## Backup no Drive

In [20]:
# Move every PNG found in data/imgs (the predicted *_OUT.png masks) into masks_predict,
# leaving only the .npy inputs behind.
!mv /content/UNet-Pytorch-Customdataset/data/imgs/*.png /content/UNet-Pytorch-Customdataset/masks_predict

In [21]:
# Back up the predicted masks to Google Drive.
# NOTE(review): presumably requires /content/drive to be mounted first; no mount cell is visible here.
!cp -r /content/UNet-Pytorch-Customdataset/masks_predict/ /content/drive/MyDrive/temp/

In [23]:
# Back up the PNG spectrogram images to Google Drive.
# NOTE(review): presumably requires /content/drive to be mounted first; no mount cell is visible here.
!cp -r /content/UNet-Pytorch-Customdataset/data/imgs_png/ /content/drive/MyDrive/temp/

In [26]:
# Back up the PNG ground-truth masks to Google Drive.
# NOTE(review): presumably requires /content/drive to be mounted first; no mount cell is visible here.
!cp -r /content/UNet-Pytorch-Customdataset/data/masks_png/ /content/drive/MyDrive/temp/