# Install pix2pix

In [None]:
!git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix

In [None]:
import os
# Work from the cloned repository root so the relative paths used by the
# following cells (requirements.txt, ./scripts/..., train.py) resolve.
# NOTE(review): the working directory stays changed for later cells too, so
# relative paths such as 'data/attenborough' are resolved from here as well --
# confirm that is intended.
os.chdir('pytorch-CycleGAN-and-pix2pix/')

In [None]:
!pip install -r requirements.txt

# Pretrained models

Download one of the official pretrained models with:

-   `bash ./scripts/download_pix2pix_model.sh [edges2shoes, sat2map, map2sat, facades_label2photo, and day2night]`

Or add your own pretrained model to `./checkpoints/{NAME}_pretrained/latest_net_G.pth`

In [None]:
!bash ./scripts/download_pix2pix_model.sh day2night

# Training

In [None]:
!python train.py --dataroot ./datasets/facades --name facades_pix2pix --model pix2pix --direction BtoA

# Data processing

In [None]:
import os
import numpy as np
import torchaudio
from pydub import AudioSegment
import matplotlib.pyplot as plt
import librosa
import scipy.io.wavfile as wavfile
import scipy
import cv2

In [None]:
# def audio_to_spectrogram(filename, n_fft=2048):
#     waveform, sample_rate = torchaudio.load(filename, normalize=True)
    
#     melspec_transform = torchaudio.transforms.MelSpectrogram(
#         sample_rate=sample_rate,
#         n_fft=n_fft
#     )
    
#     waveform = torch.mean(waveform, 0)
    
#     spectro = melspec_transform(waveform)
#     # spectro = librosa.power_to_db(spectro)
#     # plt.imshow(spectro)
    
#     return spectro, sample_rate
    
# def spectrogram_to_audio(spectro, n_fft=2048, sample_rate=44100):
#     inv_melspec_transform = torchaudio.transforms.InverseMelScale(
#         sample_rate=sample_rate, 
#         n_stft=int(n_fft/2 + 1)
#     )
#     spectro = inv_melspec_transform(spectro)
    
#     grifflim_transform = torchaudio.transforms.GriffinLim(n_fft=n_fft)
#     audio = grifflim_transform(spectro)

#     torchaudio.save('data/attenborough/our_planet_1_4_NEW.wav', audio, sample_rate)

def waveform_to_spectrogram(filename):
    """Load an audio file and compute its mel-scaled power spectrogram.

    The file is read with ``librosa.load`` using its default settings, the
    squared magnitude of its short-time Fourier transform is computed, and
    that power spectrogram is handed to ``librosa.feature.melspectrogram``.

    Args:
        filename: Path of the audio file to load.

    Returns:
        A ``(spectro, sample_rate)`` tuple: the mel spectrogram array and the
        sampling rate reported by ``librosa.load``.
    """
    samples, rate = librosa.load(filename)

    # Power spectrogram: squared magnitude of the STFT.
    magnitude = np.abs(librosa.stft(samples))
    power = magnitude ** 2

    mel = librosa.feature.melspectrogram(y=samples, sr=rate, S=power)
    return mel, rate


def spectrogram_to_audio(spectro, filename, sample_rate=44100):
    """Invert a mel power spectrogram back to audio and save it to a file.

    Args:
        spectro: Mel power spectrogram (as returned by
            ``librosa.feature.melspectrogram``).
        filename: Destination path passed to ``torchaudio.save``.
        sample_rate: Sampling rate of the audio the spectrogram was computed
            from. It is used both for the mel inversion and as the rate
            written to the output file, so the two always agree. Pass the
            rate that was used when the spectrogram was created.
    """
    # Imported locally: the notebook's import cell brings in torchaudio but
    # not torch itself, so the bare name would otherwise be undefined.
    import torch

    # Use the caller's rate for the inversion instead of librosa's default,
    # otherwise the audio is synthesized at one rate and saved as another.
    waveform = librosa.feature.inverse.mel_to_audio(spectro, sr=sample_rate)

    # Convert to a (channels, samples) tensor so we can save using torchaudio
    waveform = torch.from_numpy(np.reshape(waveform, (1, -1)))

    torchaudio.save(filename, waveform, sample_rate)
    
    
def spectrogram_to_image(spectro, filename, img_size=(128, 128)):
    """Render a power spectrogram as a grayscale image file.

    The spectrogram is converted to decibels, resized, min-max rescaled to
    the 0-255 range and written with ``cv2.imwrite``.

    Args:
        spectro: Power spectrogram array.
        filename: Path of the image to write; the extension selects the
            image format.
        img_size: Target size passed to ``cv2.resize`` as ``dsize``, i.e.
            ``(width, height)``.
    """
    # Convert to decibel scale
    spectro_db = librosa.power_to_db(spectro)
    spectro_db = cv2.resize(spectro_db, dsize=img_size)

    # Rescale pixel values to be between 0 and 255
    spectro_db = spectro_db - np.min(spectro_db)
    peak = np.max(spectro_db)
    # A constant spectrogram (e.g. pure silence) has peak == 0 after the
    # shift; skip the division so the image is all zeros instead of NaN.
    if peak > 0:
        spectro_db = spectro_db * 255.0 / peak

    # Write explicit 8-bit pixels rather than relying on imwrite's implicit
    # conversion of floating-point input.
    pixels = np.clip(np.rint(spectro_db), 0, 255).astype(np.uint8)

    # plt.imshow(spectro_db)
    cv2.imwrite(filename, pixels)

In [None]:
# Audio directories processed by wav_to_spectro() below; the spectrogram
# images for each are written to "<directory>_spectro".
dir_input = 'data/attenborough'
dir_output = 'data/gtts'


# # Convert all files into .wav format
# def convert_to_wav(dir_name):
#     TOTAL_FILES = len(os.listdir(dir_name))
#     for i, filename in enumerate(os.scandir(dir_name)):
#         fn = os.path.splitext(filename)[0]
#         fn = filename.path.split("\\")[-1].split(".")[0]

#         sound = AudioSegment.from_mp3(filename.path)
#         sound.export(dir_name + "_wav/" + fn + ".wav", format="wav")

#         if (i + 1) % 100 == 0:
#             print("Converted: ", i + 1, "/", TOTAL_FILES, " files")


# convert_to_wav(dir_input)
# convert_to_wav(dir_output)


# Convert audio files to spectrogram
def wav_to_spectro(dir_name):
    """Convert every audio file in ``dir_name`` to a spectrogram image.

    For each regular file in ``dir_name`` a PNG named after the file (with
    its extension removed) is written to the directory ``dir_name +
    "_spectro"``, which is created if it does not exist. Subdirectories are
    skipped. Progress is printed every 100 files.

    Args:
        dir_name: Directory containing the audio files.
    """
    out_dir = dir_name + "_spectro"
    # cv2.imwrite does not create missing directories, so make sure the
    # target exists before any image is written.
    os.makedirs(out_dir, exist_ok=True)

    with os.scandir(dir_name) as entries:
        audio_files = sorted(
            (entry for entry in entries if entry.is_file()),
            key=lambda entry: entry.name,
        )
    total_files = len(audio_files)

    for i, entry in enumerate(audio_files):
        # Take the stem from the entry name so this works with any path
        # separator and keeps dots that are part of the file name.
        stem = os.path.splitext(entry.name)[0]

        spectro, _sample_rate = waveform_to_spectrogram(entry.path)
        spectrogram_to_image(spectro, os.path.join(out_dir, stem + ".png"))

        if (i + 1) % 100 == 0:
            print("Converted: ", i + 1, "/", total_files, " files")


# Generate spectrogram images for both audio directories.
wav_to_spectro(dir_input)
wav_to_spectro(dir_output)