In [None]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read.
# This import only exists inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive from the mounted Drive into the current directory.
# NOTE(review): the relative ./drive path presumably relies on the working
# directory being /content (where Drive was mounted) - confirm.
!unzip ./drive/MyDrive/vit_sr/pickled16.zip -d .

In [None]:
# Drop a single file from the training set before training starts.
# NOTE(review): the reason is not recorded here - presumably the file is
# unreadable or too short to yield usable segments; confirm and document.
!rm ./UnzippedDataset/train/113166.mp3

In [None]:
import os
import math
import glob
import pickle

import librosa
from scipy import signal

import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Conv1DTranspose, BatchNormalization, LeakyReLU, Add

In [None]:
def build_generator(input_size=(32, 129)):
    """Build the 1-D convolutional encoder/decoder generator.

    The encoder applies four stride-2 ``Conv1D`` stages (each followed by
    batch normalisation and LeakyReLU). The decoder mirrors it with four
    stride-2 ``Conv1DTranspose`` stages; the outputs of the first three
    encoder stages are added back in as skip connections before the
    normalisation of the matching decoder stage.

    Args:
        input_size: Shape of one input sample, ``(steps, channels)``.
            With the default ``(32, 129)`` the output is ``(32, 128)``.
            The number of steps should be a multiple of 16, otherwise the
            skip connections do not line up after down/up-sampling.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input to a tensor with
        the same number of steps and 128 channels.
    """
    # (filters, kernel_size) per stage; every stage uses stride 2, 'same' padding.
    encoder_spec = [(256, 7), (512, 5), (512, 3), (1024, 3)]
    decoder_spec = [(512, 3), (512, 5), (256, 7)]

    inputs = tf.keras.Input(input_size)
    features = inputs

    # Encoder: halve the number of steps at every stage and remember the
    # activations that the decoder will merge back in.
    skips = []
    for depth, (filters, kernel) in enumerate(encoder_spec):
        features = Conv1D(filters, kernel, strides=2, padding='same')(features)
        features = BatchNormalization()(features)
        features = LeakyReLU(alpha=0.2)(features)
        if depth < len(decoder_spec):
            skips.append(features)

    # Decoder: double the number of steps and add the encoder activation of
    # matching resolution (deepest skip first).
    for filters, kernel in decoder_spec:
        features = Conv1DTranspose(filters, kernel, strides=2, padding='same')(features)
        features = Add()([features, skips.pop()])
        features = BatchNormalization()(features)
        features = LeakyReLU(alpha=0.2)(features)

    # Output stage: back to the input resolution, 128 channels, no skip.
    features = Conv1DTranspose(128, 9, strides=2, padding='same')(features)
    features = BatchNormalization()(features)
    features = LeakyReLU(alpha=0.2)(features)

    return tf.keras.Model(inputs, features)

In [None]:
def build_discriminator():
    """Build the discriminator that scores a (32, 128) sample.

    Three stride-2 ``Conv1D`` layers with 1024 filters (kernel sizes 7, 5
    and 3, each followed by LeakyReLU) feed a flattened 2048-unit dense
    layer and a single sigmoid unit.

    Returns:
        An uncompiled ``tf.keras.Model`` mapping a ``(32, 128)`` input to a
        single sigmoid output per sample.
    """
    hb_input = tf.keras.Input((32, 128))

    # Convolutional feature extractor; every layer halves the step count.
    features = hb_input
    for kernel_size in (7, 5, 3):
        features = Conv1D(1024, kernel_size, strides=2, padding='same')(features)
        features = LeakyReLU(alpha=0.2)(features)

    # Classification head. The 2048-unit layer has no activation.
    flat = tf.keras.layers.Flatten()(features)
    hidden = tf.keras.layers.Dense(2048)(flat)
    score = tf.keras.layers.Dense(1,activation='sigmoid')(hidden)

    return tf.keras.Model(hb_input, score)

In [None]:
# Build the two networks. With its default input size the generator maps
# (32, 129) inputs to (32, 128) outputs, which is the discriminator's input shape.
generator = build_generator()

discriminator = build_discriminator()
# Note: 10e-7 evaluates to 1e-6 (not 1e-7).
d_optim = tf.keras.optimizers.Adam(learning_rate=10e-7, beta_1=0.5)
# Compile the stand-alone discriminator BEFORE it is frozen below, so that it
# can still be trained directly with discriminator.train_on_batch().
discriminator.compile(optimizer=d_optim, loss='binary_crossentropy',metrics=['accuracy'])


# Chain generator -> discriminator into one model with two outputs: the
# generated sample itself and the discriminator's verdict on it.
z = tf.keras.Input((32, 129))
fake = generator(z)
# Freeze the discriminator for the combined model so that training `combined`
# only updates the generator.
# NOTE(review): this relies on Keras honouring the trainable state in effect at
# each model's compile() call - confirm for the installed Keras/TF version.
discriminator.trainable = False
valid = discriminator(fake)

combined = tf.keras.Model(z, [fake, valid])

# Note: 10e-4 evaluates to 1e-3 (not 1e-4).
c_optim = tf.keras.optimizers.Adam(learning_rate=10e-4, beta_1=0.5)
# define the loss for the combined model as the adversarial loss and the content loss (MSE) with weighting
# Output order is [fake, valid]: MSE on the generated sample (weight 0.1) and
# binary cross-entropy on the discriminator verdict (weight 0.001).
combined.compile(optimizer=c_optim, loss=['mse', 'binary_crossentropy'], loss_weights=[0.1, 0.001])

In [None]:
def convert_audio(path):
    """Load an audio file and cut its spectrogram into low/high-band segments.

    The file is resampled to 16 kHz and converted to a dB-scaled magnitude
    STFT (``n_fft=512`` -> 257 frequency bins, hop 256), which is min-max
    normalised to [0, 1] per file. The spectrogram is then cut into
    consecutive, non-overlapping windows of 32 frames and split by frequency:
    bins 0..128 form the low band (model input) and bins 129..256 the high
    band (model target).

    Segments are laid out time-major, i.e. ``lb[k, t, f]`` is frame
    ``k*32 + t`` at frequency bin ``f``. The previous implementation produced
    this shape by reshaping ``(freq, time)`` slices, which reinterprets memory
    instead of transposing and therefore scrambled the two axes. Any code that
    turns model outputs back into a spectrogram must use the same time-major
    layout.

    Args:
        path: Path of an audio file readable by ``librosa.load``.

    Returns:
        Tuple ``(lb, hb)`` with shapes ``(n, 32, 129)`` and ``(n, 32, 128)``,
        where ``n`` is the number of complete 32-frame windows in the file,
        capped at 32. ``n`` is 0 for files shorter than one window.
    """
    DIM = 32            # STFT frames per segment
    MAX_SEGMENTS = 32   # keep at most this many segments per file
    LOW_BINS = 129      # frequency bins assigned to the low band

    song,sr = librosa.load(path,sr=16000)
    stft = librosa.stft(np.asarray(song), n_fft=512, window='hamming', hop_length=256)
    spectrogram = librosa.amplitude_to_db(np.abs(stft))

    # Min-max normalise; a constant spectrogram (e.g. digital silence) would
    # otherwise divide by zero and fill the batch with NaN.
    lowest = np.min(spectrogram)
    value_range = np.max(spectrogram) - lowest
    if value_range > 0:
        spectrogram = (spectrogram - lowest) / value_range
    else:
        spectrogram = np.zeros_like(spectrogram)

    # Only the first MAX_SEGMENTS complete windows are used, so do not build more.
    n_bins = spectrogram.shape[0]
    n_segments = min(spectrogram.shape[1] // DIM, MAX_SEGMENTS)

    # (freq, time) -> (time, freq) -> (segment, frame, freq). The transpose is
    # what puts time on the axis the Conv1D layers slide over.
    frames = spectrogram[:, :n_segments * DIM].T
    segments = frames.reshape(n_segments, DIM, n_bins)

    lb = segments[:, :, :LOW_BINS]
    hb = segments[:, :, LOW_BINS:]
    return lb,hb

In [None]:
def train(generator,discriminator,gan,epochs,batch_size,train_path,log_every=1):
    """Run the adversarial training loop.

    Each "epoch" is a single training iteration: ``batch_size`` files are
    drawn at random (with replacement) from ``train_path``, converted to
    low/high-band segments, and used for one discriminator update on real
    samples, one on generated samples, and one generator update through the
    combined model.

    Args:
        generator: Model mapping low-band segments to high-band segments.
        discriminator: Compiled model returning ``[loss, accuracy]`` from
            ``train_on_batch``.
        gan: Compiled combined model (generator followed by the frozen
            discriminator) with outputs ``[generated, validity]``.
        epochs: Number of training iterations to run.
        batch_size: Number of audio files sampled per iteration.
        train_path: Directory containing the ``*.mp3`` training files.
        log_every: Print the losses every this many iterations.

    Returns:
        Tuple ``(d_losses, g_losses)`` with one entry per completed
        iteration: the mean discriminator loss and the value returned by
        ``gan.train_on_batch``.

    Raises:
        ValueError: If ``train_path`` contains no ``.mp3`` files.
    """
    files = glob.glob(train_path+"/*.mp3")
    if not files:
        raise ValueError(f"no .mp3 files found in {train_path!r}")

    d_losses = []
    g_losses = []

    for epoch in range(epochs):
        converted = [convert_audio(file) for file in np.random.choice(files,batch_size)]

        # Files can yield different numbers of segments (short clips give
        # fewer than 32), so concatenate along the segment axis instead of
        # stacking into a fixed-shape array.
        batch_lb = np.concatenate([lb for lb, _ in converted], axis=0)
        batch_hb = np.concatenate([hb for _, hb in converted], axis=0)

        n_samples = batch_lb.shape[0]
        if n_samples == 0:
            print(f"epoch: {epoch+1}/{epochs}, skipped: no usable segments in batch")
            continue

        # Labels must match the actual number of segments in this batch.
        ones = np.ones(n_samples)
        zeros = np.zeros(n_samples)

        # verbose=0 keeps predict() from printing a progress bar every iteration.
        fake_hb = generator.predict(batch_lb, verbose=0)

        d_loss_real, d_acc_real = discriminator.train_on_batch(batch_hb,ones)
        d_loss_fake, d_acc_fake = discriminator.train_on_batch(fake_hb,zeros)
        d_loss = 0.5 * (d_loss_real + d_loss_fake)

        # Generator step: reproduce the real high band and make the frozen
        # discriminator label the output as real.
        g_loss = gan.train_on_batch(batch_lb, [batch_hb, ones])

        d_losses.append(d_loss)
        g_losses.append(g_loss)

        if epoch % log_every == 0:
            print(f"epoch: {epoch+1}/{epochs}, d_loss: {d_loss}, g_loss: {g_loss}")

    return d_losses, g_losses

In [None]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and numerical warnings
# raised during training are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Train in rounds of 100 iterations until the kernel is interrupted. After each
# round the generator is checkpointed and the adversarial loss weight is raised.
j = 0
while True:
  train(generator, discriminator, combined, 100, 16, "./UnzippedDataset/train")

  # Checkpoint first so the finished round is on disk before anything else runs.
  generator.save(f'gen_norm_{j}.h5')

  # `combined.loss` is just the list passed to compile(), so assigning
  # `combined.loss.loss_weights` raises AttributeError. Loss weights are fixed
  # at compile time; recompile to change them. The existing optimizer instance
  # is reused so the learning-rate settings carry over.
  combined.compile(optimizer=combined.optimizer,
                   loss=['mse', 'binary_crossentropy'],
                   loss_weights=[0.1, 0.001+j*0.002])
  j += 1