In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#Directory that holds the project's python scripts; it is passed to sys.path.append()
#so that those scripts can be imported as modules
SCRIPTS_PATH = '/content/drive/My Drive/TimbreTransformer/Scripts'

In [None]:
#The working directory of the notebook; it is passed to os.chdir()
#NOTE(review): the relative paths in PARAMS are presumably resolved against this directory - confirm
DEFAULT_PATH = '/content/drive/My Drive/TimbreTransformer'

In [None]:
#Run configuration: the name of the saved model plus the audio file to read and the audio file to write
PARAMS = dict(
    model_name='flute_v1',
    input_audio='Data/Violin.mp3',
    output_audio='Data/Transformed.wav',
)

In [None]:
import os
import sys

#Make the project scripts importable. The membership check keeps sys.path free of
#duplicate entries when this cell is run more than once in the same kernel.
if SCRIPTS_PATH not in sys.path:
    sys.path.append(SCRIPTS_PATH)

#Switch to the project working directory
os.chdir(DEFAULT_PATH)

In [None]:
import json

import numpy as np
import scipy
import scipy.io.wavfile
import scipy.signal
import tensorflow as tf

import import_audio
import process_audio
import custom_loss
import model

In [None]:
#Build a Model for the configured model name and keep whatever load_from_file() returns.
#NOTE(review): later cells read gan.generator, gan.input_shape and gan.model_params, so this
#assumes load_from_file() returns the loaded model object rather than None - confirm in model.py
gan = model.Model(PARAMS['model_name']).load_from_file()

In [None]:
#Load the input recording with the n_fft and srate values stored in gan.model_params;
#both shuffle options are switched off
input_audio = import_audio.Audio(
    PARAMS['input_audio'],
    n_fft=gan.model_params['n_fft'],
    srate=gan.model_params['srate'],
    shuffle_spec=False,
    shuffle_audio=False,
)



In [None]:
def predict(spectrogram, generator, input_shape = (129,500,1)):
    """
    Pushes a spectrogram through the generator and returns the recombined prediction.

    spectrogram: handed unchanged to process_audio.partition
    generator: object exposing predict(); it is called on the partitioned input
    input_shape: forwarded to process_audio.partition, defaults to (129,500,1)

    Returns the result of process_audio.combine with all length-one axes squeezed out.
    """
    #partition() yields the generator input together with the scaling that combine() needs later
    partitions, scaling = process_audio.partition(spectrogram, input_shape = input_shape)
    generated = generator.predict(partitions)
    return np.squeeze(process_audio.combine(generated, scaling))

In [None]:
def spectrogram_to_audio(spec, phase, n_fft = 256, srate = 22050):
    """
    Rebuilds a time-domain signal from a magnitude array and a phase array.

    A complex valued array with value spec[a,b] x (cos(phase[a,b]) + i sin(phase[a,b])) is built
    and transformed to the final audio through the inverse short-time fourier transform
    (scipy.signal.istft, which needs scipy.signal to be imported by the notebook).

    spec: 2-D array of magnitudes; with istft's default axes the first axis is frequency
          and the second axis is time
    phase: 2-D array of phase angles in radians, laid out like spec
    n_fft: segment length, passed to istft as nperseg
    srate: sampling rate, passed to istft as fs

    Returns the reconstructed signal (the second element of the istft result).
    """
    #Prevents out of bounds error if the spec and phase arrays are of different sizes
    #by trimming both (once) to the shorter of the two along the second axis
    frames = min(spec.shape[1], phase.shape[1])
    magnitude = spec[:, :frames]
    angle = phase[:, :frames]

    ft = np.multiply(magnitude, np.cos(angle)) + 1j*np.multiply(magnitude, np.sin(angle))

    #istft returns (times, signal); only the signal is needed
    return scipy.signal.istft(ft, fs = srate, nperseg = n_fft)[1]

In [None]:
#Run the generator on the input spectrogram, then rebuild audio from the prediction
#together with the phase taken from the input recording
data = spectrogram_to_audio(
    predict(
        spectrogram=input_audio.ft.spec,
        generator=gan.generator,
        input_shape=gan.input_shape,
    ),
    phase=input_audio.ft.phase,
    n_fft=gan.model_params['n_fft'],
    srate=gan.model_params['srate'],
)

In [None]:
#Peak-normalise the audio: dividing by the largest absolute sample puts every sample in the range -1 to 1
peak = np.amax(np.abs(data))
#An all-zero signal has a peak of 0; skip the division so the samples do not turn into NaN
if peak > 0:
    data = np.divide(data, peak)

In [None]:
#Write the audio to the configured output path using the sample rate stored in gan.model_params
#NOTE(review): wavfile.write picks the WAV encoding from the dtype of `data`; if it is float64 here
#some players may not open the file - confirm, and cast to float32 if needed
scipy.io.wavfile.write(
    PARAMS['output_audio'],
    rate=gan.model_params['srate'],
    data=data,
)