In [None]:
import os
import pickle
import shutil
import subprocess
from os.path import join

import librosa
import numpy as np
import pandas as pd
import spleeter
import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T
from python_speech_features import mfcc

In [None]:
class Audio_Util:
  """Extracts per-song audio features and pickles them.

  For each song the pipeline computes a mel spectrogram of the full mix,
  separates the mix into stems with the spleeter command line tool, then
  stores MFCCs for the vocal stem and a WaveNet encoding for every other stem.

  Attributes:
    rate: sample rate passed to ``librosa.load``.
    mono: whether ``librosa.load`` should downmix to a single channel.
    max_dur: stored on the instance; no method of this class reads it.
  """

  # Pipeline locations. They are class attributes so a caller can override
  # them on the class or on one instance without editing the methods.
  SONG_DIR = '/content/gdrive/MyDrive/music_data_X/'
  SPLIT_DIR = "/content/output/"
  PICKLE_DIR = "/content/gdrive/MyDrive/Dictionary_data"
  WAVENET_CKPT = "/content/gdrive/MyDrive/wavenet-ckpt/wavenet-ckpt/model.ckpt-200000"

  def __init__(self,rate,mono,max_dur):
    super().__init__()
    self.rate = rate
    self.mono = mono
    self.max_dur = max_dur

  @staticmethod
  def _to_mono(sig):
    """Average a (channels, samples) signal down to 1-D; pass 1-D through."""
    sig = np.asarray(sig)
    if sig.ndim > 1:
      return sig.mean(axis=0)
    return sig

  @staticmethod
  def _merge_channels(spec):
    """Sum a (channels, n_mels, frames) spectrogram over its channel axis.

    A spectrogram that has no channel axis (mono input) is returned unchanged;
    indexing it with [0] and [1] would add two mel bands instead.
    """
    if spec.dim() < 3:
      return spec
    return spec.sum(dim=0)

  def split_aud(self,filename,out_dir="output/"):
    """Separate ``filename`` into two stems with the spleeter CLI.

    Args:
      filename: path of the audio file to separate.
      out_dir: directory handed to spleeter's ``-o`` option.

    Raises:
      subprocess.CalledProcessError: if spleeter exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact and works outside IPython as well.
    cmd = ["spleeter","separate","-p","spleeter:2stems","-o",out_dir,filename]
    subprocess.run(cmd,check=True)

  def load_aud(self,filename):
    """Load ``filename`` with librosa and return ``(signal, sample_rate)``."""
    sig,sr = librosa.load(filename,mono = self.mono,sr = self.rate)
    return ((sig,sr))

  def wave2mfcc(self,path):
    """Return the MFCCs of the file at ``path`` as a tensor.

    The two axes of the array returned by ``mfcc`` are swapped before the
    conversion to a tensor.
    """
    sig,sr = self.load_aud(path)
    # mfcc expects a 1-D signal; a multi-channel array is averaged first.
    mfcc_feat = mfcc(self._to_mono(sig),sr)
    mfcc_data = np.swapaxes(mfcc_feat, 0 ,1)
    return torch.Tensor(mfcc_data)

  def spectro_gram(self,path,n_mels=64, n_fft=20480, hop_len=None):
    """Return the mel spectrogram of ``path`` in decibels.

    Args:
      path: audio file to analyse.
      n_mels: number of mel bands.
      n_fft: FFT size.
      hop_len: hop length forwarded to ``MelSpectrogram`` (None keeps its default).

    Returns:
      A tensor with the channel axis, if any, summed away.
    """
    sig,sr = self.load_aud(path)
    sig = torch.Tensor(sig)
    spec = T.MelSpectrogram(sr, n_fft=n_fft, hop_length=hop_len, n_mels=n_mels)(sig)
    spec = T.AmplitudeToDB(top_db=80)(spec)
    return self._merge_channels(spec)

  def wavenet_encode(self, file):
    """Encode ``file`` with the WaveNet checkpoint at ``WAVENET_CKPT``.

    NOTE(review): ``u`` and ``fastgen`` are not defined in the visible cells;
    presumably they are magenta's nsynth ``utils`` and ``wavenet.fastgen``
    imported elsewhere in the notebook - confirm.
    """
    sr = 22050
    # 6615000 samples equals 300 seconds at 22050 Hz.
    audio = u.load_audio(file,sample_length=6615000,sr = sr)
    encoding = fastgen.encode(audio, self.WAVENET_CKPT, len(audio))
    return encoding

  def main(self,song):
    """Build the feature dictionary for one song.

    Args:
      song: file name relative to ``SONG_DIR``.

    Returns:
      A dict with the key "original" (mel spectrogram) plus one key per stem
      file written by spleeter, or None when any step raised.
    """
    filename = self.SONG_DIR + song
    # spleeter writes the stems to <out_dir>/<file name without extension>/.
    stem = os.path.splitext(os.path.basename(song))[0]
    stem_dir = os.path.join(self.SPLIT_DIR, stem)
    try:
      output_dict = {}
      output_dict["original"] = self.spectro_gram(filename)
      self.split_aud(filename, self.SPLIT_DIR)
      for file in os.listdir(stem_dir):
        name = os.path.splitext(file)[0]
        path = os.path.join(stem_dir,file)
        if name == "vocals":
          output_dict[name] = self.wave2mfcc(path)
        else:
          output_dict[name] = self.wavenet_encode(path)
      return output_dict
    except Exception as e:
      # Best effort: report which song failed and let the caller move on.
      print(f"Failed to process {song}: {e}")
      return None
    finally:
      # Remove the stems on failure too. The guard stops an empty song name
      # from deleting SPLIT_DIR itself.
      if stem:
        shutil.rmtree(stem_dir, ignore_errors=True)

  def df_loader(self,df,column_name):
    """Run ``main`` on every file in ``df[column_name]`` and pickle each result.

    Each dictionary is written to ``PICKLE_DIR`` under the song's name with its
    extension replaced by ".pickle". Songs for which ``main`` returned None are
    skipped so that no pickle containing None is written.
    """
    os.makedirs(self.PICKLE_DIR, exist_ok=True)
    for file in df[column_name].tolist():
      curr_dict = self.main(file)
      if curr_dict is None:
        continue
      filename = os.path.join(self.PICKLE_DIR, os.path.splitext(file)[0] + ".pickle")
      with open(filename, 'wb') as handle:
        pickle.dump(curr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Extract and pickle the features for every file listed in data["URI_Path"],
# loading audio at 22050 Hz without downmixing to mono.
Audio_dl = Audio_Util(rate=22050, mono=False, max_dur=300)
Audio_dl.df_loader(data, column_name="URI_Path")

In [None]:
def _min_max_scale(values):
  """Scale ``values`` linearly so its minimum maps to 0 and its maximum to 1.

  A constant array has zero range; it maps to all zeros instead of the NaNs
  that a division by zero would produce.
  """
  min_v = np.min(values)
  range_v = np.max(values) - min_v
  if range_v == 0:
    range_v = 1
  return (values - min_v) / range_v


def flat(file,
         dict_dir='/content/gdrive/MyDrive/Dict_y_data/',
         original_dir='/content/gdrive/MyDrive/Original_y_dict/'):
  """Load one song's feature dictionary, normalise it and flatten each entry.

  The "original" entry is taken from the pickle in ``original_dir`` and
  replaces whatever "original" the pickle in ``dict_dir`` holds. Every entry
  is min-max scaled; "accompaniment", "vocals" and "original" are then
  reshaped to a single row.

  Args:
    file: pickle file name, present in both directories.
    dict_dir: directory of the dictionaries holding "vocals" and "accompaniment".
    original_dir: directory of the dictionaries holding "original".

  Returns:
    The normalised dictionary, with the three entries above of shape (1, N).
  """
  # NOTE: unpickling runs arbitrary code; only read files this pipeline wrote.
  output_dict = pd.read_pickle(os.path.join(dict_dir, file))
  original_dict = pd.read_pickle(os.path.join(original_dir, file))
  # "vocals" and "original" are converted with .numpy(); the other entries
  # are used as loaded.
  output_dict['vocals'] = output_dict['vocals'].numpy()
  output_dict['original'] = original_dict['original'].numpy()
  for vector in output_dict:
    output_dict[vector] = _min_max_scale(output_dict[vector])
  for vector in ('accompaniment', 'vocals', 'original'):
    output_dict[vector] = output_dict[vector].reshape(1,-1)
  return output_dict

def flat_loader(df,column_name,out_dir="/content/gdrive/MyDrive/Vocals_Accompain"):
  """Run ``flat`` for every file in ``df[column_name]`` and pickle each result.

  Args:
    df: table whose ``column_name`` column lists the audio file names.
    column_name: name of that column.
    out_dir: directory that receives one "<name>.pickle" per file.
  """
  for file in df[column_name].tolist():
    print(file)
    # splitext copes with extensions of any length, unlike slicing off the
    # last four characters.
    file = os.path.splitext(file)[0] + ".pickle"
    curr_dict = flat(file)
    filename = os.path.join(out_dir,file)
    with open(filename, 'wb') as handle:
      pickle.dump(curr_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
def _fit_to_square(row, side):
  """Truncate or zero-pad a 1-D array to side*side values, then reshape it."""
  target = side * side
  length = row.shape[0]
  if length > target:
    row = row[:target]
  else:
    row = np.pad(row, (0, target - length), 'constant', constant_values=0)
  return row.reshape(side, side)


def reshape_stack(df, column_name, data_dir="/content/gdrive/MyDrive/Dict_y_data", side=128):
  """Replace each song's feature dictionary with one stacked array.

  For every file in ``df[column_name]`` the matching pickle in ``data_dir`` is
  read. The first row of each entry is truncated or zero-padded to
  ``side * side`` values and reshaped to ``(side, side)``. The entries are then
  stacked in the order accompaniment, original, vocals and written back over
  the same pickle file.

  Args:
    df: table whose ``column_name`` column lists the audio file names.
    column_name: name of that column.
    data_dir: directory holding the pickles; they are rewritten in place.
    side: edge length of each square (128 gives 16384 values per entry).
  """
  for file in df[column_name].tolist():
    print(file)
    file = os.path.splitext(file)[0] + ".pickle"
    filename = os.path.join(data_dir,file)
    # NOTE: unpickling runs arbitrary code; only read files this pipeline wrote.
    output_dict = pd.read_pickle(filename)
    if isinstance(output_dict, np.ndarray):
      # The file is overwritten in place, so an ndarray here means an earlier
      # run already stacked it. Skipping makes the cell safe to re-run.
      continue
    curr_dict = {key: _fit_to_square(output_dict[key][0], side) for key in output_dict}
    final = np.stack((curr_dict['accompaniment'], curr_dict['original'],curr_dict['vocals']))
    with open(filename, 'wb') as handle:
      pickle.dump(final, handle, protocol=pickle.HIGHEST_PROTOCOL)