In [None]:
# List the GPU(s) attached to this runtime before doing any work.
!nvidia-smi -L

In [None]:
# Install the command-line and Python tools used by the dataset-building cell:
# ffmpeg (cutting audio), spleeter (stem separation), youtube-dl (playlist download).
!apt install ffmpeg
!pip install spleeter
!pip install youtube-dl
from IPython.display import Audio
import librosa
import librosa.display
import youtube_dl
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob


# Root project folder on Google Drive; later cells build their paths from it.
folder = '/content/gdrive/MyDrive/DeepHouse/'
from google.colab import drive 
# Mount Google Drive under /content/gdrive/ (force_remount=True is passed explicitly).
drive.mount('/content/gdrive/', force_remount=True) 

# Build Dataset

In [None]:
def splitSong(songPath,outPath , splitSize=300):
  !ffmpeg -i $songPath -f segment -segment_time 300 -c copy $outPath%03d.m4a

def splitSongs(path, out, remove=True):
  """Split every .m4a file in `path` into chunks and prepare output folders.

  Args:
    path: directory (with trailing '/') that holds the downloaded songs.
    out: destination root; one sub-directory per source song is created in it.
    remove: when True, delete each source file once it has been split.
  """
  # Snapshot the listing first: splitting writes new .m4a files into `path`.
  sources = [f for f in os.listdir(path) if f.endswith('.m4a')]
  for song in sources:
    split = song.split('.')[0]
    print('Splitting ' + str(split))
    splitSong(songPath=path+song,outPath=path+split+'_')

    # Create the per-song destination directory. exist_ok keeps restarts quiet
    # and avoids shell word-splitting on Drive paths.
    os.makedirs(os.path.join(out, split), exist_ok=True)

    # Only the chunks are needed from here on.
    if remove:
      os.remove(path+song)

def downloadPlaylist(path, url):
    dlPath = "\'"+path+"%(playlist_index)s.%(ext)s\'"
    !youtube-dl -f 'bestaudio[ext=m4a]' -o $dlPath $url

def createTmpDir(path):
  """Create `path` as an empty directory, discarding anything already there.

  Done in Python rather than with an interpolated `rm -rf` shell line, so a
  path containing spaces or shell metacharacters cannot delete anything else.
  """
  import shutil

  if os.path.isdir(path) and not os.path.islink(path):
    shutil.rmtree(path)
  elif os.path.lexists(path):
    # A plain file or symlink is in the way; remove just that entry.
    os.remove(path.rstrip('/') or path)
  os.makedirs(path)

def saveBars(path, name, out):
  # load the song into librosa
  y, sr = librosa.load(path, sr=None)
  y_harmonic, y_percussive = librosa.effects.hpss(y)
  tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr)

  # create our time chunks, make all 16 beat bars, and only keep every 4
  bars_frame = [(beats[beat], beats[beat+15]) for beat in list(range(len(beats)-15))][0::4] 
  bars=librosa.frames_to_time(bars_frame)

  # save our cuts
  for idx, bar in enumerate(bars):
    barOut = out+name.split('_')[0]+'/'+ name.split('_')[1]+'_' +str(idx) + '_' + str(round(tempo,0)).split('.')[0] + '.m4a'
    length =  bar[1] - bar[0] # end - beginning
    start = bar[0]
    ! ffmpeg -ss $start -i $path -t $length -c copy $barOut

 # get a set of all 'parsed' files to skip them
def getExists(path):
  """Return the set of chunk names ('<song>_<chunk>') already present in `path`.

  Two layouts are recognised:
    * '<path>/<song>/<chunk>_<idx>_<bpm>.m4a' (the layout saveBars writes)
      contributes '<song>_<chunk>'.
    * '<path>/<song>_<chunk>.m4a' (the plain copy made when bars are not cut)
      contributes '<song>_<chunk>'.

  A missing `path` yields an empty set instead of raising.
  """
  preproc = set() # built once, since listing Drive folders is slow
  if not os.path.isdir(path):
    return preproc

  for entry in os.listdir(path):
    entryPath = os.path.join(path, entry)
    if os.path.isdir(entryPath):
      for f in os.listdir(entryPath):
        # map back to the chunk's file name in the temp folder
        preproc.add(entry+'_'+f.split('_')[0])
    else:
      # Plain file copied straight into `path`: its stem is the chunk name.
      preproc.add(entry.split('.')[0])

  return preproc

def processSongs(path, out, bpmCut=False): # path is location of src; out is dest
  songs = [f for f in os.listdir(path) if f.endswith('.m4a')]
  preproc = getExists(out) # set of all to skip, to allow for restarts 

  # for all songs
  for idx, song in enumerate(songs):
    name = song.split('.')[0]

    # process if new, else skip
    if name not in preproc: 
      print('Processing ' +str(round(100*idx/len(songs),2))+' | ' + str(song))
      # split the song delete the audio
      !spleeter separate -o $path $path$song -c m4a 

      # overwrite the song with the split vocals
      output = path+name+'/accompaniment.m4a'
      if bpmCut:
        saveBars(path=output, name=name, out=out)
      else:
        cp $output $out/$song

      # and delete the vocals + newly created folder
      newFolder = path+song.split('.')[0]
      ! rm -rf $newFolder
    else:
      print('Skipping ' +str(round(100 * idx/len(songs),2))+' | ' + str(song))

def createData(url, folder):
  """Build the dataset for one playlist: download, split, then process.

  Args:
    url: YouTube playlist URL.
    folder: destination root for the processed audio.

  processSongs is called with its default bpmCut, so the accompaniment of
  each chunk is stored whole rather than cut into bars.
  """
  # scratch space on the local disk, recreated empty on every call
  scratchDir = '/content/musicDLtmp/'
  createTmpDir(path=scratchDir)

  # fetch the playlist, then cut each song into chunks
  # (5 min chunks; max parse time for spleeter=10mins)
  downloadPlaylist(path=scratchDir, url=url)
  splitSongs(path=scratchDir, out=folder)

  # strip the vocals of every chunk and store the result under `folder`
  processSongs(path=scratchDir, out=folder)

# Playlist to download; it is empty here and must be filled in before running.
url = '' # youtube playlist here
# Drive folder that receives the processed audio; the training-setup cell
# below reuses it as the audio directory.
dlFolder = '/content/gdrive/MyDrive/DeepHouse/1-Input/'
createData(url=url, folder=dlFolder)

# Train Prior on Dataset


Inspired by @Zaags Training Prior 1.5

In [None]:
# Load the TensorBoard notebook extension and point it at the prior's log
# directory on Drive.
%load_ext tensorboard
%tensorboard --logdir /content/gdrive/MyDrive/DeepHouse/deephouse_prior/logs

In [None]:
# Base name of the custom model; later cells append '_prior' to it to form
# the prior name and the checkpoint folder.
lemodel = 'deephouse' # your model name
# When False, the training-setup cell comments out logger.flush() in train.py.
leeventlog = False

# NOTE(review): audioDir is not referenced by any other cell visible in this
# notebook (training reads dlFolder) - confirm whether it is still needed.
audioDir = folder + '1-Input/DL/'

In [None]:
!pip install git+https://github.com/openai/jukebox.git
!git clone https://github.com/openai/jukebox.git
!pip install av
!pip install tensorboardX 
import os

lemodelpath = folder + lemodel + '_prior/checkpoint_latest.pth.tar'
leaudio_files_dir = dlFolder
lepriorname = lemodel + '_prior'

print(lemodelpath,leaudio_files_dir,lepriorname,os.path.isfile(lemodelpath))

############### To Change to Google Drive

fin = open("/usr/local/lib/python3.7/dist-packages/jukebox/utils/logger.py", "rt")
data = fin.read()
data = data.replace('logdir = f"{hps.local_logdir}/{hps.name}"', 'logdir = f"/content/gdrive/MyDrive/DeepHouse/{hps.name}"')
fin.close()
fin = open("/usr/local/lib/python3.7/dist-packages/jukebox/utils/logger.py", "wt")
fin.write(data)
fin.close()

############### To Sample with Google Drive

fin = open("/usr/local/lib/python3.7/dist-packages/jukebox/hparams.py", "rt")
data = fin.read()
fin.close()

data += lemodel + """_prior = Hyperparams()   
""" + lemodel + """_prior.update(small_prior)
""" + lemodel + """_prior.restore_prior='""" + lemodelpath + """'
""" + lemodel + """_prior.n_ctx=8192
""" + lemodel + """_prior.l_bins=2048
""" + lemodel + """_prior.level=2
""" + lemodel + """_prior.labels=False
HPARAMS_REGISTRY['""" + lemodel + """_prior'] = """ + lemodel + """_prior
"""

data = data.replace('min_duration=60.0','min_duration=24.0')
data = data.replace('max_duration=600.0','max_duration=666.0')

fin = open("/usr/local/lib/python3.7/dist-packages/jukebox/hparams.py", "wt")
fin.write(data)
fin.close()


####################### step print

fin = open("/content/jukebox/jukebox/train.py", "rt")
data = fin.read()
fin.close()

data = data.replace('log_inputs(orig_model, logger, x_in, y, x_out, hps)','log_inputs(orig_model, logger, x_in, y, x_out, hps);print(colored("steps:" + str(logger.iters),"green"))')
data = """from termcolor import colored

""" + data

if leeventlog == False:
  data = data.replace('logger.flush()','#logger.flush()')
  
fin = open("/content/jukebox/jukebox/train.py", "wt")
fin.write(data)
fin.close()

In [None]:
lehps = ('vqvae,small_prior,all_fp16,cpu_ema','vqvae,'+lepriorname+',all_fp16,cpu_ema')[os.path.isfile(lemodelpath)]
print(("new training model",lepriorname + " model found")[os.path.isfile(lemodelpath)])

!python jukebox/jukebox/train.py \
--prior --test --train --aug_shift --aug_blend \
--hps=$lehps \
--save_iters=500 \
--levels=3 \
--level=2 \
--labels=False \
--name=$lepriorname \
--sample_length=1048576     \
--bs=7 \
--audio_files_dir=$leaudio_files_dir  \
--n_ctx=8192 \
--weight_decay=0.01 

#--restore_prior="path/to/checkpoint" --lr_use_linear_decay --lr_start_linear_decay={already_trained_steps} --lr_decay={decay_steps_as_needed}

# Create Sample

In [None]:
# Sampling options read by the cells below.
# Stopped mid level = 2? use continue
# Stopped mid level = 1 or 0? use upsample

lemode = 'ancestral'     # 'ancestral','primed','continue','cutcontinue','upsample'

lecount = 15                         # number of samples generated in one batch
lesample_length_in_seconds = 90
lesampling_temperature = .98         # temperature used for the top level
lehop = [.75,1,.125]                 #default [.5,.5,.125], optimized [1,1,0.0625]

lepath = '/content/gdrive/MyDrive/DeepHouse/2-Output/'

leprompt_length_in_seconds=0  
# Prompt audio for 'primed' mode. It defaults to None so that every mode can
# read the name; previously it was only defined in 'ancestral' mode, and any
# other mode stopped with a NameError in the sampling cell.
leaudio_file = None
# leaudio_file = '/content/gdrive/MyDrive/DeepHouse/1-Input/DL/014_060.m4a'                    

lecut = 70               # used only on cutcontinue
transpose = [0,1,2]      # used only on cutcontinue [0,1,2] = default, ex [1,1,1] all samples are copied from item 1

leexportlyrics = False   # False: comment out alignment + save_html in sample.py
leprogress = True        # False: comment out the per-step progress print
leautorename = True      # True: rename item_*.wav after the output folder

leartist = "unknown"
legenre = "house"
lelyrics = ""

lecustommodellyrics = False

lechunk_size = 16 
lemax_batch_size = (17,3)['5b' in lemodel]   # 3 when the model name contains '5b', else 17
lelower_batch_size = lecount
lelower_level_chunk_size = lechunk_size * 2

In [None]:
if lemode=='ancestral':
  leprompt_length_in_seconds=None  
  leaudio_file = None
###############################################################################
###############################################################################

codes_file=None

!pip install git+https://github.com/openai/jukebox.git

##$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$#### autosave start
import os
from glob import glob

filex = "/usr/local/lib/python3.7/dist-packages/jukebox/sample.py"
fin = open(filex, "rt")
data = fin.read()
fin.close()

newtext = '''import fire
import os
from glob import glob

from termcolor import colored
from datetime import datetime

newtosample = True'''
data = data.replace('import fire',newtext)

newtext = '''starts = get_starts(total_length, prior.n_ctx, hop_length)
        counterr = 0
        x = None
        for start in starts:'''
data = data.replace('for start in get_starts(total_length, prior.n_ctx, hop_length):',newtext)

newtext = '''global newtosample
    newtosample = (new_tokens > 0)
    if new_tokens <= 0:'''
data = data.replace('if new_tokens <= 0:',newtext)

newtext = '''counterr += 1
            datea = datetime.now()		
            zs = sample_single_window(zs, labels, sampling_kwargs, level, prior, start, hps)			
            if newtosample and counterr < len(starts):
                del x; x = None; prior.cpu(); empty_cache()
                x = prior.decode(zs[level:], start_level=level, bs_chunks=zs[level].shape[0])
                logdir = f"{hps.name}/level_{level}"
                if not os.path.exists(logdir):
                    os.makedirs(logdir)
                t.save(dict(zs=zs, labels=labels, sampling_kwargs=sampling_kwargs, x=x), f"{logdir}/data.pth.tar")
                save_wav(logdir, x, hps.sr)
                del x; prior.cuda(); empty_cache(); x = None
            dateb = datetime.now()
            timex = ((dateb-datea).total_seconds()/60.0)*(len(starts)-counterr)
            print(f"Step " + colored(counterr,'blue') + "/" + colored( len(starts),'red') + " ~ New to Sample: " + str(newtosample) + " ~ estimated remaining minutes: " + (colored('???','yellow'), colored(timex,'magenta'))[counterr > 1 and newtosample])'''
data = data.replace('zs = sample_single_window(zs, labels, sampling_kwargs, level, prior, start, hps)',newtext)


newtext = """lepath=hps.name
        if level==2:
          for filex in glob(os.path.join(lepath + '/level_2','item_*.wav')):
            os.rename(filex,filex.replace('item_',lepath.split('/')[-1] + '-'))
        if level==1:
          for filex in glob(os.path.join(lepath + '/level_1','item_*.wav')):
            os.rename(filex,filex.replace('item_',lepath.split('/')[-1] + '-L1-'))
        if level==0:
          for filex in glob(os.path.join(lepath + '/level_0','item_*.wav')):
            os.rename(filex,filex.replace('item_',lepath.split('/')[-1] + '-L0-'))
        save_html("""
if leautorename:
  data = data.replace('save_html(',newtext)

if leexportlyrics == False:
  data = data.replace('if alignments is None','#if alignments is None')
  data = data.replace('alignments = get_alignment','#alignments = get_alignment')
  data = data.replace('save_html(','#save_html(')

if leprogress == False:
  data = data.replace('print(f"Step " +','#print(f"Step " +')
  
fin = open(filex, "wt")
fin.write(data)
fin.close()
##$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$#### autosave end

###CUSTOM MODEL############ To Sample with Google Drive
# Any model name without '1b' / '5b' in it is treated as a custom prior: it is
# registered in hparams.py and wired into the MODELS table of make_models.py.
if not '1b' in lemodel and not '5b' in lemodel:
  # This cell ends by appending '_model' to lemodel. Strip a suffix left by an
  # earlier run so that re-running the cell does not stack it.
  if lemodel.endswith('_model'):
    lemodel = lemodel[:-len('_model')]
  lepriorname = lemodel + '_prior'

  # Same Drive folder the training run writes its checkpoints to
  # ('DeepHouse'; this path used to point at 'DeepDeepHouse').
  lemodelpath = '/content/gdrive/MyDrive/DeepHouse/' + lepriorname + '/checkpoint_latest.pth.tar'

  hparams_py = "/usr/local/lib/python3.7/dist-packages/jukebox/hparams.py"
  with open(hparams_py, "rt") as fin:
    data = fin.read()

  if lecustommodellyrics:
    prior_block = (
      f"{lepriorname} = Hyperparams()   \n"
      f"{lepriorname}.update(small_single_enc_dec_prior)\n"
      f"{lepriorname}.restore_prior='{lemodelpath}'\n"
      f"{lepriorname}.n_ctx=8192\n"
      f"{lepriorname}.alignment_layer=47\n"
      # attribute assignment; the '.' was missing before, which created a
      # stray variable instead of setting alignment_head
      f"{lepriorname}.alignment_head=0\n"
      f"{lepriorname}.l_bins=2048\n"
      f"{lepriorname}.level=2\n"
      f"HPARAMS_REGISTRY['{lepriorname}'] = {lepriorname}\n"
    )
  else:
    prior_block = (
      f"{lepriorname} = Hyperparams()   \n"
      f"{lepriorname}.update(small_prior)\n"
      f"{lepriorname}.restore_prior='{lemodelpath}'\n"
      f"{lepriorname}.n_ctx=8192\n"
      f"{lepriorname}.l_bins=2048\n"
      f"{lepriorname}.level=2\n"
      f"{lepriorname}.labels=False\n"
      f"HPARAMS_REGISTRY['{lepriorname}'] = {lepriorname}\n"
    )
  # The last definition in hparams.py wins, so append unless this exact block
  # is already the end of the file.
  if not data.endswith(prior_block):
    data += prior_block

  data = data.replace('y_bins=(10,100)','y_bins=(604, 7898)')
  data = data.replace('min_duration=60.0','min_duration=24.0')
  data = data.replace('max_duration=600.0','max_duration=666.0')

  with open(hparams_py, "wt") as fout:
    fout.write(data)

  # Fill the commented-out 'your_model' template line of the MODELS table.
  make_models_py = "/usr/local/lib/python3.7/dist-packages/jukebox/make_models.py"
  with open(make_models_py, "rt") as fin:
    data = fin.read()

  data = data.replace("#'your_model': ","'" +lemodel + "_model': ")
  data = data.replace('("you_vqvae_here", "your_upsampler_here", ..., "you_top_level_prior_here")','("vqvae",  "upsampler_level_0", "upsampler_level_1", "' + lemodel + '_prior"),')

  with open(make_models_py, "wt") as fout:
    fout.write(data)

  # From here on lemodel is the MODELS key registered above.
  lemodel = lemodel + "_model"
###CUSTOM MODEL END############

import jukebox
import torch as t
import librosa
import os

from datetime import datetime

from IPython.display import Audio
from jukebox.make_models import make_vqvae, make_prior, MODELS, make_model
from jukebox.hparams import Hyperparams, setup_hparams
from jukebox.sample import sample_single_window, _sample, \
                           sample_partial_window, upsample, \
                           load_prompts
from jukebox.utils.dist_utils import setup_dist_from_mpi
from jukebox.utils.torch_utils import empty_cache
rank, local_rank, device = setup_dist_from_mpi()

print(datetime.now().strftime("%H:%M:%S"))

# ---- run-level hyper-parameters ----
model = lemodel
hps = Hyperparams()
hps.sr = 44100
hps.n_samples = lecount 
# Strip the trailing '/'. The auto-rename patch prefixes files with the last
# path component of hps.name, which is empty when the path ends in a slash.
hps.name = lepath.rstrip('/')
hps.levels = 3
hps.hop_fraction = lehop

# ---- models: the VQ-VAE and the top-level prior (last entry of the tuple) ----
vqvae, *priors = MODELS[model]
vqvae = make_vqvae(setup_hparams(vqvae, dict(sample_length = 1048576)), device)
top_prior = make_prior(setup_hparams(priors[-1], dict()), vqvae, device)

# Prime song creation using an arbitrary audio sample.
mode = lemode
codes_file=None
audio_file = leaudio_file
prompt_length_in_seconds=leprompt_length_in_seconds


if os.path.exists(hps.name):
  # Identify the lowest level generated and continue from there.
  for level in [0, 1, 2]:
    saved_codes = f"{hps.name}/level_{level}/data.pth.tar"
    if os.path.isfile(saved_codes):
      # Any mode that is not a 'continue' variant becomes 'upsample'.
      mode = mode if 'continue' in mode else 'upsample'
      codes_file = saved_codes
      print(mode + 'ing from level ' + str(level))
      break
print('mode is now '+mode)

sample_hps = Hyperparams(dict(mode=mode, codes_file=codes_file, audio_file=audio_file, prompt_length_in_seconds=prompt_length_in_seconds))

# Round the requested length down to a whole number of top-level tokens.
sample_length_in_seconds = lesample_length_in_seconds 
hps.sample_length = (int(sample_length_in_seconds*hps.sr)//top_prior.raw_to_tokens)*top_prior.raw_to_tokens
# The check is on the sample length, so the message now says so.
assert hps.sample_length >= top_prior.n_ctx*top_prior.raw_to_tokens, 'Please choose a larger sample length (lesample_length_in_seconds)'

# One identical metadata dict per sample; labels exist for the top level only
# at this point.
metas = [dict(artist = leartist,
            genre = legenre,
            total_length = hps.sample_length,
            offset = 0,
            lyrics = lelyrics,
            ),
          ] * hps.n_samples
labels = [None, None, top_prior.labeller.get_batch_labels(metas, 'cuda')]


#----------------------------------------------------------2

# ---- per-level sampling settings (index 0 and 1: lower levels, 2: top) ----
sampling_temperature = lesampling_temperature
lower_batch_size = lelower_batch_size
max_batch_size = lemax_batch_size
lower_level_chunk_size = lelower_level_chunk_size
chunk_size = lechunk_size 
sampling_kwargs = [dict(temp=.99, fp16=True, max_batch_size=lower_batch_size,
                        chunk_size=lower_level_chunk_size),
                    dict(temp=.99, fp16=True, max_batch_size=lower_batch_size,
                         chunk_size=lower_level_chunk_size),
                    dict(temp=sampling_temperature, fp16=True, 
                         max_batch_size=max_batch_size, chunk_size=chunk_size)]

print(sample_hps.mode)
print(sample_hps.prompt_length_in_seconds)
print(hps.sr)
print(top_prior.raw_to_tokens)

# Produce the top-level codes `zs` according to the selected mode.
if sample_hps.mode == 'ancestral':
  # Start from empty code sequences and sample the top level from scratch.
  zs = [t.zeros(hps.n_samples,0,dtype=t.long, device='cuda') for _ in range(len(priors))]
  zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
elif sample_hps.mode == 'upsample':
  assert sample_hps.codes_file is not None
  # Load codes.
  data = t.load(sample_hps.codes_file, map_location='cpu')
  zs = [z.cuda() for z in data['zs']]
  assert zs[-1].shape[0] == hps.n_samples, f"Expected bs = {hps.n_samples}, got {zs[-1].shape[0]}"
  del data
  print('Falling through to the upsample step later in the notebook.')
elif sample_hps.mode == 'primed':
  assert sample_hps.audio_file is not None
  audio_files = sample_hps.audio_file.split(',')
  duration = (int(sample_hps.prompt_length_in_seconds*hps.sr)//top_prior.raw_to_tokens)*top_prior.raw_to_tokens
  
  # Encode the prompt audio to codes, then continue sampling from it.
  x = load_prompts(audio_files, duration, hps)
  zs = top_prior.encode(x, start_level=0, end_level=len(priors), bs_chunks=x.shape[0])
  zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
elif sample_hps.mode == 'continue':
  assert sample_hps.codes_file is not None, "'continue' needs saved codes (data.pth.tar) in the output folder"
  data = t.load(sample_hps.codes_file, map_location='cpu')
  zs = [z.cuda() for z in data['zs']]
  zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
elif sample_hps.mode == 'cutcontinue':
  assert sample_hps.codes_file is not None, "'cutcontinue' needs saved codes (data.pth.tar) in the output folder"
  print('-------CUT INIT--------')
  # Cut point in raw samples, rounded down to a whole number of top-level tokens.
  lecutlen = (int(lecut*hps.sr)//top_prior.raw_to_tokens)*top_prior.raw_to_tokens
  print(lecutlen)
  data = t.load(sample_hps.codes_file, map_location='cpu')
  zabaca = [z.cuda() for z in data['zs']]
  print(zabaca)
  # The messages below report on zabaca. They used to reference `zs`, which is
  # not defined in this branch, so a failed check raised NameError.
  assert zabaca[-1].shape[0] == hps.n_samples, f"Expected bs = {hps.n_samples}, got {zabaca[-1].shape[0]}"
  priorsz = [top_prior] * 3
  top_raw_to_tokens = priorsz[-1].raw_to_tokens
  assert lecutlen % top_raw_to_tokens == 0, f"Cut-off duration {lecutlen} not an exact multiple of top_raw_to_tokens"
  assert lecutlen//top_raw_to_tokens <= zabaca[-1].shape[1], f"Cut-off tokens {lecutlen//priorsz[-1].raw_to_tokens} longer than tokens {zabaca[-1].shape[1]} in saved codes"
  zabaca = [z[:,:lecutlen//prior.raw_to_tokens] for z, prior in zip(zabaca, priorsz)]
  hps.sample_length = lecutlen
  print(zabaca)
  zs = _sample(zabaca, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
  del data
  print('-------CUT OK--------')
  # Restore the full target length and reload the codes file before continuing.
  hps.sample_length = (int(sample_length_in_seconds*hps.sr)//top_prior.raw_to_tokens)*top_prior.raw_to_tokens
  data = t.load(sample_hps.codes_file, map_location='cpu')
  zibica = [z.cuda() for z in data['zs']]
  if transpose != [0,1,2]:
    # Copy rows out of the untouched originals into clones. Writing into the
    # same tensors that are being read gives wrong rows for permutations
    # such as [1,0,2].
    zubu = [z.clone() for z in zibica]
    for lvl in (2, 1, 0):
      for dst, src in enumerate(transpose):
        zubu[lvl][dst] = zibica[lvl][src]
  else:
    zubu = zibica[:]
  zubu = _sample(zubu, labels, sampling_kwargs, [None, None, top_prior], [2], hps)
  print('-------CONTINUE AFTER CUT OK--------')
  zs = zubu
else:
  raise ValueError(f'Unknown sample mode {sample_hps.mode}.')



print(datetime.now().strftime("%H:%M:%S"))

In [None]:
# Timestamps before and after bracket the upsampling run.
print(datetime.now().strftime("%H:%M:%S"))
# Drop the top-level prior and call jukebox's empty_cache() before the
# upsamplers are built.
del top_prior
empty_cache()
# Rebind the name to None so the list passed to upsample() below still has an
# entry in the top-level position.
top_prior=None

# Build a prior for every entry of `priors` except the last, on the CPU.
upsamplers = [make_prior(setup_hparams(prior, dict()), vqvae, 'cpu') for prior in priors[:-1]]
# Fill the two lower-level label slots (None until now) from each upsampler.
labels[:2] = [prior.labeller.get_batch_labels(metas, 'cuda') for prior in upsamplers]

# Upsample the codes in zs through the lower levels.
zs = upsample(zs, labels, sampling_kwargs, [*upsamplers, top_prior], hps)
print(datetime.now().strftime("%H:%M:%S"))