In [None]:
import sys
import os
import torch
import torchaudio
import librosa

# Make the local "audiocraft" folder importable. The path is resolved against the
# current working directory, so the kernel must be started next to that folder.
subfolder_path = os.path.abspath("audiocraft")
if subfolder_path not in sys.path:  # guard: re-running this cell must not pile up duplicate entries
    sys.path.append(subfolder_path)

import numpy as np
import json

from scipy.stats import zscore

from IPython.display import Audio, display
#from transformers import AutoProcessor, MusicgenMelodyForConditionalGeneration

import torchaudio
from audiocraft.models import MusicGen
from audiocraft.utils.notebook import display_audio
from audiocraft.utils import export

In [None]:
# Export each of our trained checkpoints into the audiocraft checkpoints folder so the
# python package can load them for generation in the cells below.
# For every model two files are written: the language-model weights (state_dict.bin)
# and the pretrained EnCodec compression model (compression_state_dict.bin).
for model_name in ('with_submixes_25ep', 'with_submixes_description_25ep'):
    export.export_lm(
        f'/home/sd3705/music_gen_2024f/audiocraft_output_sd3705/xps/{model_name}/checkpoint.th',
        f'../audiocraft/checkpoints/{model_name}/state_dict.bin',
    )
    export.export_pretrained_compression_model(
        'facebook/encodec_32khz',
        f'../audiocraft/checkpoints/{model_name}/compression_state_dict.bin',
    )

In [None]:
# Root folder of the Slakh2100 dataset (edit this to run on another machine).
slakh_path = "/engram/naplab/shared/Slakh2100/slakh2100_flac_redux"
# One track from the test set, used as the reference track (relative to slakh_path).
track_path = "test/Track01881"

In [None]:
# Load the acoustic piano stem (S02) of the chosen reference track.
aud_input, sr = torchaudio.load(os.path.join(slakh_path, track_path, 'stems', 'S02.flac'))

# Keep only a 10-second excerpt (from 1.5 s to 11.5 s); the slice runs over the sample axis.
aud_input = aud_input[:, int(sr*1.5):int(sr*11.5)]

# The melody conditioning is passed to the model as a batch of waveforms [B, C, T].
# Adding the batch axis in front gives [1, C, T]. The previous unsqueeze(1) gave
# [C, 1, T], which is only the same thing for mono audio and would otherwise turn
# the channels into separate batch items.
aud_input_repeated = aud_input.unsqueeze(0)

display_audio(aud_input, sample_rate=sr)  # Audio is displayed to play in the notebook

In [None]:
output_path = 'Samples/Final'  # Folder to save audios for demo
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing

# Move the excerpt to the CPU before writing it to disk as the reference track.
aud_input_save = aud_input.to('cpu')
torchaudio.save(os.path.join(output_path, "reference.wav"), aud_input_save, sr)

In [None]:
# Load the .json metadata of the reference stem; its 'description' entry is the base text prompt.
with open(os.path.join(slakh_path, track_path, 'stems', 'S02.json'), 'r', encoding='utf-8') as metadata_file:
    curr_json = json.load(metadata_file)
description = curr_json['description']

# Discard the first sentence (the instrument list) so new instruments can be substituted later.
# The remaining sentences are re-joined with '. ' because joining with '' removed the
# separators and glued the sentences together.
description = '. '.join(description.split('. ')[1:])

In [None]:
#First instrument group

In [None]:
# New instruments for the first group; the prompt needs them as one comma-separated string.
instrument_names = ('Electric Guitar (Clean)', 'Acoustic Grand Piano', 'Drum', 'Choir (aahs)')
instruments = ', '.join(instrument_names)

In [None]:
# Show the two prompt parts (description first, then instruments), one per line.
print(description, instruments, sep='\n')

In [None]:
# Baseline: generate with the pretrained MusicGen melody model.
model_name = 'original'  # also used as the output file name
model = MusicGen.get_pretrained('facebook/musicgen-melody')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[  # Description with the new instruments is given to the model
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,  # Reference audio is given to the model
    melody_sample_rate=sr,  # Input sampling rate
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/1'  # Folder for saving the generated audios
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# output_cpu[0] is the single generated item of the batch.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)

In [None]:
# Same generation procedure as the baseline cell, but with our trained model
# 'with_submixes_25ep', loaded from the exported checkpoint folder.
model_name = 'with_submixes_25ep'  # also used as the output file name
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,
    melody_sample_rate=sr,
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/1'
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# output_cpu[0] is the single generated item of the batch.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)

In [None]:
# Same generation procedure as the previous cells, but with our trained model
# 'with_submixes_description_25ep', loaded from the exported checkpoint folder.
model_name = 'with_submixes_description_25ep'  # also used as the output file name
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,
    melody_sample_rate=sr,
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/1'
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# output_cpu[0] is the single generated item of the batch.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)

In [None]:
#Second instrument group

In [None]:
# New instruments for the second group; the prompt needs them as one comma-separated string.
instrument_names = ('Trumpet', 'Cello', 'Fretless Bass')
instruments = ', '.join(instrument_names)

In [None]:
# Show the two prompt parts (description first, then instruments), one per line.
print(description, instruments, sep='\n')

In [None]:
# Baseline: generate with the pretrained MusicGen melody model.
model_name = 'original'  # also used as the output file name
model = MusicGen.get_pretrained('facebook/musicgen-melody')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[  # Description with the new instruments is given to the model
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,  # Reference audio is given to the model
    melody_sample_rate=sr,  # Input sampling rate
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/2'  # Folder for saving the generated audios
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# output_cpu[0] is the single generated item of the batch.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)

In [None]:
# Same generation procedure as the baseline cell, but with our trained model
# 'with_submixes_25ep', loaded from the exported checkpoint folder.
model_name = 'with_submixes_25ep'  # also used as the output file name
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,
    melody_sample_rate=sr,
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/2'
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# output_cpu[0] is the single generated item of the batch.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)

In [None]:
# Same generation procedure as the previous cells, but with our trained model
# 'with_submixes_description_25ep', loaded from the exported checkpoint folder.
model_name = 'with_submixes_description_25ep'  # also used as the output file name
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # Generation length is 10 seconds

output = model.generate_with_chroma(
    descriptions=[
        f'Instruments: {instruments}. {description}'
    ],
    melody_wavs=aud_input_repeated,
    melody_sample_rate=sr,
    progress=True, return_tokens=True
)
# A stray `print(i)` used to sit here; `i` is never defined, so the cell raised
# NameError right after generation and nothing was displayed or saved.

# return_tokens=True makes the call return more than the audio; output[0] is the audio batch.
display_audio(output[0], sample_rate=model.sample_rate)

output_path = 'Samples/Final/2'
os.makedirs(output_path, exist_ok=True)  # the save below fails if the folder is missing
output_cpu = output[0].to('cpu')
# The file name no longer carries the undefined `_{i}` suffix, so it follows the same
# "<model_name>.wav" scheme as every other generation cell.
torchaudio.save(os.path.join(output_path, f"{model_name}.wav"), output_cpu[0], model.sample_rate)