In [None]:
import torchaudio
import os

In [None]:
import os
import torch
import librosa

import numpy as np

from scipy.stats import zscore

from IPython.display import Audio, display

# Make the local `audiocraft` checkout importable by the imports that follow.
# `sys` is imported right here because nothing earlier in the notebook imports it;
# without this line the append below fails with NameError on a fresh kernel.
import sys

subfolder_path = os.path.abspath("audiocraft")
if subfolder_path not in sys.path:  # re-running the cell must not stack duplicate entries
    sys.path.append(subfolder_path)

import torchaudio
from audiocraft.models import MusicGen
from audiocraft.utils.notebook import display_audio
from audiocraft.utils import export

In [None]:
# Export every fine-tuned run into the folder layout that the generation cells later hand to
# `MusicGen.get_pretrained`: a `state_dict.bin` exported from the training checkpoint, plus a
# `compression_state_dict.bin` exported from the pretrained 'facebook/encodec_32khz' model.
# Both roots below are machine-specific and have to be edited to run somewhere else.
xps_root = '/home/sd3705/music_gen_2024f/audiocraft_output_sd3705/xps'
checkpoints_root = '../audiocraft/checkpoints'

# A single loop replaces three copy-pasted stanzas that differed only in the run name.
# `model_name` is kept as the loop variable so it still ends up as 'with_submixes' afterwards.
for model_name in ('both_indv_mix', 'only_mix', 'with_submixes'):
    export.export_lm(f'{xps_root}/{model_name}/checkpoint.th', f'{checkpoints_root}/{model_name}/state_dict.bin')
    export.export_pretrained_compression_model('facebook/encodec_32khz', f'{checkpoints_root}/{model_name}/compression_state_dict.bin')

In [None]:
# Root folder of the Slakh2100 dataset. The default is the path on the original author's
# machine; set the SLAKH2100_ROOT environment variable to run on another machine without
# editing the notebook.
slakh_path = os.environ.get("SLAKH2100_ROOT", "/engram/naplab/shared/Slakh2100/slakh2100_flac_redux")
# One track from the test set, given relative to slakh_path, used as the reference track.
track_path = "test/Track01881"

In [None]:
# Load the acoustic piano stem (S02) of the chosen track; it serves as the melody reference.
aud_input, sr = torchaudio.load(os.path.join(slakh_path, track_path, 'stems', 'S02.flac'))

# Keep only a 10-second excerpt, from 1.5 s to 11.5 s into the stem.
clip_start_s, clip_end_s = 1.5, 11.5
aud_input = aud_input[:, int(sr * clip_start_s):int(sr * clip_end_s)]

# One copy of the excerpt per text prompt (the generation cells pass 4 prompts).
# The batch axis is added in front, so the result is [n_prompts, channels, time] regardless of
# the channel count. The previous `unsqueeze(1).repeat(4, 1, 1)` tiled along the channel axis,
# which gives the same tensor for a mono stem but 4 * channels batch entries otherwise.
n_prompts = 4
aud_input_repeated = aud_input.unsqueeze(0).repeat(n_prompts, 1, 1)

# Render a player for the reference excerpt in the notebook.
display_audio(aud_input, sample_rate=sr)

In [None]:
# Baseline run: generate with the pretrained (not fine-tuned) MusicGen melody model.
model_name = 'original'  # also used as the suffix of the saved file names
model = MusicGen.get_pretrained('facebook/musicgen-melody')
model.set_generation_params(duration=10)  # generate 10-second clips

# One text prompt per copy of the reference audio; each prompt adds one more instrument.
# With return_tokens=True the call returns a sequence whose element 0 is the generated audio.
output = model.generate_with_chroma(
    descriptions=[
        'Acoustic Guitar',
        'Acoustic Guitar, Halo Pad',
        'Acoustic Guitar, Halo Pad, Rock Organ',
        'Acoustic Guitar, Halo Pad, Rock Organ, Drums'
    ],
    melody_wavs=aud_input_repeated,  # reference audio, one entry per prompt
    melody_sample_rate=sr,  # sampling rate of the reference audio
    progress=True, return_tokens=True
)
# Ask the model for its output rate instead of repeating a hard-coded 32000 everywhere.
output_sr = model.sample_rate
display_audio(output[0], sample_rate=output_sr)

# Save every generated clip as "<1-based index>_<model_name>.wav".
output_path = 'Samples/Uncompleted'
os.makedirs(output_path, exist_ok=True)  # saving fails if the folder does not exist yet
output_cpu = output[0].to('cpu')
for clip_idx, clip in enumerate(output_cpu, start=1):
    torchaudio.save(os.path.join(output_path, f"{clip_idx}_{model_name}.wav"), clip, output_sr)

In [None]:
# Same generation procedure as the baseline cell, but with the fine-tuned 'both_indv_mix'
# model loaded from the locally exported checkpoint folder.
model_name = 'both_indv_mix'  # also used as the suffix of the saved file names
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # generate 10-second clips

# One text prompt per copy of the reference audio; each prompt adds one more instrument.
# With return_tokens=True the call returns a sequence whose element 0 is the generated audio.
output = model.generate_with_chroma(
    descriptions=[
        'Acoustic Guitar',
        'Acoustic Guitar, Halo Pad',
        'Acoustic Guitar, Halo Pad, Rock Organ',
        'Acoustic Guitar, Halo Pad, Rock Organ, Drums'
    ],
    melody_wavs=aud_input_repeated,  # reference audio, one entry per prompt
    melody_sample_rate=sr,  # sampling rate of the reference audio
    progress=True, return_tokens=True
)
# Ask the model for its output rate instead of repeating a hard-coded 32000 everywhere.
output_sr = model.sample_rate
display_audio(output[0], sample_rate=output_sr)

# Save every generated clip as "<1-based index>_<model_name>.wav".
# NOTE(review): this cell writes to 'Samples/Project Update' while the other generation cells
# write to 'Samples/Uncompleted' - confirm whether the different folder is intentional.
output_path = 'Samples/Project Update'
os.makedirs(output_path, exist_ok=True)  # saving fails if the folder does not exist yet
output_cpu = output[0].to('cpu')
for clip_idx, clip in enumerate(output_cpu, start=1):
    torchaudio.save(os.path.join(output_path, f"{clip_idx}_{model_name}.wav"), clip, output_sr)

In [None]:
# Same generation procedure as the baseline cell, but with the fine-tuned 'only_mix'
# model loaded from the locally exported checkpoint folder.
model_name = 'only_mix'  # also used as the suffix of the saved file names
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # generate 10-second clips

# One text prompt per copy of the reference audio; each prompt adds one more instrument.
# With return_tokens=True the call returns a sequence whose element 0 is the generated audio.
output = model.generate_with_chroma(
    descriptions=[
        'Acoustic Guitar',
        'Acoustic Guitar, Halo Pad',
        'Acoustic Guitar, Halo Pad, Rock Organ',
        'Acoustic Guitar, Halo Pad, Rock Organ, Drums'
    ],
    melody_wavs=aud_input_repeated,  # reference audio, one entry per prompt
    melody_sample_rate=sr,  # sampling rate of the reference audio
    progress=True, return_tokens=True
)
# Ask the model for its output rate instead of repeating a hard-coded 32000 everywhere.
output_sr = model.sample_rate
display_audio(output[0], sample_rate=output_sr)

# Save every generated clip as "<1-based index>_<model_name>.wav".
output_path = 'Samples/Uncompleted'
os.makedirs(output_path, exist_ok=True)  # saving fails if the folder does not exist yet
output_cpu = output[0].to('cpu')
for clip_idx, clip in enumerate(output_cpu, start=1):
    torchaudio.save(os.path.join(output_path, f"{clip_idx}_{model_name}.wav"), clip, output_sr)

In [None]:
# Same generation procedure as the baseline cell, but with the fine-tuned 'with_submixes'
# model loaded from the locally exported checkpoint folder.
model_name = 'with_submixes'  # also used as the suffix of the saved file names
model = MusicGen.get_pretrained(f'../audiocraft/checkpoints/{model_name}/')
model.set_generation_params(duration=10)  # generate 10-second clips

# One text prompt per copy of the reference audio; each prompt adds one more instrument.
# With return_tokens=True the call returns a sequence whose element 0 is the generated audio.
output = model.generate_with_chroma(
    descriptions=[
        'Acoustic Guitar',
        'Acoustic Guitar, Halo Pad',
        'Acoustic Guitar, Halo Pad, Rock Organ',
        'Acoustic Guitar, Halo Pad, Rock Organ, Drums'
    ],
    melody_wavs=aud_input_repeated,  # reference audio, one entry per prompt
    melody_sample_rate=sr,  # sampling rate of the reference audio
    progress=True, return_tokens=True
)
# Ask the model for its output rate instead of repeating a hard-coded 32000 everywhere.
output_sr = model.sample_rate
display_audio(output[0], sample_rate=output_sr)

# Save every generated clip as "<1-based index>_<model_name>.wav".
output_path = 'Samples/Uncompleted'
os.makedirs(output_path, exist_ok=True)  # saving fails if the folder does not exist yet
output_cpu = output[0].to('cpu')
for clip_idx, clip in enumerate(output_cpu, start=1):
    torchaudio.save(os.path.join(output_path, f"{clip_idx}_{model_name}.wav"), clip, output_sr)