In [None]:
import torch
import copy
import librosa
from IPython.display import Audio, display
import pandas as pd
import ast
import matplotlib.pyplot as plt
import os
import h5py
import pandas as pd
import numpy as np
import copy

def AverageRaveModels(rave_a, rave_b, bias = 0, verbose = True):
    """Build a model whose parameters are a weighted average of two models.

    The result starts as a deep copy of ``rave_a``. Every state-dict entry of
    ``rave_a`` that also exists in ``rave_b`` *with the same shape* is replaced
    by ``rave_a_value * r1_ratio + rave_b_value * r2_ratio``. Entries that
    cannot be averaged keep the value they had in ``rave_a``. Keys that exist
    only in ``rave_b`` are ignored.

    Args:
        rave_a: Model providing the structure and fallback values of the
            result. Must support ``state_dict()``, ``load_state_dict()`` and
            ``copy.deepcopy``.
        rave_b: Model whose matching entries are blended into the result.
        bias: Number in [-1, 1]. 0 gives an equal average, positive values
            weight the result towards ``rave_a``, negative values towards
            ``rave_b``. Out-of-range values print a message and fall back to
            an equal average.
        verbose: When True (default), print the first slice of every averaged
            entry for both inputs and the result.

    Returns:
        Tuple ``(rave_avg, numb_params_mod, numb_params_unable_to_mod)``: the
        averaged model, the number of entries that were averaged, and the
        number of ``rave_a`` entries that could not be averaged.
    """
    r1_ratio = .5
    r2_ratio = .5

    if abs(bias) <= 1:
        # bias = +1 -> only rave_a, bias = -1 -> only rave_b.
        r1_ratio = .5 + bias / 2
        r2_ratio = 1.0 - r1_ratio
    else:
        print(f"Unable to apply bias {bias} - bias must be between -1 and 1.")

    # Get state dictionaries of both models
    rave_a_params = rave_a.state_dict()
    rave_b_params = rave_b.state_dict()

    keys_not_modified = []
    numb_params_mod = 0

    rave_avg = copy.deepcopy(rave_a)
    avg = rave_avg.state_dict()
    for key in rave_a_params:
        if key not in rave_b_params:
            print(f"Key {key} not found in rave_b parameters, skipping.")
            keys_not_modified.append(key)
            continue

        param_a = rave_a_params[key]
        param_b = rave_b_params[key]
        try:
            # Require identical shapes: otherwise broadcasting can silently
            # produce a meaningless blend, or a tensor whose shape no longer
            # fits rave_avg and makes load_state_dict() fail below.
            if param_a.shape != param_b.shape:
                raise ValueError(
                    f"shape mismatch {tuple(param_a.shape)} vs {tuple(param_b.shape)}"
                )
            avg[key] = (param_a * r1_ratio) + (param_b * r2_ratio)
            numb_params_mod += 1
        except Exception as e:
            print(f"Error averaging key {key}: {e}")
            keys_not_modified.append(key)

    numb_params_unable_to_mod = len(keys_not_modified)

    # Keys that exist only in rave_b are deliberately not added: the original
    # author notes that doing so makes load_state_dict() fail with
    # "Unexpected key(s) in state_dict".

    # Now commit the changes
    rave_avg.load_state_dict(avg)

    if verbose:
        skipped = set(keys_not_modified)
        avg_params = rave_avg.state_dict()
        for key in avg_params:
            if key in skipped:
                print(f'Key: {key}: Unable to average.')
                continue
            try:
                print(key)
                print(f'rave_a: {rave_a_params[key][...,0]}')
                print(f'rave_b: {rave_b_params[key][...,0]}')
                print(f'rave_avg: {avg_params[key][...,0]}\n\n')
            except KeyError as e:
                print (f'Only one of the original models had a key for {key}')
            except IndexError as e:
                # Raised when the entry cannot be indexed with [..., 0]
                # (e.g. a 0-dimensional tensor).
                print(f'Key: {key} - ', e)

    return rave_avg, numb_params_mod, numb_params_unable_to_mod

def saveAudio(file_path, audio):
    """Write the bytes held in ``audio.data`` to a ``.wav`` file.

    Args:
        file_path: Destination path. ``.wav`` is appended unless the path
            already ends with it (case-insensitive), so callers may pass the
            name with or without the extension without getting ``.wav.wav``.
        audio: Object exposing the file contents as bytes in ``.data``
            (callers in this notebook pass ``IPython.display.Audio`` objects).
    """
    if not file_path.lower().endswith('.wav'):
        file_path = file_path + '.wav'
    with open(file_path, 'wb') as f:
        f.write(audio.data)
        


In [None]:
import huggingface_hub

# Display name -> (Hugging Face repo_id, filename inside that repo).
# The display names are looked up verbatim by GetModelPath, so their existing
# spellings are kept as-is.
model_path_configs = {
    "Hunchback Wales":      ("Intelligent-Instruments-Lab/rave-models", "humpbacks_pondbrain_b2048_r48000_z20.ts"),
    "Magnets":              ("Intelligent-Instruments-Lab/rave-models", "magnets_b2048_r48000_z8.ts"),
    "BigEnsemble":          ("Intelligent-Instruments-Lab/rave-models", "crozzoli_bigensemblesmusic_18d.ts"),
    "BirdDawnChorus":       ("Intelligent-Instruments-Lab/rave-models", "birds_dawnchorus_b2048_r48000_z8.ts"),
    "SpeakingAndSinging":   ("Intelligent-Instruments-Lab/rave-models", "voice-multi-b2048-r48000-z11.ts"),
    "Resonator Piano":      ("Intelligent-Instruments-Lab/rave-models", "mrp_strengjavera_b2048_r44100_z16.ts"),
    "Multimbral Guitar":    ("Intelligent-Instruments-Lab/rave-models", "guitar_iil_b2048_r48000_z16.ts"),
    "Organ Archive":        ("Intelligent-Instruments-Lab/rave-models", "organ_archive_b2048_r48000_z16.ts"),
    "Water":                ("Intelligent-Instruments-Lab/rave-models", "water_pondbrain_b2048_r48000_z16.ts"),
    "Brass Sax":            ("shuoyang-zheng/jaspers-rave-models", "aam_brass_sax_b2048_r44100_z8_noncausal.ts"),
    "Speech":               ("shuoyang-zheng/jaspers-rave-models", "librispeech100_b2048_r44100_z8_noncausal.ts"),
    "String":               ("shuoyang-zheng/jaspers-rave-models", "aam_string_b2048_r44100_z16_noncausal.ts"),
    "Singer":               ("shuoyang-zheng/jaspers-rave-models", "gtsinger_b2048_r44100_z16_noncausal.ts"),
    "Bass":                 ("shuoyang-zheng/jaspers-rave-models", "aam_bass_b2048_r44100_z16_noncausal.ts"),
    "Drum":                 ("shuoyang-zheng/jaspers-rave-models", "aam_drum_b2048_r44100_z16_noncausal.ts"),
    "Gtr Picking":          ("shuoyang-zheng/jaspers-rave-models", "guitar_picking_dm_b2048_r44100_z8_causal.ts"),
    # The filename used to be the garbled paste "Percussion_LB': 'percussion.ts",
    # which cannot name a file in the repo.
    # NOTE(review): "percussion.ts" is inferred from that paste - confirm against the repo.
    "Percussion-LB":        ("lancelotblanchard/rave_percussion", "percussion.ts"),
}


# Display name -> local file path, filled in by GetModelPath after the first download.
model_paths_cache = {}

def GetModelPath(model_path_name):
    """Return the local file path of a named model, downloading it on first use.

    The name is resolved through ``model_path_configs`` to a
    ``(repo_id, filename)`` pair, fetched with
    ``huggingface_hub.hf_hub_download`` (cache directory
    ``../huggingface_hub_cache``) and remembered in ``model_paths_cache`` so
    later calls with the same name return immediately.

    Args:
        model_path_name: A key of ``model_path_configs``.

    Returns:
        The path returned by ``hf_hub_download`` for that model.

    Raises:
        KeyError: If ``model_path_name`` is not a key of ``model_path_configs``.
    """
    if model_path_name in model_paths_cache:
        return model_paths_cache[model_path_name]

    if model_path_name not in model_path_configs:
        # Still a KeyError, but one that tells the user what is available.
        raise KeyError(
            f"Unknown model name {model_path_name!r}. "
            f"Available models: {sorted(model_path_configs)}"
        )

    repo_id, filename = model_path_configs[model_path_name]
    print("repo_id", repo_id)
    print("filename", filename)
    model_path = huggingface_hub.hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        cache_dir="../huggingface_hub_cache",
        force_download=False,
    )

    model_paths_cache[model_path_name] = model_path
    return model_path



In [None]:
################################################
# Adjustable Parameters
################################################

# ----------------------------------------------
# Models
# Pick two names from the model_path_configs dictionary created in the
# previous cell.
# Note: model_path_a is always used to initialize the averaged model, so
# swapping the two gives different results when the parameters do not all match.
# ----------------------------------------------
# Example: only 21 params match, but it sounds like some of both may be in the result.
model_path_a = GetModelPath('BirdDawnChorus')
model_path_b = GetModelPath('Speech')

# Example: 76 params are averaged
# model_path_a = GetModelPath('Water')
# model_path_b = GetModelPath('Organ Archive')

# Examples: all params match, but the average comes out high-pitched
# model_path_a = GetModelPath('Organ Archive')
# model_path_b = GetModelPath('Multimbral Guitar')
#
# model_path_a = GetModelPath('String')
# model_path_b = GetModelPath('Singer')


# ----------------------------------------------
# Biases
# Each value must be between -1 and 1 and pulls the result towards one model:
#     0 = standard average
#   > 0 = biased towards model_a
#   < 0 = biased towards model_b
# ----------------------------------------------
biases = [0, 0.2, 0.4, 0.5, 0.7, -0.4, -0.6, -0.9]

# ----------------------------------------------
# Audio file to encode/decode
# ----------------------------------------------
audio_file = "assets/RJM1240-Gestures.wav"
# audio_file = "assets/bird_calls.m4a"
# audio_file = "assets/SilverCaneAbby_ThreeVoices_v1.wav"
# audio_file = "assets/SingingBowl_Singing-Omni_sixInchesAbove_nm.wav"
# audio_file = "assets/RJM1221b_Technomanity_30.wav"


# ----------------------------------------------
# Generate audio files
# When enabled, audio files are created in the assets folder.
# ----------------------------------------------
generate_audio_files = False


In [None]:
rave_a = torch.jit.load(model_path_a)
rave_b = torch.jit.load(model_path_b)

# Load the sample audio file.
# NOTE(review): without an explicit sr, librosa.load resamples to its default
# rate, while the model file names suggest 44.1 kHz / 48 kHz models - confirm
# this is intended.
y, sr = librosa.load(audio_file)

# Convert audio to a PyTorch tensor and reshape it to the
# required shape: (batch_size, n_channels, n_samples).
audio = torch.from_numpy(y).float().reshape(1, 1, -1)

# The round trip through the two original models does not depend on the bias,
# so it is done once, outside the loop. A failure here surfaces directly
# instead of as a NameError further down.
with torch.no_grad():
    decoded_a = rave_a.decode(rave_a.encode(audio))
    decoded_b = rave_b.decode(rave_b.encode(audio))

audio_outputs = {}
failed_biases = []
numb_params_mod = None
numb_params_unable_to_mod = None

for bias in biases:

    # Average the rave models. The bias must be passed through, otherwise
    # every output is the same 50/50 average under a different label.
    rave_avg, numb_params_mod, numb_params_unable_to_mod = AverageRaveModels(rave_a, rave_b, bias=bias)

    # Encode and decode the audio with the averaged model.
    try:
        with torch.no_grad():
            latent_avg = rave_avg.encode(audio)
            decoded_avg = rave_avg.decode(latent_avg)
        audio_outputs[bias] = decoded_avg[0].detach().numpy()
    except Exception as e:
        # Record the failure instead of removing the bias from `biases`:
        # mutating the list while iterating over it skips the next element
        # and silently changes the configuration for later re-runs.
        print(f'Bias {bias} generated an error ({e}). Skipping it.')
        failed_biases.append(bias)


# os.path.basename handles the platform's path separator.
model_a_file = os.path.basename(model_path_a)
model_b_file = os.path.basename(model_path_b)

print("---------------------------------------\n")
# Original Audio
print("\n")
# Let's listen to the decoded audio and compare
print("Original Audio")
display(Audio(y, rate=sr))
print("---------------------------------------\n")

# Decoded Audio
print("Encoded and Decoded using original models")
clip_a = Audio(decoded_a[0].detach().numpy(), rate=sr)
display(clip_a)
if generate_audio_files:
    # saveAudio appends the '.wav' extension itself.
    saveAudio('assets/' + model_a_file[: 7] + '_only', clip_a)
print("Model: ", model_a_file)

clip_b = Audio(decoded_b[0].detach().numpy(), rate=sr)
display(clip_b)
if generate_audio_files:
    saveAudio('assets/' + model_b_file[: 7] + '_only', clip_b)
print("Model: ", model_b_file)
print("---------------------------------------\n")

print("Encoded and Decoded using Averaged Models")
print("with Biases: ", [bias for bias in biases if bias in audio_outputs])
print("\nNumber of params able to average:", numb_params_mod)
print("Number of params unable to average:", numb_params_unable_to_mod)

output_file_prefix = f'assets/{model_a_file[: 7]}-{model_b_file[: 7]}_'

for bias in biases:
    if bias in audio_outputs:
        clip_avg = Audio(audio_outputs[bias], rate=sr)
        display(clip_avg)
        print(f"Average of Models, bias = {bias}")
        if generate_audio_files:
            saveAudio(output_file_prefix + 'bias_' + str(bias), clip_avg)

        print("---------------------------------------\n")
    else:
        print(f"Average of Models with bias {bias} is unavailable.")

