<a href="https://colab.research.google.com/github/migperfer/MIR-UPF/blob/master/best_mix_survey.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instructions to run the notebook


1.   Run all the cells up to the first _Create the mixes_ section. Some cells may take a while to run.
2.   In the **Select a target audio randomly** section, the **target audio** is selected. You can listen to it in that same cell.
3.   In each _Create the mixes_ section, the mixes of the **target audio** with the **candidate audios** are created. Listen to the mixes; each mix has a number.
4.   Each _Create the mixes_ section has a cell where you can rate the mixes in the 0-5 range (use -1 if the candidate loop is mainly a drum loop). Rate the mixes and then press play in the rating cell.
5.   After listening to and rating all the mixes, run the cell in the _Print the results_ section to see the results. Kindly copy the output and send it to me.



## Download and import the python packages

In [0]:
%%capture
!pip install  essentia numpy matplotlib
import os
import numpy as np
import matplotlib as plt
import essentia.standard as std
import pandas as pd
if not os.path.isdir('TIVlib'):
    !git clone https://github.com/aframires/TIVlib
else:
    print("TIVlib already installed")
from TIVlib import TIVlib
from glob import glob
from IPython.display import display, Audio, HTML
import re
import csv
from zipfile import ZipFile
import requests

## Download and extract the folder containing the audio loops

In [0]:
%%capture
def download_file_from_google_drive(id, destination):
    """Fetch a file from Google Drive and store it on disk.

    A first request is sent for the given file id. If that response carries
    a cookie whose name starts with 'download_warning' (looked up by
    ``get_confirm_token``), the request is repeated with the cookie value as
    the ``confirm`` parameter. The body of the last response is written to
    ``destination`` by ``save_response_content``.

    Args:
        id: Google Drive id of the file to download.
        destination: Path of the local file to write.
    """
    endpoint = "https://docs.google.com/uc?export=download"
    http = requests.Session()

    query = {'id': id}
    response = http.get(endpoint, params=query, stream=True)

    # presumably Drive's confirmation step for large files -- verify
    confirm = get_confirm_token(response)
    if confirm:
        query = {'id': id, 'confirm': confirm}
        response = http.get(endpoint, params=query, stream=True)

    save_response_content(response, destination)

def get_confirm_token(response):
    """Return the value of the first 'download_warning*' cookie, if any.

    Args:
        response: Object exposing a ``cookies`` mapping (name -> value).

    Returns:
        The value of the first cookie whose name starts with
        'download_warning', or None when there is no such cookie.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

def save_response_content(response, destination):
    """Write the body of a streamed response to a binary file.

    Args:
        response: Object whose ``iter_content(size)`` yields byte chunks.
        destination: Path of the file to create or overwrite.
    """
    block_size = 32768  # chunk size passed to iter_content

    with open(destination, "wb") as out_file:
        # Empty (keep-alive) chunks are falsy and are dropped by filter(None, ...).
        out_file.writelines(filter(None, response.iter_content(block_size)))

# Download the audios
# file_id is the Google Drive id of the archive; it is saved in the current
# working directory as 'audios.zip', the file name the unzip cell opens.
file_id = '1mS0G_Gk4v6IHe2E2IZ2Z6dcE-lcTuZx3'
destination = 'audios.zip'
download_file_from_google_drive(file_id, destination)

In [0]:
%%capture
# Unzip the file
with ZipFile('audios.zip', 'r') as zipObj:
   # extractall() without arguments unpacks every member into the current working directory
   zipObj.extractall()

## Download the subset file


In [0]:
%%capture
# Clone the survey repository, then copy its compt_loops.csv (the list of loops
# read by the target-selection cell) into the working directory.
!git clone https://github.com/migperfer/MIR-UPF
!cp ./MIR-UPF/compt_loops.csv compt_loops.csv 

## Define a function to get beatwise TIVs

In [0]:
%%capture
def get_beat_chunks(filename, bpm_restrict=None):
    """Compute Tonal Interval Vectors (TIVs) of a loop at three resolutions.

    The loop is cut into consecutive chunks lasting one beat (60 / bpm
    seconds, with the bpm read from the file name by ``get_tempo``). The last
    chunk ends at the end of the audio and may be shorter than a beat.

    Args:
        filename: Path of the audio file; its name must encode the tempo in a
            form ``get_tempo`` understands.
        bpm_restrict: Optional tempo the loop is required to have.

    Returns:
        A tuple ``(tivs, tiv_whole, tivs_framewise)`` where
        ``tivs`` holds one TIV per beat chunk (from the mean HPCP of the
        chunk's frames), ``tiv_whole`` is a single TIV built from the HPCP of
        the mean spectrum of all frames, and ``tivs_framewise`` holds one TIV
        per analysis frame, in order.

    Raises:
        ValueError: If ``bpm_restrict`` is given and differs from the tempo
            found in the file name.
    """
    audio = std.MonoLoader(filename=filename)()
    hpcp = std.HPCP()
    spectrum = std.Spectrum()
    speaks = std.SpectralPeaks()
    tivs = []
    sr = 44100  # assumes MonoLoader delivers 44.1 kHz audio -- TODO confirm
    bpm = get_tempo(filename)
    tivs_framewise = []
    if bpm_restrict is not None and bpm_restrict != bpm:
        raise ValueError(
            "Tempo of %s is %s bpm, expected %s bpm" % (filename, bpm, bpm_restrict))
    sec_beat = (60/bpm)
    # Beat boundaries in seconds: 0, one beat, two beats, ... plus the end of the audio.
    beats = np.arange(0, len(audio)/sr, sec_beat)
    beats = np.append(beats, len(audio)/sr)
    frames = []  # spectra of every frame, kept for the whole-loop TIV
    for i in range(1, len(beats)):
        segmented_audio = audio[int(beats[i - 1] * sr):int(beats[i] * sr)]
        cutter = std.FrameGenerator(segmented_audio)
        aux = []  # HPCPs of the frames inside the current beat
        for sec in cutter:
            spec = spectrum(sec)
            freq, mag = speaks(spec)
            chroma = hpcp(freq, mag)
            tivs_framewise.append(TIVlib.TIV.from_pcp(chroma))
            aux.append(chroma)
            frames.append(spec)
        # Beatwise TIV: built from the average chroma of the beat.
        chroma = np.mean(aux, axis=0)
        tiv = TIVlib.TIV.from_pcp(chroma)
        tivs.append(tiv)

    # Calculate the whole TIV from the spectrum averaged over all frames
    frame_mean = np.mean(frames, axis=0)
    freq, mag = speaks(frame_mean)
    chroma_whole = hpcp(freq, mag)
    tiv_whole = TIVlib.TIV.from_pcp(chroma_whole)
    return tivs, tiv_whole, tivs_framewise

def get_number_beats(filename):
    """Count the beat boundaries of a loop.

    The boundaries are 0, one beat, two beats, ... (one beat lasting 60 / bpm
    seconds, bpm taken from the file name by ``get_tempo``) plus the end of
    the audio.

    NOTE(review): the returned count includes both the start and the appended
    end time, so it is one more than the number of chunks between consecutive
    boundaries -- confirm this is what callers expect.

    Args:
        filename: Path of the audio file.

    Returns:
        Number of boundary time stamps.
    """
    sr = 44100
    samples = std.MonoLoader(filename=filename)()
    duration = len(samples) / sr
    beat_period = 60 / get_tempo(filename)
    boundaries = np.append(np.arange(0, duration, beat_period), duration)
    return len(boundaries)

def get_tempo(filename):
    """Extract the tempo (bpm) encoded in a loop's file name.

    Two naming schemes are tried, in this order:
      1. ``<digits>bpm`` anywhere in the path;
      2. ``/<digits>-`` (digits right after a slash, followed by a hyphen).

    Args:
        filename: Path of the audio file.

    Returns:
        The tempo as an int.

    Raises:
        ValueError: If neither naming scheme matches.
    """
    for pattern in (r"(\d+)bpm", r"/(\d+)-"):
        match = re.search(pattern, filename)
        if match is not None:
            return int(match.group(1))
    raise ValueError("Could not find a tempo in file name: %r" % (filename,))


## Define functions to retrieve Essentia dissonance

In [0]:
%%capture
def audio_dissonance(filename1, filename2):
    """Measure the dissonance of two loops mixed (summed) together.

    Both loops must have the same tempo (read from their file names by
    ``get_tempo``). The beat grid is derived from the length of the second
    loop; inside every beat the two loops are summed sample by sample over
    their common length and analysed frame by frame.

    Args:
        filename1: Path of the first loop.
        filename2: Path of the second loop; its duration defines the beats.

    Returns:
        A tuple ``(beatwise_mean, whole)``: the mean over beats of the mean
        framewise dissonance of each beat, and the dissonance of the spectrum
        averaged over all frames.

    Raises:
        ValueError: If the tempi differ, or if the loops share no audio that
            yields at least one analysis frame.
    """
    audio1 = std.MonoLoader(filename=filename1)()
    audio2 = std.MonoLoader(filename=filename2)()
    spectrum = std.Spectrum()
    speaks = std.SpectralPeaks()
    diss = std.Dissonance()
    sr = 44100  # assumes MonoLoader delivers 44.1 kHz audio -- TODO confirm
    bpm1 = get_tempo(filename1)
    bpm2 = get_tempo(filename2)
    if bpm1 != bpm2:
        raise ValueError("Different tempo")
    sec_beat = 60 / bpm1
    # Beat boundaries in seconds, closed by the end of the second loop.
    beats = np.arange(0, len(audio2) / sr, sec_beat)
    beats = np.append(beats, len(audio2) / sr)

    dissonances = []  # mean dissonance of every beat that produced frames
    all_spectra = []  # spectra of every analysed frame
    for i in range(1, len(beats)):
        start = int(beats[i - 1] * sr)
        stop = int(beats[i] * sr)
        segment1 = audio1[start:stop]
        segment2 = audio2[start:stop]
        # Sum only the part both loops cover; explicit truncation replaces
        # relying on a broadcasting error to detect the shorter segment.
        common = min(len(segment1), len(segment2))
        segmented_audio = segment1[:common] + segment2[:common]

        beat_dissonances = []
        for frame in std.FrameGenerator(segmented_audio):
            spec = spectrum(frame)
            all_spectra.append(spec)
            freq, mag = speaks(spec)
            beat_dissonances.append(diss(freq, mag))  # Framewise dissonance for beat i
        # A beat without frames (e.g. one loop ended earlier) is skipped:
        # np.mean of an empty list is NaN and would poison the final mean.
        if beat_dissonances:
            dissonances.append(np.mean(beat_dissonances))

    if not all_spectra:
        raise ValueError("No overlapping audio to analyse")

    # Whole dissonance: dissonance produced by the mean spectrum
    freq, mag = speaks(np.mean(all_spectra, axis=0))
    dissonance = diss(freq, mag)
    return np.mean(dissonances), dissonance

def pad_zeros(arr):
    """Left-pad an array with zeros up to the next power-of-two length.

    Args:
        arr: One-dimensional numpy array.

    Returns:
        A new array of the same dtype whose length is the smallest power of
        two not below ``len(arr)``; the zeros are placed before the data.
    """
    n = len(arr)
    target_exponent = np.ceil(np.log2(n))
    missing = int(np.power(2, target_exponent) - n)
    padding = np.zeros(missing, dtype=arr.dtype)
    return np.concatenate((padding, arr))

## Load all the compatible loops
And then calculate the compatibilities


### Select a **target audio** randomly
Listen to the target loop. If it's mainly a drum loop, rerun this cell until you get something that doesn't contain mainly percussion.



In [0]:
# Read the paths of the loops listed in the 'filename' column of compt_loops.csv
filenames = []
with open('compt_loops.csv', 'r', newline='') as file:  # newline='' as recommended for the csv module
    dicw = csv.DictReader(file)
    for row in dicw:
        filenames.append(row['filename'])
mp3list = filenames
# Pick the target loop at random. np.random.randint excludes its upper bound,
# so the bound must be len(mp3list) for the last loop to be selectable too.
loop_sample = np.random.randint(0, len(mp3list))
target_audio = mp3list[loop_sample]
print("Selected audio:", target_audio.split('subset/')[1])
display(Audio(filename=target_audio))
# Beatwise, whole-loop and framewise TIVs of the target, reused by the compatibility cells
main_song_tivs, main_song_tiv_whole, main_song_framewise = get_beat_chunks(target_audio)

#### TIV

In [0]:
compdict = {}  # candidate file name -> sum of the beatwise compatibilities with the target
compdict_whole = {}  # candidate file name -> compatibility of the whole-loop TIVs
compdict_framewise = {}  # candidate file name -> sum of the framewise compatibilities
for candidate in filenames:
    tivcand, tivcand_whole, tivframewise = get_beat_chunks(candidate)
    # TIV beatwise. zip pairs beats up to the shorter of the two loops, so a
    # candidate with a different number of beats no longer raises IndexError.
    comp = [target_tiv.small_scale_compatibility(candidate_tiv)
            for target_tiv, candidate_tiv in zip(main_song_tivs, tivcand)]
    compdict[candidate] = np.sum(comp)
    # TIV Whole
    compdict_whole[candidate] = main_song_tiv_whole.small_scale_compatibility(tivcand_whole)
    # TIV Framewise, paired the same way as the beats
    comp = [target_tiv.small_scale_compatibility(candidate_tiv)
            for target_tiv, candidate_tiv in zip(main_song_framewise, tivframewise)]
    compdict_framewise[candidate] = np.sum(comp)

#### Essentia Dissonance


In [0]:
compdict_diss = {}  # candidate file name -> first value returned by audio_dissonance (mean of the beatwise dissonances)
compdict_diss_whole = {}  # candidate file name -> second value returned (dissonance of the mean spectrum)
for candidate in filenames:
  # The target is passed second: audio_dissonance derives its beat grid from the second file
  framewise_diss, whole_diss = audio_dissonance(candidate, target_audio)
  compdict_diss[candidate] = framewise_diss
  compdict_diss_whole[candidate] = whole_diss

### Sort the loops according to compatibility
Also create a dictionary containing the 10 most compatible loops

#### TIV

In [0]:
#Beatwise version
# Order the candidates by ascending beatwise score (lowest value first).
compdict = dict(sorted(compdict.items(), key=lambda item: item[1]))
# Keep the ten best-ranked candidates. The target loop is excluded by name
# rather than by assuming that it always occupies the first position.
dict_10 = {}
for x in [name for name in compdict if name != target_audio][:10]:
    dict_10[x] = compdict[x]

In [0]:
#Whole version
# Order the candidates by ascending whole-loop score (lowest value first).
compdict_whole = dict(sorted(compdict_whole.items(), key=lambda item: item[1]))
# Keep the ten best-ranked candidates. The target loop is excluded by name
# rather than by assuming that it always occupies the first position.
dict_10_w = {}
for x in [name for name in compdict_whole if name != target_audio][:10]:
    dict_10_w[x] = compdict_whole[x]

In [0]:
#Framewise version
# Order the candidates by ascending framewise score (lowest value first).
compdict_framewise = dict(sorted(compdict_framewise.items(), key=lambda item: item[1]))
# Keep the ten best-ranked candidates. The target loop is excluded by name
# rather than by assuming that it always occupies the first position.
dict_10_f = {}
for x in [name for name in compdict_framewise if name != target_audio][:10]:
    dict_10_f[x] = compdict_framewise[x]

In [0]:
def create_mix(song1, song2):
    """Mix two loops at equal peak level.

    Each loop is scaled so that its largest absolute sample is 0.5, then the
    two are summed over the length of the shorter one, which keeps the mix
    within [-1, 1].

    Args:
        song1: Path of the first audio file.
        song2: Path of the second audio file.

    Returns:
        A numpy array with the mixed signal, as long as the shorter loop.
    """
    scaled = []
    for path in (song1, song2):
        audio = std.MonoLoader(filename=path)()
        # Peak over absolute values so that negative peaks count as well;
        # a silent (or empty) loop is left untouched to avoid dividing by zero.
        peak = np.max(np.abs(audio)) if len(audio) else 0
        if peak > 0:
            audio = audio / peak / 2
        scaled.append(audio)

    audio1, audio2 = scaled
    # Explicit truncation to the common length instead of catching the
    # broadcasting error raised when the lengths differ.
    common = min(len(audio1), len(audio2))
    mix = audio1[:common] + audio2[:common]
    return mix

In [0]:
def intersection(lst1, lst2):
    """Return the elements of ``lst2`` that also appear in ``lst1``.

    Args:
        lst1: Collection used for the membership test.
        lst2: Either a flat list of items or a list of sub-lists/tuples.

    Returns:
        For a flat ``lst2``: a list with the items of ``lst2`` found in
        ``lst1``, in ``lst2`` order. For a nested ``lst2`` (every element is
        a list or tuple): one filtered list per sub-list.
    """
    if all(isinstance(item, (list, tuple)) for item in lst2):
        # Nested input: filter every sub-list separately.
        return [[x for x in sublist if x in lst1] for sublist in lst2]
    # Flat input (e.g. file names): strings must be compared as whole items,
    # not iterated character by character.
    return [x for x in lst2 if x in lst1]

# Intended as the share of loops common to the whole-TIV and beatwise-TIV top-10 lists.
# NOTE(review): this relies on `intersection` returning the common file names as a flat list,
# and the denominator is len(mp3list) rather than the top-10 size -- confirm the intended ratio.
intersection_tivbeatwise_tivwhole = len(intersection(list(dict_10_w.keys()), list(dict_10.keys())))/len(mp3list)

#### Essentia dissonance

In [0]:
#Framewise mean version
# Order the candidates by ascending dissonance (least dissonant first).
compdict_diss = dict(sorted(compdict_diss.items(), key=lambda item: item[1]))
# Keep the ten least dissonant candidates. The target loop is excluded by
# name: mixed with itself it is not guaranteed to rank first, so skipping
# index 0 could drop the best real candidate instead.
dict_10_d = {}
for x in [name for name in compdict_diss if name != target_audio][:10]:
    dict_10_d[x] = compdict_diss[x]

In [0]:
#Whole mean spectrum version
# Order the candidates by ascending dissonance (least dissonant first).
compdict_diss_whole = dict(sorted(compdict_diss_whole.items(), key=lambda item: item[1]))
# Keep the ten least dissonant candidates, leaving the target loop out by
# name so that it cannot appear among its own candidates.
dict_10_dw = {}
for x in [name for name in compdict_diss_whole if name != target_audio][:10]:
    dict_10_dw[x] = compdict_diss_whole[x]

## Create the mixes and listen to them (TIV Beatwise)
This will output the ten most compatible **candidate loops** along with their mixes with the **target loop**. The candidates are ranked with the beatwise TIV.

In [0]:
# Show every candidate selected with the beatwise TIV (dict_10), followed by its mix with the target loop.
i = 0  # mix number shown in the heading; corresponds to the mix_<i>_tivb rating variable
for key in dict_10.keys():
    display(HTML("<h3>Mix #%s: %s</h3>" % (i, key.split('/')[-1])))  # heading: mix number and file name
    display(HTML("Original"))
    display(Audio(filename=key))  # the candidate loop on its own
    mix = create_mix(target_audio, key)
    display(HTML("Mix"))
    display(Audio(data=mix, rate=44100))  # target + candidate
    i += 1

In [0]:
#@title Rate the _consonance_ of the mixes with a score between 0 and 5
#@markdown Run this cell to see the output.
#@markdown Please rate the mix with a **-1** if the candidate loop for the mix is mainly a drum loop
# Each slider value is stored in a plain variable that the "Print the results" cell collects into final_results.
# NOTE(review): the values below differ from the uniform 2 used in the other rating cells --
# they look like leftover answers rather than neutral defaults; confirm.
mix_0_tivb = 4  #@param {type: "slider", min: -1, max: 5}
mix_1_tivb = 2  #@param {type: "slider", min: -1, max: 5}
mix_2_tivb = 1  #@param {type: "slider", min: -1, max: 5}
mix_3_tivb = 3  #@param {type: "slider", min: -1, max: 5}
mix_4_tivb = 1  #@param {type: "slider", min: -1, max: 5}
mix_5_tivb = 3  #@param {type: "slider", min: -1, max: 5}
mix_6_tivb = 3  #@param {type: "slider", min: -1, max: 5}
mix_7_tivb = 1  #@param {type: "slider", min: -1, max: 5}
mix_8_tivb = 3  #@param {type: "slider", min: -1, max: 5}
mix_9_tivb = 1  #@param {type: "slider", min: -1, max: 5}
#@markdown ---


## Create the mixes and listen to them (TIV Whole)
This will output the ten most compatible **candidate loops** along with their mixes with the **target loop**. The candidates are ranked with the whole TIV.

In [0]:
# Show every candidate selected with the whole-loop TIV (dict_10_w), followed by its mix with the target loop.
i = 0  # mix number shown in the heading; corresponds to the mix_<i>_tivw rating variable
for key in dict_10_w.keys():
    display(HTML("<h3>Mix #%s: %s</h3>" % (i, key.split('/')[-1])))  # heading: mix number and file name
    display(HTML("Original"))
    display(Audio(filename=key))  # the candidate loop on its own
    mix = create_mix(target_audio, key)
    display(HTML("Mix"))
    display(Audio(data=mix, rate=44100))  # target + candidate
    i += 1

In [0]:
#@title Rate the _consonance_ of the mixes with a score between 0 and 5
#@markdown Run this cell to see the output.
#@markdown Please rate the mix with a **-1** if the candidate loop for the mix is mainly a drum loop
# Each slider value is stored in a plain variable that the "Print the results" cell collects into final_results.
mix_0_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_1_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_2_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_3_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_4_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_5_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_6_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_7_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_8_tivw = 2  #@param {type: "slider", min: -1, max: 5}
mix_9_tivw = 2  #@param {type: "slider", min: -1, max: 5}
#@markdown ---


## Create the mixes and listen to them (TIV Framewise)

In [0]:
# Show every candidate selected with the framewise TIV (dict_10_f), followed by its mix with the target loop.
i = 0  # mix number shown in the heading; corresponds to the mix_<i>_tivf rating variable
for key in dict_10_f.keys():
    display(HTML("<h3>Mix #%s: %s</h3>" % (i, key.split('/')[-1])))  # heading: mix number and file name
    display(HTML("Original"))
    display(Audio(filename=key))  # the candidate loop on its own
    mix = create_mix(target_audio, key)
    display(HTML("Mix"))
    display(Audio(data=mix, rate=44100))  # target + candidate
    i += 1

In [0]:
#@title Rate the _consonance_ of the mixes with a score between 0 and 5
#@markdown Run this cell to see the output.
#@markdown Please rate the mix with a **-1** if the candidate loop for the mix is mainly a drum loop
# Each slider value is stored in a plain variable that the "Print the results" cell collects into final_results.
mix_0_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_1_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_2_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_3_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_4_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_5_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_6_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_7_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_8_tivf = 2  #@param {type: "slider", min: -1, max: 5}
mix_9_tivf = 2  #@param {type: "slider", min: -1, max: 5}
#@markdown ---

## Create the mixes and listen to them (Dissonance)



In [0]:
# Show every candidate selected with the dissonance measure (dict_10_d), followed by its mix with the target loop.
i = 0  # mix number shown in the heading; corresponds to the mix_<i>_diss rating variable
for key in dict_10_d.keys():
    display(HTML("<h3>Mix #%s: %s</h3>" % (i, key.split('/')[-1])))  # heading: mix number and file name
    display(HTML("Original"))
    display(Audio(filename=key))  # the candidate loop on its own
    mix = create_mix(target_audio, key)
    display(HTML("Mix"))
    display(Audio(data=mix, rate=44100))  # target + candidate
    i += 1

In [0]:
#@title Rate the _consonance_ of the mixes with a score between 0 and 5
#@markdown Run this cell to see the output.
#@markdown Please rate the mix with a **-1** if the candidate loop for the mix is mainly a drum loop
# Each slider value is stored in a plain variable that the "Print the results" cell collects into final_results.
mix_0_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_1_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_2_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_3_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_4_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_5_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_6_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_7_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_8_diss = 2  #@param {type: "slider", min: -1, max: 5}
mix_9_diss = 2  #@param {type: "slider", min: -1, max: 5}
#@markdown ---


## Print the results

In [0]:
#@markdown #Press play on this cell to show the results
# Gather the index of the target loop and every rating, grouped by method.
final_results = {
    'loop_sample': loop_sample,
    # Dissonance ratings
    'mix_0_diss': mix_0_diss, 'mix_1_diss': mix_1_diss, 'mix_2_diss': mix_2_diss,
    'mix_3_diss': mix_3_diss, 'mix_4_diss': mix_4_diss, 'mix_5_diss': mix_5_diss,
    'mix_6_diss': mix_6_diss, 'mix_7_diss': mix_7_diss, 'mix_8_diss': mix_8_diss,
    'mix_9_diss': mix_9_diss,
    # Beatwise TIV ratings
    'mix_0_tivb': mix_0_tivb, 'mix_1_tivb': mix_1_tivb, 'mix_2_tivb': mix_2_tivb,
    'mix_3_tivb': mix_3_tivb, 'mix_4_tivb': mix_4_tivb, 'mix_5_tivb': mix_5_tivb,
    'mix_6_tivb': mix_6_tivb, 'mix_7_tivb': mix_7_tivb, 'mix_8_tivb': mix_8_tivb,
    'mix_9_tivb': mix_9_tivb,
    # Whole TIV ratings
    'mix_0_tivw': mix_0_tivw, 'mix_1_tivw': mix_1_tivw, 'mix_2_tivw': mix_2_tivw,
    'mix_3_tivw': mix_3_tivw, 'mix_4_tivw': mix_4_tivw, 'mix_5_tivw': mix_5_tivw,
    'mix_6_tivw': mix_6_tivw, 'mix_7_tivw': mix_7_tivw, 'mix_8_tivw': mix_8_tivw,
    'mix_9_tivw': mix_9_tivw,
    # Framewise TIV ratings
    'mix_0_tivf': mix_0_tivf, 'mix_1_tivf': mix_1_tivf, 'mix_2_tivf': mix_2_tivf,
    'mix_3_tivf': mix_3_tivf, 'mix_4_tivf': mix_4_tivf, 'mix_5_tivf': mix_5_tivf,
    'mix_6_tivf': mix_6_tivf, 'mix_7_tivf': mix_7_tivf, 'mix_8_tivf': mix_8_tivf,
    'mix_9_tivf': mix_9_tivf,
}
# Append the score of every selected candidate, numbered in the order the mixes were shown.
final_results.update(
    {'hcom_b{}'.format(rank): score for rank, score in enumerate(dict_10.values())})
final_results.update(
    {'hcom_w{}'.format(rank): score for rank, score in enumerate(dict_10_w.values())})
final_results.update(
    {'hcom_d{}'.format(rank): score for rank, score in enumerate(dict_10_d.values())})
final_results.update(
    {'hcom_f{}'.format(rank): score for rank, score in enumerate(dict_10_f.values())})
final_results