Load all packages and song to be used for generation


In [1]:
%matplotlib inline
from pylab import *
import random
import pretty_midi
import numpy
import scipy 
import IPython.display as ipd
import matplotlib.pyplot as plt
import librosa, librosa.display
from math import sqrt

# Here we load the song that will be used as the basis for our recomposition and display it for listening
# `original` is the path of the piece to analyse; it is reused later when the
# list of songs to recompose from is built.
original = './Chopin7.wav'
# librosa.load returns the audio samples (`x`) and the sample rate (`sr`).
x, sr = librosa.load(original) 
# As the last expression of the cell this renders an audio player; the file
# path (not the decoded samples) is what is handed to the player.
ipd.Audio(data=original, rate=sr)


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\user\\Documents\\Computer Science\\Computer Music\\Final\\Chopin.wav'

The first step in pitch detection is to detect onsets. An onset tells us when a new sound begins and therefore gives us a starting index for each note. To estimate the length of each note, we create onset boundaries by adding the start and the end of the recording to the list of onsets. We then estimate the pitch for each period between consecutive boundaries and output a list `pitches` containing the pitch of each period and a list `lengths` containing the length of each period.

In [None]:
# Detect note onsets in the loaded signal and report them as sample indices
# (units='samples').  The signal is passed by keyword (`y=x`): newer librosa
# releases accept the audio argument by keyword only, and the keyword form
# also works on older releases.
# The remaining arguments tune the onset picking (hop size of the analysis
# frames, no backtracking, and the peak-picking window/threshold settings).
onset_samples = librosa.onset.onset_detect(y=x,
                                           sr=sr, units='samples',
                                           hop_length=100,
                                           backtrack=False,
                                           pre_max=20,
                                           post_max=20,
                                           pre_avg=100,
                                           post_avg=100,
                                           delta=0.2,
                                           wait=0)

In [None]:
# Add the first sample (0) and the end of the signal (len(x)) around the
# detected onsets so that consecutive entries delimit every segment of the
# recording, including the audio before the first and after the last onset.
onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])
#print(onset_boundaries)

In [None]:
# Same boundaries expressed in seconds instead of sample indices.
onset_times = librosa.samples_to_time(onset_boundaries, sr=sr)

In [None]:
def estimate_pitch(segment, sr, lowest_freq=50.0, highest_freq=2000.0):
    """Estimate the dominant frequency of ``segment`` by autocorrelation.

    The autocorrelation of the segment is computed, every lag outside the
    band implied by ``lowest_freq``/``highest_freq`` is zeroed, and the lag
    with the largest remaining value is converted to a frequency.

    Args:
        segment: audio samples of one note/segment.
        sr: sample rate of ``segment`` in Hz.
        lowest_freq: lowest frequency (Hz) considered; sets the largest lag.
        highest_freq: highest frequency (Hz) considered; sets the smallest lag.

    Returns:
        The estimated frequency in Hz (``sr`` divided by the peak lag).

    Raises:
        ValueError: if no positive lag carries the autocorrelation peak, e.g.
            for a silent segment, a segment shorter than the smallest lag, or
            a frequency band that leaves no usable lag.  Previously this case
            divided by zero.
    """
    correlation = librosa.autocorrelate(segment)
    # Frequency band expressed as lags, in samples.
    min_lag = int(sr / highest_freq)
    max_lag = int(sr / lowest_freq)
    # Discard lags outside the band so argmax can only pick a valid period.
    correlation[:min_lag] = 0
    correlation[max_lag:] = 0
    peak_lag = correlation.argmax()
    if peak_lag == 0:
        # argmax falls back to index 0 when nothing in the band stands out;
        # a lag of 0 has no meaningful frequency.
        raise ValueError(
            "cannot estimate pitch: no autocorrelation peak between "
            "%s Hz and %s Hz (segment of %d samples)"
            % (lowest_freq, highest_freq, len(segment))
        )
    return float(sr) / peak_lag

In [None]:
def estimate_pitch2(x, onset_samples, i, sr):
    """Estimate the pitch of the ``i``-th segment of ``x``.

    The segment runs from ``onset_samples[i]`` up to (not including)
    ``onset_samples[i + 1]``; its frequency is obtained from
    ``estimate_pitch``.

    Args:
        x: full audio signal.
        onset_samples: segment boundaries as sample indices.
        i: index of the segment's starting boundary.
        sr: sample rate of ``x`` in Hz.

    Returns:
        The value returned by ``estimate_pitch`` for that segment.
    """
    segment = x[onset_samples[i]:onset_samples[i + 1]]
    return estimate_pitch(segment, sr)
def pitch_length(x, onset_samples, i, sr):
    """Return the length, in samples, of the ``i``-th segment.

    Args:
        x: unused; kept so the signature mirrors ``estimate_pitch2``.
        onset_samples: segment boundaries as sample indices.
        i: index of the segment's starting boundary.
        sr: unused; kept so the signature mirrors ``estimate_pitch2``.

    Returns:
        ``onset_samples[i + 1] - onset_samples[i]``.
    """
    first, last = onset_samples[i], onset_samples[i + 1]
    return last - first

In [None]:
# One pitch estimate and one length (in samples) per segment, where a segment
# is the span between two consecutive entries of onset_boundaries.
pitches = [estimate_pitch2(x, onset_boundaries, segment, sr)
           for segment in range(len(onset_boundaries) - 1)]
lengths = [pitch_length(x, onset_boundaries, segment, sr)
           for segment in range(len(onset_boundaries) - 1)]
    


We then convert the frequency of each note to a string of the note name with octave removed


In [None]:
# Name of the nearest note (with octave) for every estimated frequency.
notes = [librosa.hz_to_note(freq) for freq in pitches]

# Drop the octave and keep only the pitch class.  Depending on the librosa
# version a sharp is written either as ASCII '#' or as the unicode sign
# U+266F; both are accepted here and normalised to '#', the spelling used by
# the profile tables below.  Checking for '#' alone would silently turn every
# sharp into its natural note when the unicode spelling is returned.
formatted_notes = [
    note[0] + '#' if note[1] in ('#', '\u266f') else note[0]
    for note in notes
]

# Map each pitch class to the list of lengths (in samples) of its segments.
length_dict = {}
for note_name, length in zip(formatted_notes, lengths):
    length_dict.setdefault(note_name, []).append(length)
    
    

We then calculate the total time each note is played in the composition

In [None]:
# Total sounding time, in seconds, of every pitch class.  Pitch classes that
# never occur keep their initial value of 0.
profile = dict.fromkeys(
    ['A', 'A#', 'B', 'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#'], 0)
for note_name, sample_counts in length_dict.items():
    # Lengths are stored in samples; dividing by sr converts to seconds.
    profile[note_name] = sum(sample_counts) / sr
print(profile)

In [None]:
# Reference key profiles: one weight per pitch class, listed starting from the
# tonic of the key (the measured profile is rotated to start at each candidate
# tonic before it is compared with these).
# NOTE(review): the values appear to be the Krumhansl-Kessler major/minor key
# profiles - confirm the source.
major_profile = [6.35,2.23,3.48,2.33,4.38,4.09,2.52,5.19,2.39,3.66,2.29,2.88]
minor_profile = [6.33,2.68,3.52,5.38,2.60,3.53,2.54,4.75,3.98,2.69,3.34,3.17]
# Pitch classes in ascending chromatic order starting at A; used both as the
# candidate tonics and as the lookup order into `profile`.
note_list = ['A','A#','B','C','C#','D','D#','E','F','F#','G','G#']

In [None]:
def calc_coefficient(x,y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    Args:
        x: sequence of numbers; its length decides how many pairs are used.
        y: sequence of numbers, indexed in step with ``x``.

    Returns:
        The sum of the products of the paired deviations from the means,
        divided by the square root of the product of the two sums of squared
        deviations.
    """
    mean_x = numpy.mean(x)
    mean_y = numpy.mean(y)
    positions = range(len(x))
    # Deviations of every element from its own sequence mean.
    dev_x = [x[k] - mean_x for k in positions]
    dev_y = [y[k] - mean_y for k in positions]
    covariance = sum(dx * dy for dx, dy in zip(dev_x, dev_y))
    spread_x = sum(dx ** 2 for dx in dev_x)
    spread_y = sum(dy ** 2 for dy in dev_y)
    return covariance / sqrt(spread_x * spread_y)
    
        

The final step in key detection is to loop over each key profile and calculate its correlation (the Pearson correlation coefficient) with our note durations. The key profile that has the highest correlation with our note distribution will be our best match.

In [None]:
# Correlate the measured duration profile with the major and minor reference
# profiles for every candidate tonic.  `matches` maps each correlation
# coefficient to the label of the key that produced it.
matches = {}
for shift, tonic in enumerate(note_list):
    # Rotate the chromatic order so the candidate tonic comes first, matching
    # the tonic-first layout of the reference profiles.
    rotated_notes = note_list[shift:] + note_list[:shift]
    new_profile = [profile[note_name] for note_name in rotated_notes]
    matches[calc_coefficient(new_profile, major_profile)] = f"{tonic}_major"
    matches[calc_coefficient(new_profile, minor_profile)] = f"{tonic}_minor"

# The key whose reference profile correlates best with the piece wins.
best_match = max(matches)
key = matches[best_match]
print(key)
print(best_match)

We then find the BPM (Beats per Minute) which is the second factor we will use in our nearest neighbor search. We do this using a librosa utility.

In [None]:
# Estimate the tempo of the piece.  The signal is passed by keyword because
# newer librosa releases accept it by keyword only, and the sample rate is
# passed explicitly instead of relying on the function's default matching the
# rate the file was loaded at.
# NOTE(review): newer librosa releases expose this estimator as
# librosa.feature.tempo and deprecate librosa.beat.tempo - confirm against the
# installed version.
bpm = librosa.beat.tempo(y=x, sr=sr)
# The estimate comes back as an array, so its element is extracted explicitly
# rather than calling int() on the array.  The value is truncated, as before.
# NOTE(review): the tempi stored in the song library look truncated the same
# way - confirm before switching to rounding.
print(int(numpy.ravel(bpm)[0]))

Next we use a naive K-Nearest Neighbors search to find the most closely related songs in our library. This is complicated by the fact that the distance between keys is circular: A minor is first in our list and G# major is last in our list, but they have a distance of 1. To account for this, we compute both the distance from point1 to point2 in the traditional sense and the distance "around the back", and return the minimum of the two.

In [None]:
# Keys in the order used to measure key distance; the position of a label in
# this list is its coordinate on the (circular) key axis.
key_to_value = [
    'A_minor', 'A_major', 'A#_minor', 'A#_major', 'B_minor', 'B_major',
    'C_minor', 'C_major', 'C#_minor', 'C#_major', 'D_minor', 'D_major',
    'D#_minor', 'D#_major', 'E_minor', 'E_major', 'F_minor', 'F_major',
    'F#_minor', 'F#_major', 'G_minor', 'G_major', 'G#_minor', 'G#_major',
]


def euclidean_dist(point1, point2):
    """Return the distance between two ``(key label, tempo)`` points.

    The tempo axis uses the plain difference of the integer tempi.  The key
    axis is circular: the distance is the shorter way round ``key_to_value``,
    so the first and last entries are one step apart.

    Args:
        point1: sequence whose first item is a key label from
            ``key_to_value`` and whose second item is a tempo.
        point2: same layout as ``point1``.

    Returns:
        The Euclidean combination of the tempo and key distances.

    Raises:
        ValueError: if a key label is not in ``key_to_value``.
    """
    tempo_gap = int(point1[1]) - int(point2[1])
    first_pos = key_to_value.index(point1[0])
    second_pos = key_to_value.index(point2[0])
    direct = abs(first_pos - second_pos)
    # Going "around the back" of the list covers the remaining entries.
    key_gap = min(direct, len(key_to_value) - direct)
    return sqrt(tempo_gap ** 2 + key_gap ** 2)

In [None]:
# Song library for the nearest-neighbour search.  Each entry is
# [path to the wav file, key label, tempo]; the key label and tempo are the
# two coordinates compared against the analysed piece.
songlist = [['./musicdr/chopin1.wav', 'G#_major', 135], ['./musicdr/chopin2.wav', 'G#_major', 89], ['./musicdr/haydn1.wav', 'G#_major', 129], ['./musicdr/haydn2.wav', 'G#_major', 129], ['./musicdr/haydn3.wav', 'G#_major', 117], ['./musicdr/mozart1.wav', 'A_major', 103], ['./musicdr/mozart2.wav', 'A_major', 143], ['./musicdr/mozart3.wav', 'A_major', 123], ['./musicdr/schu1.wav', 'A_minor', 129], ['./musicdr/schu2.wav', 'A_minor', 103], ['./musicdr/schu3.wav', 'A_minor', 123], ['./musicdr/bee1.wav', 'A#_major', 107], ['./musicdr/bee2.wav', 'A#_major', 112], ['./musicdr/bee3.wav', 'A#_major', 92], ['./musicdr/bee4.wav', 'A#_major', 135], ['./musicdr/bee5.wav', 'A#_major', 161], ['./musicdr/bee6.wav', 'A#_major', 129], ['./musicdr/bee8.wav', 'A#_major', 172], ['./musicdr/mozart4.wav', 'A#_major', 129], ['./musicdr/mozart5.wav', 'A#_major', 112], ['./musicdr/mozart6.wav', 'A#_major', 161], ['./musicdr/mozart7.wav', 'A#_major', 161], ['./musicdr/mozart8.wav', 'A#_major', 92], ['./musicdr/mozart9.wav', 'A#_major', 143], ['./musicdr/schu4.wav', 'A#_major', 95], ['./musicdr/schu5.wav', 'A#_major', 129], ['./musicdr/schu6.wav', 'A#_major', 135], ['./musicdr/chopin3.wav', 'A#_minor', 95], ['./musicdr/chopin4.wav', 'A#_minor', 112], ['./musicdr/chopin5.wav', 'A#_minor', 117], ['./musicdr/chopin6.wav', 'A#_minor', 123], ['./musicdr/chopin7.wav', 'A#_minor', 112], ['./musicdr/bach1.wav', 'C_major', 151], ['./musicdr/brahms1.wav', 'C_major', 112], ['./musicdr/brahms2.wav', 'C_major', 78], ['./musicdr/brahms3.wav', 'C_major', 172], ['./musicdr/brahms4.wav', 'C_major', 129], ['./musicdr/haydn4.wav', 'C_major', 89], ['./musicdr/haydn5.wav', 'C_major', 112], ['./musicdr/haydn6.wav', 'C_major', 112], ['./musicdr/haydn7.wav', 'C_major', 161], ['./musicdr/haydn8.wav', 'C_major', 103], ['./musicdr/haydn9.wav', 'C_major', 99], ['./musicdr/mozart10.wav', 'C_major', 135], ['./musicdr/mozart11.wav', 'C_major', 92], ['./musicdr/mozart12.wav', 'C_major', 172], ['./musicdr/mozart13.wav', 
'C_major', 135], ['./musicdr/mozart14.wav', 'C_major', 112], ['./musicdr/mozart15.wav', 'C_major', 95], ['./musicdr/schu7.wav', 'C_major', 143], ['./musicdr/schu8.wav', 'C_major', 123], ['./musicdr/schu9.wav', 'C_major', 89], ['./musicdr/schu10.wav', 'C_major', 112], ['./musicdr/wald1.wav', 'C_major', 103], ['./musicdr/wald2.wav', 'C_major', 89], ['./musicdr/wald3.wav', 'C_major', 107], ['./musicdr/bach2.wav', 'C_minor', 129], ['./musicdr/bee9.wav', 'C_minor', 184], ['./musicdr/bee10.wav', 'C_minor', 69], ['./musicdr/bee11.wav', 'C_minor', 99], ['./musicdr/pa1.wav', 'C_minor', 151], ['./musicdr/pa2.wav', 'C_minor', 107], ['./musicdr/pa3.wav', 'C_minor', 198], ['./musicdr/mond1.wav', 'C#_minor', 143], ['./musicdr/mond2.wav', 'C#_minor', 103], ['./musicdr/mond3.wav', 'C#_minor', 161], ['./musicdr/rac1.wav', 'C#_minor', 61], ['./musicdr/bach3.wav', 'D_major', 143], ['./musicdr/haydn10.wav', 'D_major', 95], ['./musicdr/haydn11.wav', 'D_major', 129], ['./musicdr/haydn12.wav', 'D_major', 123], ['./musicdr/mozart16.wav', 'D_major', 143], ['./musicdr/mozart17.wav', 'D_major', 89], ['./musicdr/mozart18.wav', 'D_major', 95], ['./musicdr/schu11.wav', 'D_major', 172], ['./musicdr/schu12.wav', 'D_major', 129], ['./musicdr/schu13.wav', 'D_major', 151], ['./musicdr/schu14.wav', 'D_major', 112], ['./musicdr/bee12.wav', 'D#_major', 123], ['./musicdr/bee13.wav', 'D#_major', 123], ['./musicdr/bee14.wav', 'D#_major', 107], ['./musicdr/bee15.wav', 'E_minor', 151], ['./musicdr/bee16.wav', 'E_minor', 143], ['./musicdr/ba1.wav', 'F_major', 83], ['./musicdr/haydn13.wav', 'F_major', 143], ['./musicdr/haydn14.wav', 'F_major', 123], ['./musicdr/haydn15.wav', 'F_major', 117], ['./musicdr/mozart19.wav', 'F_major', 143], ['./musicdr/mozart20.wav', 'F_major', 143], ['./musicdr/mozart21.wav', 'F_major', 99], ['./musicdr/ap1.wav', 'F_minor', 117], ['./musicdr/ap2.wav', 'F_minor', 99], ['./musicdr/ap3.wav', 'F_minor', 143], ['./musicdr/haydn16.wav', 'G_major', 184], ['./musicdr/haydn17.wav', 
'G_major', 161], ['./musicdr/haydn18.wav', 'G_major', 172], ['./musicdr/haydn19.wav', 'G_major', 99], ['./musicdr/haydn20.wav', 'G_major', 103], ['./musicdr/haydn21.wav', 'G_major', 107], ['./musicdr/ba2.wav', 'G_minor', 129], ['./musicdr/ba3.wav', 'G_major', 123], ['./musicdr/chopin8.wav', 'G_major', 123]]

Here we find the most closely related songs in database using naive k-nearest neighbors search that calculates the euclidean distance from our original piece to all pieces in our database and returns the k nearest neighbors.

In [None]:
# Number of library songs to select.
k = 3

# Coordinates of the analysed piece.  The tempo estimate may be a one-element
# array, so its scalar value is extracted once here.
query_point = [key, float(numpy.ravel(bpm)[0])]

# Rank every library song by its distance to the analysed piece.  A list of
# (distance, path) pairs is used instead of a dict keyed by distance: many
# library entries share the same key and tempo, so a distance-keyed dict keeps
# only one song per distinct distance and can run out of entries before k
# neighbours have been taken.
distances = [(euclidean_dist(query_point, (song[1], song[2])), song[0])
             for song in songlist]
# list.sort is stable, so songs at equal distance keep their library order.
distances.sort(key=lambda pair: pair[0])

# Paths of the k closest songs (fewer if the library is smaller than k) ...
neighbors = [path for _, path in distances[:k]]
# ... plus the analysed piece itself, which is always part of the material.
neighbors.append(original)

Now that we have all the songs we are going to pull from, we can begin our recomposition. For our recomposition we loop over each song, creating windows of size window_size and estimating their tempo. We then combine these windows into samples, or all possible groupings of contiguous windows of size sample_size. We then compute the average tempo and the standard deviation (std) of each sample. We then divide these into three groups (slow, medium, or fast) based on their tempo, with samples having low stds first. The idea is that the lower the std, the smaller the changes in speed across the sample, indicating that all windows inside it are likely stylistically similar. We can then choose these stylistically similar samples for recomposition based on their tempo.

In [None]:
# One entry per analysis window of every selected song:
# [number of onset boundaries in the window, window start in samples, song path].
activity = []
# Window length in seconds (the loop below steps through whole seconds).
window_size = 2

for song in neighbors:
    # NOTE: this rebinds the module-level x and sr for every song; after the
    # loop they hold the data of the last entry of `neighbors`.
    x, sr = librosa.load(song)
    # The signal is passed by keyword (`y=x`): newer librosa releases accept
    # the audio argument by keyword only, and the keyword form also works on
    # older releases.
    onset_samples = librosa.onset.onset_detect(y=x,
                                               sr=sr, units='samples',
                                               hop_length=100,
                                               backtrack=False,
                                               pre_max=20,
                                               post_max=20,
                                               pre_avg=100,
                                               post_avg=100,
                                               delta=0.2,
                                               wait=0)
    # Boundaries include the artificial first (0) and last (len(x)) entries,
    # so these are counted by the windows that contain them.
    onset_boundaries = numpy.concatenate([[0], onset_samples, [len(x)]])

    # Position in onset_boundaries; it only moves forward because both the
    # boundaries and the windows are visited in increasing order.
    index = 0
    for i in range(0, int(len(x)/sr), window_size):
        window_start = (i*sr)
        window_end = (i+window_size)*sr
        count = 0
        while(onset_boundaries[index] < window_end):
            count += 1
            index += 1
            if(index == len(onset_boundaries)):
                # The window runs past the end of the signal (the final
                # boundary len(x) was consumed), so it is only partially
                # filled: its count is reset to 0.
                count = 0
                break

        activity.append([count, window_start, song])
#print(activity)

In [None]:
# Number of consecutive windows grouped into one sample.
sample_size = 3
# One entry per sample:
# [std of the window counts, mean of the window counts,
#  start (in samples) of the first window, song of the first window].
sample_metrics = []

# "+ 1" so that the final group of sample_size windows is included as well,
# and the slice uses sample_size rather than a hard-coded 3 so changing
# sample_size actually changes the grouping.
# NOTE(review): `activity` holds the windows of all songs back to back, so a
# sample near the end of one song also contains windows of the next song -
# confirm whether such samples should be skipped.
for i in range(len(activity) - sample_size + 1):
    counts = [window[0] for window in activity[i:i + sample_size]]
    sample_metrics.append([numpy.std(counts), numpy.mean(counts),
                           activity[i][1], activity[i][2]])


# Split the range of mean counts into three equal bands.
maximum = max([sample[1] for sample in sample_metrics])
minimum = min([sample[1] for sample in sample_metrics])
difference = maximum-minimum
# Samples whose mean count is 0 are left out of the slow band.
slow = [sample for sample in sample_metrics if (0 < sample[1] < (minimum + (difference/3)))]
medium = [sample for sample in sample_metrics if (minimum + (difference/3)) <= sample[1] < (minimum + (2*(difference/3)))]
fast = [sample for sample in sample_metrics if (minimum + (2*(difference/3)) <= sample[1])]

