In [58]:
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.mlab as mlab

from collections import Counter

%matplotlib notebook


In [59]:
def peaks_to_fingerprints (local_peaks: np.ndarray, n=15):
    """Takes in the array local_peaks of freq, time for each peak
    for all of the local peaks in a section of a song and takes in
    fanout value n.
    
    It will return fingerprints for each peak entered in an ndarray 
    """
    
    pairs = form_peak_pairs(local_peaks)
    peaks = points_to_data(pairs)

    fingerprint = np.array([peaks_to_fanout(
        peaks[
            np.where(np.multiply(pairs[:,0,0] == i[0], pairs[:,0,1] == i[1]))],
        n) for i in local_peaks if np.size(
        np.where(np.multiply(pairs[:,0,0] == i[0], pairs[:,0,1] == i[1])))<= n and 
                            np.size(np.where(np.multiply(pairs[:,0,0] == i[0], pairs[:,0,1] == i[1]))) > 0])
    
    ## np.where(np.multiply(pairs[:,0,0] == i[0], pairs[:,0,1] == i[1])) is the indices of the peak-pair data for the peak in question i 
    ## compares the first item of peak i and the second item of peak i with the first and second items of each peak in the array
    
    """
    # GET RID OF FOOR LOOP
    for i in local_peaks: # each peak...
        indices = np.array(np.where(np.multiply(pairs[:,0,0] == i[0], pairs[:,0,1] == i[1]))) # index of peak pair data (row containing it) where first peak is peak i (can ignore the second peak since we are only looking forward)
        
        ### indices[i] --> (row) index of peak-pair P the array peaks (peaks[indices[i], : ] --> original data for peak-pair P)
        
        selected = peaks[indices, : ]
        
        if len(selected) >= n: # only going to add it if it's long enough
            fanout = peaks_to_fanout(selected, n)
    """
        
    ### return array of the fingerprints for each peak
    return fingerprint

In [60]:
def form_peak_pairs(local_peaks: np.ndarray):
    """Takes in the array local_peaks of any number of rows and two columns freq and time
    for each of the local peaks and returns an array of each peak-pair from these peaks
    """
    
    pairs = np.array([(peak1,peak2) for peak2 in local_peaks for peak1 in local_peaks if peak1[1] > peak2[1]])    
    
    return pairs

In [61]:
def points_to_data(points: np.ndarray):
    """Takes in an array of freq and time data for each peak in each peak-pair
    (where the first listed is one that occurs first) and returns a 2d array
    which contains three columns and number of rows equivalent to the number of 
    peak-pairs entered
        Col 1 --> frequency at point 1 (point 1 must occur first)
        Col 2 --> frequency at point 2
        Col 3 --> time elapsed between points
    """
    data = np.array([[pair[0,0], pair[1,0], pair[1,1]-pair[0,1]] for pair in points])
    return data

In [62]:
def peaks_to_fanout(selected_peaks: np.ndarray, n):
    """Takes in a 2d array selected_peaks of selected peak-pair
    data in the same formatting as the full array, and containing
    all of the peak-pairs with a particular peak as peak 1. Also
    takes in the fanout value n.
    
    Returns the fanout for this peak

                                  [[fi, fi+1, delta_t(i,i+1)]
                                   [fi, fi+2, delta_t(i,i+2)],
        fanout (for peak i)  =               ...,
                                   [fi, fi+n, delta_t(i,i+n)]]    
    """

    delta_fs = selected_peaks[:,1]-selected_peaks[:,0] # array of change in frequencies for our peak pairs
    delta_ts = selected_peaks[:,2] # array of change in times for our peak pairs

    ### for each index i,
    ###     selected_peaks[i] --> peak-pair P data (this is just a segment of the original data; just the peak-pairs contianing our particular peak)
    ###     delta_fs[i]       --> change in frequency for peak-pair P
    ###     delta_ts[i]       --> change in time for peak-pair P


    # an array of indices pointing towards our selected peak-pair data
    # (selected_peaks_sorted[i] --> index of i'th closest peak (index pointing towards our selected peak-pair data arrays))
    selected_peaks_sorted = sort_peaks(delta_fs, delta_ts) 

    #selected_peaks_sorted[0:n] are the indices of the peaks within the fanout
    fanout_inds = selected_peaks_sorted[0:n]
    fanout_peaks = selected_peaks[fanout_inds]

    ###    fanout for this peak = an array of [[fi, fi+1, delta_t(i,i+1)]
    ###                                        [fi, fi+2, delta_t(i,i+2)],
    ###                                                   ...,
    ###                                        [fi, fi+n, delta_t(i,i+n)]]
    
    fanout = fanout_peaks[:,0:3]
    return fanout

In [63]:
def sort_peaks (delta_fs: np.ndarray, delta_ts: np.ndarray):
    """Takes in two 1 dimensional nd arrays delta_fs and delta_ts
    of the same length which both point towards data from the same
    selected peak-pairs and for which the value of each at each same
    index point to the same individual peak-pair
        - this means that for each index i, delta_fs[i] and delta_ts[i]
          correspond to the same peak-pair
        - the delta_f and delta_t values should be from all of the
          peak-pairs for which a particular peak is the first peak in
          the pair
    
    It returns an nd array where the values of which represent the
    indices of delta_ts, delta_fs, and other arrays which point to the
    same data in the same order sorted by time then frequency (eg. the
    first value of the output array is the index of the smallest
    delta_t, second is the next smallest, etc. (where identical delta_ts
    will be ordered by smallest delta_f to greatest))
    """
    
    
    ### get indices sorted based on delta_t values of our selected data (sorted_t)
    sorted_t = np.argsort(delta_ts)

    # we need to make sure that any duplicate times are sorted by frequency:

    #finding the time values
    time_counts = Counter(delta_ts)
    
    arr_counts = np.array(list(time_counts.items())) # an array of the counts in the Counter time_counts
    duplicates_ind = np.where(arr_counts[:,1] > 1) # finding the indexes of where there are more than one of each time
    
    if len(duplicates_ind) > 0: # only continue to sort if there are duplicates
        duplicates_times = arr_counts[:,0][duplicates_ind] #finding the time values based on the indices in the counter

        # list of an array of indices of each duplicate time
        # (indices are the indices pointing towards data in delta_fs, delta_ts, and t_sorted, NOT the values in t_sorted which will be used as indices (to get those do t_sorted[unsorted_by_freq]))
        unsorted_by_freq = [list(np.where(delta_ts == time)[0]) for time in duplicates_times]

        # list of an array containing the new index values pointed to by each array of indices in unsorted_by_freq, sorted by frequency
        # (these values are the index values to put into the array of indices)
        # each item = reorder of the values of sorted_t by sorting by frequency for each index_arr in the unsorted_by_freq lsit
        sorted_by_freq = [list(sorted_t[index_arr][np.argsort(delta_fs[sorted_t[index_arr]])]) for index_arr in unsorted_by_freq]                
        # put back in:
        ## 'flattening' the lists so as to be able to index with them
        flat_unsorted = []
        flat_sorted = []
        [[flat_unsorted.append(inner_vals) for inner_vals in value] for value in unsorted_by_freq]
        [[flat_sorted.append(inner_vals) for inner_vals in value] for value in sorted_by_freq]
        ## putting these into sorted
        sorted_fully = sorted_t.copy()
        sorted_fully[flat_unsorted] = flat_sorted
        
        return sorted_fully

        Col 1 --> frequency at point 1 (point 1 must occur first)
        Col 2 --> frequency at point 2
        Col 3 --> time elapsed between points
        Col 4 --> tuple of point 1 (row, col) of the peak for identification purposes
        Col 5 --> tuple of point 2 (row, col)

In [64]:
test_peaks_arr = np.array([(4.5, 0.3), (5.6, 0.1), (2.1, 3.2), (4.3, 1.1), (3.4, 1.2), (0.9, 0.8),
                       (6.6, 2.4), (0.1, 2.0), (5.5, 3.3), (5.6, 2.1), (4.1, 1.2), (6.3, 3.1),
                       (0.4, 0.2), (0.8, 0.7), (6.7, 1.4), (1.1, 2.1)])

In [65]:
arr = np.array([(4.5, 0.3), (5.6, 0.1), (2.1, 3.2)])

In [66]:
peaks_to_fingerprints(test_peaks_arr)

  fingerprint = np.array([peaks_to_fanout(


array([array([[ 4.5,  5.6, -0.2],
              [ 4.5,  0.4, -0.1]]), array([[ 2.1,  5.6, -3.1],
                                           [ 2.1,  0.4, -3. ],
                                           [ 2.1,  4.5, -2.9],
                                           [ 2.1,  0.8, -2.5],
                                           [ 2.1,  0.9, -2.4],
                                           [ 2.1,  4.3, -2.1],
                                           [ 2.1,  3.4, -2. ],
                                           [ 2.1,  4.1, -2. ],
                                           [ 2.1,  6.7, -1.8],
                                           [ 2.1,  0.1, -1.2],
                                           [ 2.1,  5.6, -1.1],
                                           [ 2.1,  1.1, -1.1],
                                           [ 2.1,  6.6, -0.8],
                                           [ 2.1,  6.3, -0.1]]),
       array([[ 4.3,  5.6, -1. ],
              [ 4.3,  0.4, -0.9],
              

In [67]:
arr2 = np.array([[[1,6],
                  [3,6]],
                
                 [[1,6],
                  [1,8]]])

In [68]:
np.size(np.where(np.multiply(arr2[:,0,0] == [1,6][0], arr2[:,0,1] == [1,6][1])))

2