# Programming Assignment 1
## Genre Classification using Locality Sensitive Hashing (LSH)


In [107]:
#Imports

import pandas as pd
import numpy as np
from tqdm import tqdm

### Data Loading and Preprocessing 

In [108]:
"""

"""
# Load data
df_tracks = pd.read_csv('tracks.csv', index_col=0, header=[0, 1])
df_tracks = df_tracks[df_tracks['set']['subset'] == 'medium']
df_features = pd.read_csv('features.csv', index_col=0, header=[0, 1, 2])

# Filter by genres
df_tracks = df_tracks[df_tracks['track']['genre_top'].isin(['Hip-Hop', 'Pop', 'Folk', 'Rock', 'Experimental', 'International', 'Electronic', 'Instrumental'])]

# Split df_tracks into training, testing, and validation sets
df_tracks_train = df_tracks[df_tracks.iloc[:, 30] == 'training']
df_tracks_test = df_tracks[df_tracks.iloc[:, 30] == 'test']
df_tracks_validation = df_tracks[df_tracks.iloc[:, 30] == 'validation']

# Match features with tracks for training, testing, and validation
df_features_train = df_features[df_features.index.isin(df_tracks_train.index)]
df_features_test = df_features[df_features.index.isin(df_tracks_test.index)]
df_features_validation = df_features[df_features.index.isin(df_tracks_validation.index)]

# Extract feature values
X_train = df_features_train.values
X_test = df_features_test.values
X_validation = df_features_validation.values

# Extract genre labels
y_train = df_tracks_train['track']['genre_top']
y_test = df_tracks_test['track']['genre_top']
y_validation = df_tracks_validation['track']['genre_top']

# create smaller test subset from test
X_test_small = X_test[:100]
y_test_small = y_test[:100]

In [109]:
print(y_train)
print(X_train)

track_id
3              Hip-Hop
134            Hip-Hop
136               Rock
139               Folk
198               Folk
              ...     
155297    Instrumental
155298            Folk
155306            Folk
155307    Experimental
155314            Rock
Name: genre_top, Length: 11912, dtype: object
[[ 1.88896334e+00  7.60539293e-01  3.45296562e-01 ...  0.00000000e+00
   1.71672380e+00  6.93301633e-02]
 [ 9.18444753e-01  6.74147248e-01  5.77818275e-01 ...  0.00000000e+00
   1.80610597e+00  5.46228550e-02]
 [ 9.15000617e-01 -6.43476248e-01 -4.60507214e-01 ...  3.41796875e-03
   8.05020452e-01  1.69045236e-02]
 ...
 [ 6.44815028e-01 -8.79404128e-01 -1.14923191e+00 ...  4.88281250e-03
   3.17907929e+00  2.20229235e-02]
 [-4.46937442e-01  1.23500383e+00 -2.50854611e-01 ...  1.95312500e-03
   1.99608481e+00  4.30976301e-02]
 [-1.21936493e-01 -3.48523021e-01 -5.55810153e-01 ...  6.34765625e-03
   1.98907959e+00  1.92883536e-02]]


### Random Projection Matrix

In [110]:
# r_i = rowsize, r_j) = columsize
def generate_random_matrix(r_i, r_j):
    rij = np.random.choice([-1, 0, 1], size=(r_i, r_j), p=[1/6, 2/3, 1/6])
    return np.sqrt(3) * rij

### Hashtable generator function

We use the transpose of the Random Projection Matrix to reduce the dimensionality  and determine the orientation of each track's data relative to the hyperplanes by using the dot Product of the feature matrix and the transposed Random Projection Matrix. 
Then we use the binary representations of the orientations as a bucket and put in the tracks accordingly. 
$ \begin{cases} 
0 & \text{ if } x < 0 \\
1 & \text{ else}
\end{cases}
$ 
We can do this because of $\mathbf{a} \cdot \mathbf{b} = \|\mathbf{a}\| \|\mathbf{b}\| \cos(\theta)$ positive means on one side and negative on the other.
This whole process represents one hashtable.

In [111]:
"""
The binary representations are of length l.
And the number of hashtables we creat is equal to n.
"""
def hashtable_generator(X, l=64, n=2):
    hash_tables_and_matrices = []  
    for _ in range(n):
        buckets = {}
        random_matrix = generate_random_matrix(l, X.shape[1])
        X_dot = np.dot(X, random_matrix.T)
        X_dot = X_dot > 0
        X_dot = X_dot.astype(int)

        for i in range(len(X_dot)):
            hash_str = ''.join(X_dot[i].astype(str))
            if hash_str not in buckets:
                buckets[hash_str] = []
            buckets[hash_str].append(i)
        
        hash_tables_and_matrices.append((buckets, random_matrix))
        
    
    return hash_tables_and_matrices


### Similar Songs Finder
In this step we use the computed hash_tables and the according matrices to find all similar songs of the input song. 
> A music track is defined as similar if it is in the same bucket as $t_i$ in one of the $n$ hash tables.


In [137]:
import itertools

def find_similar_songs(song_input, hash_tables_and_matrices):
    similar_songs_indices = set()

    # First, try to find an exact match in any of the hash table and matrix combinations
    for buckets, random_matrix in hash_tables_and_matrices:
        song_projected = np.dot(song_input, random_matrix.T) > 0
        song_hash = ''.join(song_projected.astype(int).astype(str))

        if song_hash in buckets:
            similar_songs_indices.update(buckets[song_hash])

    # If no exact match is found in any combination, try to find the closest 10 buckets
    if len(similar_songs_indices) == 0:
        print("empty bucket")
        closest_bucket_names = []
        min_hamming_distances = []

        for buckets, random_matrix in hash_tables_and_matrices:
            song_projected = np.dot(song_input, random_matrix.T) > 0
            song_hash = ''.join(song_projected.astype(int).astype(str))

            bucket_hamming_distances = [(hamming_distance(song_hash, bucket_name), bucket_name) for bucket_name in buckets.keys()]
            bucket_hamming_distances.sort(key=lambda x: x[0])
            closest_bucket_names.extend([bucket_name for _, bucket_name in bucket_hamming_distances[:10]])
            min_hamming_distances.extend([hamming_dist for hamming_dist, _ in bucket_hamming_distances[:10]])

        closest_bucket_names = list(dict.fromkeys(closest_bucket_names))  # Remove duplicates
        min_hamming_distances = list(dict.fromkeys(min_hamming_distances))  # Remove duplicates

        for bucket_name, min_hamming_distance in zip(closest_bucket_names, min_hamming_distances):
            for buckets, _ in hash_tables_and_matrices:
                if bucket_name in buckets and buckets[bucket_name]:
                    similar_songs_indices.update(buckets[bucket_name])
                    break

    return list(similar_songs_indices)

def hamming_distance(str1, str2):
    """Calculate the Hamming distance between two binary strings"""
    return sum(c1 != c2 for c1, c2 in zip(str1, str2))

In [113]:
for item in range(len(X_test)):
    test = (find_similar_songs(X_test[0], hashtable_generator(X_train)))
    #print(X_test[0])
    print(y_test.iloc[0])
    #print(X_train[0])
    print(y_train.iloc[0])
    print(y_train.iloc[1])
    print(y_train.iloc[2])
    
    break

Rock
Hip-Hop
Hip-Hop
Rock


### Distance Computation of Similar Songs
This function computes the distance of all similar Songs to the input Song.

In [114]:
def compute_distances(X, song_input, similar_songs, metric="euclid", cut=10):
    filtered_songs = []
    if metric == "euclid":
        for element in similar_songs:
            distance = np.linalg.norm(X[element] - song_input)
            filtered_songs.append((element, distance))
    elif metric == "cosine":
        for element in similar_songs:
            # cosine similarity
            dot_product = np.dot(X[element], song_input)
            norm_song = np.linalg.norm(X[element])
            norm_input = np.linalg.norm(song_input)
            similarity = dot_product / (norm_song * norm_input)
            
            # From similarity to distance (cosine distance)
            distance = 1 - similarity
            filtered_songs.append((element, distance))
    else:
        raise ValueError("Invalid metric specified. Use 'euclid' or 'cosine'.")
    
    sorted_songs = sorted(filtered_songs, key=lambda x: x[1])
    if cut is not None:
        sorted_songs = sorted_songs[:cut]
    
    return [index for index, _ in sorted_songs]

### Getting the Genre by Majority vote

In [115]:
def determine_genre_by_majority_vote(song_indices, Y):
    genres = []
    for index in song_indices:
        genres.append(Y.iloc[index])
    if len(genres) == 0:
        print("No similar songs found.")
        return 'Rock'
    return max(set(genres), key=genres.count)


In [116]:
def find_song_genre(song, X,Y, hashtables = None, l=64, n=2, cut=10, metric="euclid"):
    if hashtables is None:
        hashtables = hashtable_generator(X,l, n)
    similar_songs = find_similar_songs(song, hashtables)
    nearest_neighbours = compute_distances(X,song, similar_songs, metric, cut)
    genre =  determine_genre_by_majority_vote(nearest_neighbours, Y)
    
    return genre 

In [125]:
def test_and_validation_accuracy_with_find_matching_songs_multiple_optimized(l = 64,n = 2,cut = 10, metric="euclid"):
    hashtables = hashtable_generator(X_train, l, n)
    correct = 0
    for index in tqdm(range(len(X_test)), desc="Progress Test Set"):
        song = X_test[index]
        genre = find_song_genre(song, X_train, y_train, hashtables, l, n, cut, metric)
        if genre == y_test.iloc[index]:
            correct += 1
    for index in tqdm(range(len(X_validation)), desc="Progress Validation Set"):
        song = X_validation[index]
        genre = find_song_genre(song, X_train, y_train, hashtables, l, n, cut, metric)
        if genre == y_validation.iloc[index]:
            correct += 1
            
    accuracy = correct / (len(X_test) + len(X_validation))
    
    print("Combined accuracy: ", accuracy)
    return accuracy

test_and_validation_accuracy_with_find_matching_songs_multiple_optimized()

Progress Test Set:  66%|██████▌   | 1006/1535 [00:32<00:13, 39.00it/s]

No similar songs found.


Progress Test Set: 100%|██████████| 1535/1535 [00:48<00:00, 31.86it/s]
Progress Validation Set: 100%|██████████| 1495/1495 [00:49<00:00, 30.21it/s]

Test accuracy:  0.6155115511551155





0.6155115511551155

In [134]:
def test_accuracy_with_find_matching_songs_multiple_optimized(l = 64,n = 2,cut = 10, metric="euclid"):
    hashtables = hashtable_generator(X_train, l, n)
    correct = 0
    for index in tqdm(range(len(X_test)), desc="Progress"):
        song = X_test[index]
        genre = find_song_genre(song, X_train, y_train, hashtables, l, n, cut, metric)
        if genre == y_test.iloc[index]:
            correct += 1

    accuracy = correct / len(X_test)
    print("Test accuracy: ", accuracy)
    return accuracy

#test_accuracy_with_find_matching_songs_multiple_optimized()

In [139]:
def find_best_parameters_test():
    best_accuracy = 0
    best_parameters = None
    for l in [32, 64, 128]:
        for n in [2, 4, 6, 8]:
            for cut in [5, 10, 15]:
                for metric in ["euclid", "cosine"]:
                    print("Parameters: l=", l, "n=", n, "cut=", cut, "metric=", metric)
                    accuracy = test_accuracy_with_find_matching_songs_multiple_optimized(l, n, cut, metric)
                    if accuracy > best_accuracy:
                        best_accuracy = accuracy
                        best_parameters = (l, n, cut, metric)
    print("Best parameters: ", best_parameters)
    print("Best accuracy: ", best_accuracy)
    
find_best_parameters_test()

Parameters: l= 128 n= 2 cut= 5 metric= euclid


Progress:   1%|          | 10/1535 [00:00<00:24, 61.48it/s]

empty bucket


Progress:   4%|▍         | 63/1535 [00:00<00:09, 147.46it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:   6%|▋         | 97/1535 [00:00<00:11, 121.95it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  13%|█▎        | 195/1535 [00:01<00:06, 201.38it/s]

empty bucket
empty bucket
empty bucket


Progress:  22%|██▏       | 339/1535 [00:01<00:04, 250.87it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  24%|██▍       | 368/1535 [00:01<00:04, 235.45it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  26%|██▌       | 394/1535 [00:02<00:06, 177.80it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  29%|██▊       | 438/1535 [00:02<00:06, 175.70it/s]

empty bucket
empty bucket


Progress:  34%|███▎      | 517/1535 [00:02<00:04, 222.78it/s]

empty bucket
empty bucket


Progress:  37%|███▋      | 567/1535 [00:02<00:04, 209.50it/s]

empty bucket
empty bucket


Progress:  38%|███▊      | 589/1535 [00:02<00:04, 199.61it/s]

empty bucket
empty bucket
empty bucket


Progress:  40%|████      | 615/1535 [00:03<00:04, 195.85it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  43%|████▎     | 653/1535 [00:03<00:06, 134.82it/s]

empty bucket
empty bucket
empty bucket


Progress:  45%|████▌     | 695/1535 [00:03<00:05, 162.40it/s]

empty bucket
empty bucket


Progress:  50%|████▉     | 760/1535 [00:03<00:03, 211.37it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  53%|█████▎    | 811/1535 [00:04<00:03, 214.29it/s]

empty bucket
empty bucket


Progress:  54%|█████▍    | 833/1535 [00:04<00:04, 161.43it/s]

empty bucket


Progress:  56%|█████▌    | 861/1535 [00:04<00:04, 155.16it/s]

empty bucket


Progress:  59%|█████▉    | 906/1535 [00:04<00:03, 176.46it/s]

empty bucket
empty bucket
empty bucket


Progress:  60%|██████    | 925/1535 [00:05<00:05, 103.87it/s]

empty bucket


Progress:  63%|██████▎   | 973/1535 [00:05<00:04, 129.12it/s]

empty bucket
empty bucket


Progress:  64%|██████▍   | 989/1535 [00:05<00:05, 107.38it/s]

empty bucket
empty bucket


Progress:  67%|██████▋   | 1036/1535 [00:06<00:04, 107.48it/s]

empty bucket
empty bucket


Progress:  69%|██████▉   | 1059/1535 [00:06<00:03, 120.49it/s]

empty bucket
empty bucket
empty bucket


Progress:  70%|██████▉   | 1073/1535 [00:06<00:06, 76.01it/s] 

empty bucket
empty bucket


Progress:  72%|███████▏  | 1102/1535 [00:07<00:04, 89.74it/s]

empty bucket
empty bucket


Progress:  75%|███████▌  | 1155/1535 [00:07<00:03, 121.24it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  77%|███████▋  | 1188/1535 [00:07<00:02, 125.05it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  81%|████████  | 1237/1535 [00:07<00:01, 162.26it/s]

empty bucket
empty bucket


Progress:  82%|████████▏ | 1255/1535 [00:08<00:01, 156.88it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  84%|████████▍ | 1288/1535 [00:08<00:01, 134.69it/s]

empty bucket
empty bucket
empty bucket


Progress:  93%|█████████▎| 1424/1535 [00:08<00:00, 291.06it/s]

empty bucket
empty bucket
empty bucket


Progress:  95%|█████████▍| 1455/1535 [00:09<00:00, 205.51it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  96%|█████████▋| 1480/1535 [00:09<00:00, 197.91it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  99%|█████████▉| 1523/1535 [00:09<00:00, 162.09it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress: 100%|██████████| 1535/1535 [00:09<00:00, 158.60it/s]

empty bucket
empty bucket
empty bucket
empty bucket





Test accuracy:  0.5771986970684039
Parameters: l= 128 n= 2 cut= 5 metric= cosine


Progress:   1%|          | 10/1535 [00:00<00:20, 75.40it/s]

empty bucket
empty bucket


Progress:   4%|▍         | 61/1535 [00:00<00:13, 109.38it/s]

empty bucket
empty bucket


Progress:   5%|▍         | 72/1535 [00:00<00:14, 101.61it/s]

empty bucket


Progress:   5%|▌         | 83/1535 [00:00<00:14, 103.59it/s]

empty bucket
empty bucket
empty bucket


Progress:   7%|▋         | 110/1535 [00:01<00:16, 84.21it/s]

empty bucket
empty bucket
empty bucket


Progress:  13%|█▎        | 195/1535 [00:01<00:06, 195.76it/s]

empty bucket
empty bucket


Progress:  18%|█▊        | 279/1535 [00:01<00:04, 270.87it/s]

empty bucket
empty bucket


Progress:  21%|██        | 316/1535 [00:01<00:04, 295.19it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  24%|██▍       | 374/1535 [00:02<00:06, 191.28it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  26%|██▌       | 399/1535 [00:02<00:05, 194.25it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  29%|██▊       | 441/1535 [00:02<00:06, 159.58it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  32%|███▏      | 486/1535 [00:03<00:06, 158.83it/s]

empty bucket
empty bucket
empty bucket


Progress:  35%|███▍      | 536/1535 [00:03<00:05, 195.13it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  38%|███▊      | 589/1535 [00:03<00:04, 189.54it/s]

empty bucket
empty bucket
empty bucket


Progress:  41%|████      | 632/1535 [00:03<00:05, 176.83it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  46%|████▌     | 703/1535 [00:04<00:03, 239.58it/s]

empty bucket


Progress:  52%|█████▏    | 805/1535 [00:04<00:02, 292.41it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  57%|█████▋    | 878/1535 [00:04<00:02, 229.61it/s]

empty bucket
empty bucket
empty bucket


Progress:  61%|██████    | 940/1535 [00:05<00:02, 244.72it/s]

empty bucket
empty bucket


Progress:  63%|██████▎   | 967/1535 [00:05<00:02, 243.58it/s]

empty bucket
empty bucket


Progress:  65%|██████▍   | 993/1535 [00:05<00:02, 193.67it/s]

empty bucket
empty bucket


Progress:  67%|██████▋   | 1033/1535 [00:05<00:03, 143.70it/s]

empty bucket
empty bucket


Progress:  69%|██████▉   | 1066/1535 [00:06<00:03, 133.52it/s]

empty bucket
empty bucket


Progress:  70%|███████   | 1081/1535 [00:06<00:04, 94.54it/s] 

empty bucket
empty bucket


Progress:  71%|███████   | 1093/1535 [00:06<00:05, 84.52it/s]

empty bucket
empty bucket


Progress:  72%|███████▏  | 1103/1535 [00:06<00:05, 73.65it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  73%|███████▎  | 1119/1535 [00:07<00:07, 52.13it/s]

empty bucket
empty bucket


Progress:  74%|███████▎  | 1130/1535 [00:07<00:07, 57.78it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  76%|███████▌  | 1159/1535 [00:07<00:04, 83.17it/s]

empty bucket
empty bucket
empty bucket


Progress:  76%|███████▌  | 1170/1535 [00:07<00:04, 81.83it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  78%|███████▊  | 1200/1535 [00:08<00:03, 91.12it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  80%|███████▉  | 1222/1535 [00:08<00:03, 95.37it/s]

empty bucket
empty bucket
empty bucket


Progress:  87%|████████▋ | 1338/1535 [00:08<00:00, 294.25it/s]

empty bucket
empty bucket


Progress:  91%|█████████▏| 1401/1535 [00:08<00:00, 256.81it/s]

empty bucket
empty bucket
empty bucket


Progress:  93%|█████████▎| 1430/1535 [00:08<00:00, 238.69it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  97%|█████████▋| 1489/1535 [00:09<00:00, 217.52it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  99%|█████████▊| 1514/1535 [00:09<00:00, 160.30it/s]

empty bucket
empty bucket


Progress: 100%|██████████| 1535/1535 [00:09<00:00, 156.53it/s]


Test accuracy:  0.5850162866449511
Parameters: l= 128 n= 2 cut= 10 metric= euclid


Progress:   3%|▎         | 39/1535 [00:00<00:07, 206.11it/s]

empty bucket
empty bucket


Progress:   6%|▌         | 91/1535 [00:00<00:07, 184.23it/s]

empty bucket
empty bucket
empty bucket


Progress:  10%|█         | 158/1535 [00:00<00:07, 188.21it/s]

empty bucket


Progress:  14%|█▎        | 210/1535 [00:01<00:06, 218.72it/s]

empty bucket
empty bucket
empty bucket


Progress:  17%|█▋        | 254/1535 [00:01<00:07, 179.60it/s]

empty bucket


Progress:  18%|█▊        | 277/1535 [00:01<00:06, 188.33it/s]

empty bucket
empty bucket


Progress:  20%|██        | 313/1535 [00:01<00:09, 128.51it/s]

empty bucket


Progress:  23%|██▎       | 358/1535 [00:02<00:09, 121.97it/s]

empty bucket
empty bucket


Progress:  24%|██▍       | 371/1535 [00:02<00:10, 109.59it/s]

empty bucket
empty bucket
empty bucket


Progress:  26%|██▌       | 393/1535 [00:02<00:15, 76.12it/s] 

empty bucket
empty bucket


Progress:  27%|██▋       | 412/1535 [00:03<00:13, 82.02it/s]

empty bucket
empty bucket


Progress:  30%|██▉       | 454/1535 [00:03<00:09, 111.64it/s]

empty bucket
empty bucket


Progress:  33%|███▎      | 505/1535 [00:03<00:06, 165.11it/s]

empty bucket


Progress:  35%|███▌      | 538/1535 [00:03<00:05, 189.35it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  36%|███▋      | 558/1535 [00:03<00:06, 143.18it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  39%|███▊      | 594/1535 [00:04<00:07, 131.72it/s]

empty bucket
empty bucket
empty bucket


Progress:  40%|███▉      | 610/1535 [00:04<00:08, 114.73it/s]

empty bucket
empty bucket
empty bucket


Progress:  41%|████      | 624/1535 [00:04<00:11, 80.13it/s] 

empty bucket
empty bucket
empty bucket


Progress:  43%|████▎     | 661/1535 [00:05<00:09, 96.60it/s]

empty bucket
empty bucket


Progress:  46%|████▌     | 701/1535 [00:05<00:07, 105.99it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  48%|████▊     | 744/1535 [00:05<00:05, 141.02it/s]

empty bucket
empty bucket
empty bucket


Progress:  52%|█████▏    | 805/1535 [00:06<00:04, 160.57it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  57%|█████▋    | 874/1535 [00:06<00:03, 178.55it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  58%|█████▊    | 893/1535 [00:06<00:04, 157.66it/s]

empty bucket
empty bucket


Progress:  67%|██████▋   | 1021/1535 [00:06<00:02, 252.03it/s]

empty bucket
empty bucket


Progress:  72%|███████▏  | 1104/1535 [00:07<00:01, 262.20it/s]

empty bucket
empty bucket


Progress:  75%|███████▌  | 1154/1535 [00:07<00:02, 144.74it/s]

empty bucket
empty bucket


Progress:  78%|███████▊  | 1190/1535 [00:08<00:02, 131.98it/s]

empty bucket
empty bucket


Progress:  79%|███████▉  | 1213/1535 [00:08<00:02, 133.37it/s]

empty bucket


Progress:  81%|████████  | 1242/1535 [00:08<00:02, 127.55it/s]

empty bucket
empty bucket


Progress:  83%|████████▎ | 1268/1535 [00:08<00:02, 98.80it/s] 

empty bucket
empty bucket


Progress:  89%|████████▊ | 1361/1535 [00:09<00:01, 147.40it/s]

empty bucket
empty bucket
empty bucket


Progress:  91%|█████████ | 1400/1535 [00:09<00:00, 158.37it/s]

empty bucket
empty bucket
empty bucket


Progress:  94%|█████████▎| 1438/1535 [00:10<00:00, 152.78it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  96%|█████████▌| 1477/1535 [00:10<00:00, 151.15it/s]

empty bucket
empty bucket
empty bucket


Progress:  98%|█████████▊| 1508/1535 [00:10<00:00, 108.97it/s]

empty bucket
empty bucket
empty bucket


Progress:  99%|█████████▉| 1521/1535 [00:10<00:00, 96.25it/s] 

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress: 100%|██████████| 1535/1535 [00:11<00:00, 134.87it/s]


Test accuracy:  0.5908794788273616
Parameters: l= 128 n= 2 cut= 10 metric= cosine


Progress:   4%|▍         | 66/1535 [00:00<00:04, 342.30it/s]

empty bucket


Progress:   7%|▋         | 101/1535 [00:00<00:04, 308.72it/s]

empty bucket
empty bucket
empty bucket


Progress:  14%|█▍        | 217/1535 [00:00<00:04, 313.83it/s]

empty bucket


Progress:  16%|█▋        | 251/1535 [00:00<00:04, 275.21it/s]

empty bucket
empty bucket
empty bucket


Progress:  20%|██        | 307/1535 [00:01<00:05, 223.85it/s]

empty bucket
empty bucket


Progress:  22%|██▏       | 331/1535 [00:01<00:09, 133.25it/s]

empty bucket
empty bucket


Progress:  24%|██▍       | 371/1535 [00:01<00:09, 124.52it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  25%|██▌       | 386/1535 [00:02<00:15, 72.60it/s] 

empty bucket
empty bucket


Progress:  27%|██▋       | 411/1535 [00:02<00:14, 75.70it/s]

empty bucket


Progress:  29%|██▉       | 444/1535 [00:03<00:12, 90.18it/s]

empty bucket
empty bucket
empty bucket


Progress:  33%|███▎      | 499/1535 [00:03<00:07, 139.21it/s]

empty bucket
empty bucket
empty bucket


Progress:  35%|███▌      | 544/1535 [00:03<00:06, 161.81it/s]

empty bucket
empty bucket


Progress:  41%|████      | 629/1535 [00:03<00:04, 216.63it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  44%|████▍     | 681/1535 [00:04<00:05, 148.71it/s]

empty bucket
empty bucket
empty bucket


Progress:  47%|████▋     | 724/1535 [00:04<00:07, 111.55it/s]

empty bucket
empty bucket


Progress:  55%|█████▍    | 842/1535 [00:05<00:02, 259.10it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  59%|█████▉    | 907/1535 [00:05<00:02, 223.35it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  61%|██████    | 935/1535 [00:05<00:03, 158.88it/s]

empty bucket
empty bucket


Progress:  65%|██████▍   | 993/1535 [00:06<00:03, 176.66it/s]

empty bucket
empty bucket
empty bucket


Progress:  70%|██████▉   | 1069/1535 [00:06<00:01, 258.40it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  76%|███████▌  | 1167/1535 [00:06<00:01, 273.60it/s]

empty bucket
empty bucket


Progress:  78%|███████▊  | 1204/1535 [00:07<00:01, 248.43it/s]

empty bucket
empty bucket
empty bucket


Progress:  81%|████████  | 1236/1535 [00:07<00:01, 233.15it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  82%|████████▏ | 1264/1535 [00:07<00:01, 163.10it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  84%|████████▍ | 1287/1535 [00:07<00:01, 150.54it/s]

empty bucket
empty bucket


Progress:  91%|█████████ | 1395/1535 [00:08<00:00, 227.47it/s]

empty bucket
empty bucket
empty bucket


Progress:  93%|█████████▎| 1421/1535 [00:08<00:00, 220.97it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  96%|█████████▌| 1470/1535 [00:08<00:00, 192.90it/s]

empty bucket
empty bucket


Progress:  99%|█████████▊| 1514/1535 [00:08<00:00, 177.89it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress: 100%|██████████| 1535/1535 [00:09<00:00, 170.41it/s]


empty bucket
Test accuracy:  0.6019543973941368
Parameters: l= 128 n= 2 cut= 15 metric= euclid


Progress:   1%|          | 15/1535 [00:00<00:11, 136.80it/s]

empty bucket
empty bucket


Progress:   8%|▊         | 117/1535 [00:00<00:07, 185.49it/s]

empty bucket


Progress:  21%|██        | 315/1535 [00:01<00:05, 226.96it/s]

empty bucket
empty bucket


Progress:  26%|██▌       | 392/1535 [00:01<00:05, 196.26it/s]

empty bucket
empty bucket
empty bucket
empty bucket
empty bucket


Progress:  34%|███▎      | 518/1535 [00:02<00:05, 190.25it/s]

empty bucket


Progress:  39%|███▊      | 594/1535 [00:03<00:06, 137.50it/s]

empty bucket
empty bucket


Progress:  41%|████      | 627/1535 [00:03<00:06, 138.37it/s]

empty bucket
empty bucket


Progress:  42%|████▏     | 642/1535 [00:03<00:06, 139.64it/s]

empty bucket
empty bucket
empty bucket


Progress:  46%|████▌     | 709/1535 [00:03<00:05, 148.85it/s]

empty bucket
empty bucket


Progress:  53%|█████▎    | 809/1535 [00:04<00:06, 119.88it/s]

empty bucket
empty bucket
empty bucket


Progress:  57%|█████▋    | 868/1535 [00:05<00:06, 109.58it/s]

empty bucket


Progress:  61%|██████    | 929/1535 [00:05<00:04, 137.39it/s]

empty bucket
empty bucket


Progress:  64%|██████▍   | 986/1535 [00:05<00:03, 163.69it/s]

empty bucket
empty bucket


Progress:  70%|███████   | 1076/1535 [00:06<00:04, 105.75it/s]

empty bucket


Progress:  75%|███████▍  | 1147/1535 [00:07<00:04, 91.07it/s] 

empty bucket
empty bucket


Progress:  78%|███████▊  | 1190/1535 [00:07<00:03, 97.71it/s]

empty bucket
empty bucket


Progress:  83%|████████▎ | 1273/1535 [00:08<00:01, 146.71it/s]

empty bucket
empty bucket


Progress:  93%|█████████▎| 1431/1535 [00:10<00:01, 66.77it/s] 

empty bucket


Progress:  97%|█████████▋| 1488/1535 [00:11<00:00, 106.28it/s]

empty bucket
empty bucket


Progress: 100%|██████████| 1535/1535 [00:11<00:00, 133.51it/s]


Test accuracy:  0.6149837133550489
Parameters: l= 128 n= 2 cut= 15 metric= cosine


Progress:   1%|          | 10/1535 [00:00<00:22, 66.90it/s]

empty bucket
empty bucket


Progress:   4%|▎         | 57/1535 [00:00<00:13, 105.75it/s]

empty bucket


Progress:   7%|▋         | 101/1535 [00:00<00:08, 160.21it/s]

empty bucket
empty bucket


Progress:  20%|██        | 314/1535 [00:02<00:07, 170.36it/s]

empty bucket
empty bucket


Progress:  25%|██▍       | 383/1535 [00:03<00:07, 159.71it/s]

empty bucket
empty bucket
empty bucket
empty bucket


Progress:  28%|██▊       | 434/1535 [00:03<00:05, 195.95it/s]

empty bucket
empty bucket
empty bucket


Progress:  33%|███▎      | 502/1535 [00:03<00:05, 199.16it/s]

empty bucket


Progress:  40%|███▉      | 612/1535 [00:04<00:04, 228.38it/s]

empty bucket
empty bucket
empty bucket


Progress:  43%|████▎     | 658/1535 [00:04<00:04, 192.62it/s]

empty bucket
empty bucket


Progress:  47%|████▋     | 720/1535 [00:04<00:04, 193.61it/s]

empty bucket
empty bucket


Progress:  51%|█████▏    | 789/1535 [00:05<00:03, 209.17it/s]

empty bucket
empty bucket


Progress:  55%|█████▍    | 837/1535 [00:05<00:03, 213.71it/s]

empty bucket
empty bucket
empty bucket


Progress:  57%|█████▋    | 880/1535 [00:05<00:03, 198.89it/s]

empty bucket


Progress:  62%|██████▏   | 947/1535 [00:05<00:02, 205.35it/s]

empty bucket


Progress:  65%|██████▍   | 992/1535 [00:06<00:02, 203.92it/s]

empty bucket
empty bucket


Progress:  68%|██████▊   | 1046/1535 [00:06<00:03, 124.16it/s]

empty bucket


Progress:  72%|███████▏  | 1104/1535 [00:07<00:02, 163.21it/s]

empty bucket


Progress:  75%|███████▌  | 1157/1535 [00:07<00:02, 149.17it/s]

empty bucket


Progress:  77%|███████▋  | 1189/1535 [00:07<00:02, 145.68it/s]

empty bucket


Progress: 100%|██████████| 1535/1535 [00:09<00:00, 158.16it/s]


empty bucket
empty bucket
empty bucket
Test accuracy:  0.6201954397394137
Parameters: l= 128 n= 4 cut= 5 metric= euclid
