In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys, traceback
import json
from collections import Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from dataset import *

#sys.path.insert(0, '/mnt/d/projects/bassline_transcription')
sys.path.insert(0, '/scratch/users/udemir15/ELEC491/bassline_transcription')

import bassline_transcriber.transcription as transcription
from utilities import *

In [2]:
def load_data(data_params):
    """Read a bassline-representation CSV and split it into codes and titles.

    The file name is assembled as ``<dataset_name>_<scale_type>_M<M>.csv`` and
    looked up inside ``data_params['dataset_path']``. The CSV is read without a
    header row; its first column is returned as the titles and every remaining
    column is returned as data.

    Parameters
    ----------
    data_params : dict
        Must provide the keys 'dataset_path', 'scale_type', 'M' and
        'dataset_name'.

    Returns
    -------
    X : numpy.ndarray
        All columns after the first, one row per CSV line.
    titles : list
        Values of the first column, in file order.
    """
    dataset_path, scale_type, M = (
        data_params[field] for field in ('dataset_path', 'scale_type', 'M')
    )

    file_name = data_params['dataset_name'] + '_{}_M{}.csv'.format(scale_type, M)
    csv_path = os.path.join(dataset_path, file_name)

    table = pd.read_csv(csv_path, header=None)

    # Column 0 carries the title; everything to its right is the data matrix.
    titles = table.iloc[:, 0].tolist()
    X = table.iloc[:, 1:].to_numpy()

    return X, titles

def bars_to_representation(bar, M, N_bars, key, silence_code=0, sustain_code=100):
    """Convert a bar into its encoded representation.

    The conversion is a two-step pipeline delegated to the ``transcription``
    module: ``create_midi_array`` is called first, and its result is passed to
    ``encode_midi_array``.

    Parameters
    ----------
    bar
        Input forwarded unchanged to ``transcription.create_midi_array``.
    M, N_bars
        Forwarded unchanged to both transcription calls.
        NOTE(review): presumably a decimation/quantisation factor and the
        number of bars in the segment; confirm against the transcription module.
    key
        Forwarded unchanged to ``transcription.encode_midi_array``.
    silence_code : int, optional
        Code used for silence in both steps. Defaults to 0, the value that was
        previously hard-coded.
    sustain_code : int, optional
        Code used for sustain in the encoding step. Defaults to 100, the value
        that was previously hard-coded.

    Returns
    -------
    Whatever ``transcription.encode_midi_array`` returns.
    """
    midi_array = transcription.create_midi_array(bar, M, N_bars, silence_code=silence_code)

    # The same silence code must be used in both steps so the encoder
    # recognises the silences written by create_midi_array.
    representation = transcription.encode_midi_array(
        midi_array, M, N_bars, key,
        silence_code=silence_code, sustain_code=sustain_code)

    return representation

In [3]:
# Load the project's directory configuration from the JSON file next to the data.
directories = get_directories('../data/directories.json')

# Fetch the track dictionaries (and their titles) stored under the given JSON
# file name. NOTE(review): presumably resolved relative to one of the
# `directories` entries; confirm in get_track_dicts.
track_dicts, track_titles = get_track_dicts(directories, 'TechHouse_total_track_dicts.json')

# Sanity check: report how many tracks were loaded.
print('Total number of tracks in the dicts: {}'.format(len(track_dicts)))

Total number of tracks in the dicts: 10460


In [4]:
# M is part of the dataset file name (see load_data) and is reused when the
# repeated dataset is written out at the end of the notebook.
M = 8

# Everything load_data needs to locate `<dataset_name>_<scale_type>_M<M>.csv`.
data_params = dict(
    dataset_path='/scratch/users/udemir15/ELEC491/bassline_transcription/data/datasets/[28, 51]',
    dataset_name='TechHouse_bassline_representations',
    scale_type='min',
    M=M,
)

# X holds the data columns, df_titles the first (title) column of the CSV.
X, df_titles = load_data(data_params)
print(len(df_titles))

4421


In [6]:
# Run repeat_dataset once per segment length, in the order 1, 2, 4 bars.
# The 4-bar case is actually no repetition; it is included for consistency.
repeated_by_bars = {}
for N_bars_repeat in (1, 2, 4):
    repeated_by_bars[N_bars_repeat] = repeat_dataset(
        df_titles, track_dicts, N_bars_repeat, directories)

# Keep the individual results available under their per-length names.
repeated_1, repeated_titles_1 = repeated_by_bars[1]
repeated_2, repeated_titles_2 = repeated_by_bars[2]
repeated_4, repeated_titles_4 = repeated_by_bars[4]

# Stack the three variants row-wise: 4-bar first, then 2-bar, then 1-bar.
# Titles are stacked in the same order so rows and titles stay aligned.
new_representations = np.concatenate((repeated_4, repeated_2, repeated_1), axis=0)
new_titles = np.concatenate((repeated_titles_4, repeated_titles_2, repeated_titles_1), axis=0)

print(new_representations.shape)

Segment length: 128
Before concat: (17473, 16)
After concat: (17473, 64)
Segment length: 256
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
full silence
Before concat: (8769, 32)
After concat: (8769, 64)


In [17]:
# Build a DataFrame from the stacked representations and their titles.
# NOTE(review): judging by the printed output, df_from_codes also remaps the
# codebook to consecutive integers and filters tracks; confirm in its source.
df = df_from_codes(new_representations, new_titles)

# Preview the first rows (bare expression so the notebook renders the table).
df.head()


Codebook before correction:
[0, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 100]

Codebook after correction:
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]

Filtered Codes:
[]

0/30631 Tracks filtered out because of its notes!
Final Dataset size: 30631


Unnamed: 0,Title,0,1,2,3,4,5,6,7,8,...,54,55,56,57,58,59,60,61,62,63
0,RAMA7 - Smiling Faces (Original Mix),0,9,25,25,25,4,25,7,0,...,25,7,0,9,25,25,25,4,25,7
1,Mind Body & Soul (Original Mix),0,25,25,25,25,15,0,25,25,...,15,25,25,0,15,16,15,25,25,16
2,All I Need,0,25,25,25,24,0,25,12,25,...,25,25,25,25,0,12,25,25,25,25
3,Eydrian - What Is Your What is Mine (F.Gazza J...,0,10,0,25,25,12,0,10,9,...,0,25,10,25,0,12,0,9,0,12
4,Anatta - Kickback Afterhours (Original Mix),0,25,25,20,25,0,14,21,25,...,14,21,20,14,16,22,25,16,25,25


In [18]:
dataset_name = 'TechHouse_bassline_representations_repeated'

representation_path = os.path.join('..','data','datasets','[28, 51]','repeated')
# DataFrame.to_csv does not create missing parent folders, so make sure the
# output directory exists; otherwise a fresh checkout fails on the write below.
os.makedirs(representation_path, exist_ok=True)

# File name follows the `<dataset_name>_<scale_type>_M<M>.csv` pattern that
# load_data expects, with the scale type fixed to 'min'.
min_title = dataset_name+'_min_M{}.csv'.format(M)
min_dir = os.path.join(representation_path, min_title)

# Written without index or header row, matching the header-less layout that
# load_data reads back with `header=None`.
df.to_csv(min_dir, index=False, header=False)