In [25]:
import random
import os
import csv
from dataclasses import dataclass

import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

In [2]:
# Set device and CPU threads
from multiprocessing import cpu_count
torch.set_num_threads(cpu_count())
print(f'Using {torch.get_num_threads()} threads')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Using device {device}')

Using 32 threads
Using device cuda:0


In [15]:
# Tokenizer
NOTES = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
NOT_VOICE_TOKEN = '<N>'
END_OF_FILE_TOKEN = '<EOF>'

ALLOWED_TOKENS = []
for octave in [2,3,4,5]:
    o = str(octave)
    for n in NOTES:
        ALLOWED_TOKENS.append(n+o)

stoi = {s:i+1 for i,s in enumerate(ALLOWED_TOKENS)}
stoi[NOT_VOICE_TOKEN] = 0
itos = {i:s for s,i in stoi.items()}
vocab_size = len(itos)

In [4]:
# 30 seconds of data + around 87 pitch readings per second = 2610 -> make it 2700
BLOCK_SIZE = 87 * 30 + 90

In [17]:
# CLASS NAMES - NOTE: Do not change numbering.
CLASS_NAMES = {
    'saveri': 0,
    'hemavati': 1
}

In [20]:
@dataclass
class PitchDataFile:
    file_path: str
    pitches: list
    

In [27]:
# For initial tests, keep entire data in memory. Keep only the pitch list.
pitch_data_dir = '../../data/simple-test/pitch_data'

X = []
pitch_data = {}

for class_name in os.listdir(pitch_data_dir):
    if class_name not in CLASS_NAMES:
        print(f'{class_name} not included for training')
        continue
    class_dir = os.path.join(pitch_data_dir, class_name)
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        data = []
        print(f'processing {file_path}')
        with open(file_path, 'r') as file:
            reader = csv.reader(file)
            for row in reader:
                if len(row) >= 3:
                    data.append(row[2])
            pd = PitchDataFile(file_path=file_path, pitches=data)
            sampling_data = [(file_path, i, CLASS_NAMES[class_name]) for i in range(len(data) - BLOCK_SIZE - 1)]
            X.extend(sampling_data)
            pitch_data[file_path] = pd

print(f'Total data: {len(X)}')

processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3
processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_plus_3
processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_minus_1
processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_minus_2
processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_plus_2
processing ../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_plus_1
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3_plus_2
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3_plus_3
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3_plus_1
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3_minus_2
processing ../../data/simple-test/pitch_data/saveri/tmk-saveri_D3_minus_1
Total data: 1844748


In [31]:
random.shuffle(X)
train_size = int(0.8 * len(X))
dev_size = int(0.1 * len(X))

# Split the data
train = X[:train_size]
dev = X[train_size:train_size + dev_size]
test = X[train_size + dev_size:]

Xtrain, Ytrain = [tuple(t[:2]) for t in train], [t[2] for t in train]
Xdev, Ydev = [tuple(t[:2]) for t in dev], [t[2] for t in dev]
Xtest, Ytest = [tuple(t[:2]) for t in test], [t[2] for t in test]

print(f'Xtrain size: {len(Xtrain)}, Ytrain size: {len(Ytrain)}')
print(f'Xdev size: {len(Xdev)}, Ydev size: {len(Ydev)}')
print(f'Xtest size: {len(Xtest)}, Ytest size: {len(Ytest)}')

Xtrain size: 1475798, Ytrain size: 1475798
Xdev size: 184474, Ydev size: 184474
Xtest size: 184476, Ytest size: 184476


In [34]:
print(Xtrain[2:4])
print(Ytrain[2:4])

[('../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_minus_2', 16920), ('../../data/simple-test/pitch_data/hemavati/aruna-hemavati_F3_plus_3', 9673)]
[1, 1]


['               total        used        free      shared  buff/cache   available',
 'Mem:           63474        8739       50357         162        5243       54734',
 'Swap:           2047           0        2047']