In [6]:
import re

def get_label(line):
    fields = line.split('\t')
    notes = [note for note in fields if 'note' in note] 
    notes_processed = []
    for note in notes:
        # ignore rests and gracenotes
        if 'gracenote' in note:
            pass
        else:
            # search for sharps and flats
            # redundant because we will try to recognize the clef key
            match = re.search(r'[A-Z][b#][0-9]', note)
            if match is not None:
                index = match.start()
                note = note[:index + 1] + note[index + 2:]
            notes_processed.append(note)
    return notes_processed

In [15]:
import os
import numpy as np
from PIL import Image
import shutil

data_dir_path = 'D:\\primus'
labels = []

index=0
for folder_name in os.listdir(data_dir_path):
    with open(data_dir_path + "\\" + folder_name + f"\\{folder_name}.semantic") as semantic_file:
        content = semantic_file.readline()
        # only take samples in G2 clef
        # and contains 1 clef only
        if 'clef-G2' in content and content.count('clef') == 1:
            labels.append(get_label(content))
            shutil.copy(data_dir_path + "\\" + folder_name + f"\\{folder_name}.png",
                        f"d:\\\primus_cleaned_data\\{index}.png")
            index += 1

FileNotFoundError: [Errno 2] No such file or directory: 'D:\\primus\\data\\data.semantic'

In [16]:
print(len(labels))

48040


In [53]:
print(labels[1])

['note-F4_eighth', 'note-B4_half.', 'note-E5_eighth', 'note-D5_eighth', 'note-D5_half.', 'note-G5_eighth', 'note-F5_eighth', 'note-F5_quarter', 'rest-sixteenth', 'note-F5_sixteenth', 'note-G5_sixteenth', 'note-A5_sixteenth', 'note-B5_sixteenth', 'note-F5_sixteenth', 'note-F5_sixteenth', 'note-F5_sixteenth', 'note-C6_sixteenth', 'note-F5_sixteenth', 'note-F5_sixteenth', 'note-F5_sixteenth']


In [19]:
# save list of labels to json
import json

with open('dataset\\labels.json', 'w') as f:
    json.dump(labels, f, indent=3)

In [20]:
# load list of labels from json
with open('dataset\\labels.json', 'r') as f:
    labels_loaded = json.load(f)

In [21]:
print(len(labels_loaded))

48040


In [6]:
# Use the template matching method of OpenCV and Tesseract
# to identify notes and box them
import os
import cv2
import pytesseract
import numpy as np

pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

def find_best_coeff(template, img2, res_max_global, increasing=True, decreasing=True, time=False):
    max_matches = 0
    best_coeff = 1
    template_copy = template

    ###################Decreasing:
    if decreasing:
        for i in range(3):
            template = template_copy
            img = img2.copy()
            # Scaling coeff in range [0.2, 1] with step = 0.1
            coeff = 1 - i/10.0
            template = cv2.resize(template, (0,0), fx=coeff, fy=coeff)
            w, h = template.shape[::-1]

            try:
                res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
            except:
                continue
            res_max = res.max()

            threshold = res_max_global
            loc = np.where(res >= threshold)
            indices = set()
            for pt in zip(*loc[::-1]):
                x = int(pt[0] + w/2)
                upper_bound = x + 30
                lower_bound = x - 30
                found = False
                for i in range(lower_bound, upper_bound):
                    if i in indices: 
                        found = True
                        break
                if not found:
                    indices.add(x)
            if time:
                if res_max >= res_max_global:
                    res_max_global = res_max
                    best_coeff = coeff
            elif len(indices) > max_matches and res_max >= res_max_global:
                max_matches = len(indices)
                res_max_global = res_max
                best_coeff = coeff
                

    ###################Increasing:
    if increasing:
        for i in range(3):
            template = template_copy
            img = img2.copy()
            # Scaling coeff in range [1, 1.8] with step = 0.1
            coeff = 1 + i/10.0
            template = cv2.resize(template, (0,0), fx=coeff, fy=coeff)
            w, h = template.shape[::-1]
            if(template.shape[0] > img.shape[0]): break

            res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
            res_max = res.max()

            threshold = res_max_global
            loc = np.where(res >= threshold)
            indices = set()
            for pt in zip(*loc[::-1]):
                x = int(pt[0] + w/2)
                upper_bound = x + 30
                lower_bound = x - 30
                found = False
                for i in range(lower_bound, upper_bound):
                    if(i in indices): 
                        found = True
                        break
                if not found:
                    indices.add(x)
            if len(indices) > max_matches and res_max >= res_max_global:
                max_matches = len(indices)
                res_max_global = res_max
                best_coeff = coeff

    return(res_max_global, best_coeff)

def staff_to_boxed_notes(file):
    img = cv2.imread(file, cv2.IMREAD_GRAYSCALE)
    indices = set()
    
    drawn_indices = set()
    list_pt = {}
    boxes = []
    # start_index = 0
    # time_indices = set()
    # best_res = 0
    
    # # detect time signature, then crop the staff
    # # to start from there
    # for dir in os.listdir('template\\time'):
    #     template = cv2.imread(f'template\\time\\{dir}', cv2.IMREAD_GRAYSCALE)
    #     template = cv2.resize(template,(0,0),fx=img.shape[0]/template.shape[0],fy=img.shape[0]/template.shape[0])
    #     template_copy = template.copy()
        
    #     res_max, coeff = find_best_coeff(template_copy, img, 0.7, increasing=False, time=True)
        
    #     template = cv2.resize(template,(0,0),fx=coeff,fy=coeff) #scale the template to the best coeff

    #     w, h = template.shape[::-1] # Get w and h of template
    #     res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED) # Begin matching and get matches
    #     if res_max > best_res:
    #         best_res = res_max
    #         best_template = template
    # threshold = best_res
    # template = best_template
    # res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED) # Begin matching and get matches
    # loc = np.where(res >= threshold) # With threshold to offset the difference between template and notes (with staff)
    
    # for pt in zip(*loc[::-1]):
    #     x = int(pt[0] + w/2)
    #     upper_bound = x + 10
    #     lower_bound = x - 10
    #     found = False
    #     for i in range(lower_bound, upper_bound):
    #         if(i in indices):
    #             found = True
    #             break
    #     if not found:
    #         list_pt[x] = pt
    #         time_indices.add(x)
    # if len(time_indices) > 0:
    #     for x in time_indices:
    #         pt = list_pt[x]
    #         start_index = pt[0]
    # img = img[0:img.shape[0],start_index + best_template.shape[1]:start_index + img.shape[1]]
    # list_pt = {}
    
    # cv2.imwrite("cropped.jpg", img)

    for dir in os.listdir('template\\roots'):
        # The template to be matched
        template = cv2.imread(f'template\\roots\\{dir}', cv2.IMREAD_GRAYSCALE)

        res_max, coeff = find_best_coeff(template, img, 0.8)
        template = cv2.resize(template,(0,0),fx=coeff,fy=coeff) #scale the tepmplate to the best coeff

        w, h = template.shape[::-1] # Get w and h of template

        try:
            res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED) # Begin matching and get matches
        except:
            continue
        if 'half-note' in dir or 'whole-note' in dir:
            threshold = res_max * 0.7
        else:
            threshold = res_max * 0.7
        loc = np.where(res >= threshold) # With threshold to offset the difference between template and notes (with staff)

        # Get a set of center indices of boxes for further testing
        
        for pt in zip(*loc[::-1]):
            x = int(pt[0] + w/2)
            upper_bound = x + 10
            lower_bound = x - 10
            found = False
            for i in range(lower_bound, upper_bound):
                if(i in indices):
                    found = True
                    break
            if not found:
                list_pt[x] = pt
                indices.add(x)
        for x in indices:
            pt = list_pt[x]
            # draw boxes in copy of image
            if x not in drawn_indices:
                boxes.append((x, cv2.resize(img[0:img.shape[0], pt[0] - int(w * 0.5):pt[0] + int(w * 1.5)], (49, 149))))
                # cv2.rectangle(img, (pt[0], 0), (pt[0] + int(w * 1.4), img.shape[0]), (0,0,255), 2)
                drawn_indices.add(x)
        # cv2.imwrite('boxed.jpg', img)
    boxes.sort(key=lambda x:x[0])
    boxes = [i[1] for i in boxes]
    return boxes

In [34]:
file = "d:\\primus_cleaned_data\\7.png"
notes_found = staff_to_boxed_notes(file)
print(len(notes_found))
print(notes_found[0].shape)
cv2.imwrite('box.jpg', notes_found[0])

17
(149, 40)


True

In [140]:
import json

test_index = 30
# load list of labelsfrom json
with open('dataset\\labels.json', 'r') as f:
    labels_loaded = json.load(f)
    
notes_count = sum(1 for token in labels_loaded[test_index] if 'note' in token and 'gracenote' not in token)
print(notes_count)
print(len(labels_loaded[test_index]))
print(labels_loaded[test_index])

16
17
['note-F5_half.', 'note-E5_eighth', 'note-D5_eighth', 'note-D5_half.', 'note-C5_eighth', 'note-B4_eighth', 'note-B4_half', 'note-B4_eighth', 'note-C5_eighth', 'note-C5_eighth', 'note-B4_eighth', 'note-A4_eighth', 'note-B4_eighth', 'gracenote-A4_eighth', 'note-G4_quarter', 'note-F4_quarter', 'rest-half']


In [45]:
import json
import joblib
import sys
from tqdm.notebook import tqdm

# load list of labelsfrom json
with open('dataset\\labels.json', 'r') as f:
    labels_loaded = json.load(f)
    
num = len(labels_loaded)
count = 0
correct_samples = []

def check_samples(i):
    file = f"d:\\primus_cleaned_data\\{i}.png"
    found_notes = list(staff_to_boxed_notes(file))
    notes_count = len(labels_loaded[i])
    print(f"checked {i}.png ", end='\r')
    if notes_count == len(found_notes):
        correct_samples.append(i)
        return 1
    else:
        return 0

for i in tqdm(range(num)):
    count += check_samples(i)
    print(f"correctly identified: {count}", end="\r")


  0%|          | 0/48040 [00:00<?, ?it/s]

correctly identified: 12137

KeyboardInterrupt: 

In [40]:
# save list of correct samples' indices to json
import json

with open('dataset\\matched_samples.json', 'w') as f:
    json.dump(correct_samples, f, indent=3)

In [7]:
import json
from tqdm.notebook import tqdm

test_index = 30
# load list of labelsfrom json
with open('dataset\\matched_samples.json', 'r') as f:
    matched_samples = json.load(f)
    
for i in tqdm(matched_samples):
    boxes = staff_to_boxed_notes(f'D:\\primus_cleaned_data\\{i}.png')
    count = 0
    for box in boxes:
        cv2.imwrite(f"D:\\primus_notes\\{i}_{count}.jpg", box)
        count += 1
    print(f"extracted notes in {i}.jpg", end='\r')

  0%|          | 0/41495 [00:00<?, ?it/s]

extracted notes in 48039.jpg