In [2]:
### uses vgg.json file to create binary images of the ground truth fretboard segmentations

import cv2 as cv
import json
import numpy as np
import os
from matplotlib import pyplot as plt
from PIL import Image

# Builds one binary ground-truth mask per frame from the VGG annotation JSON.
# For every frame, the polygon stored in region "0" is filled with 255 on a
# black single-channel image of the same size, and the result is written next
# to the frame as "annotated_<frame name>".
#
# expects fretboard dataset to be in data folder on same level as tabs_generator folder
train_str = "test" # change to train or test to perform on respective data
dataset_path = "../data/fretboard_dataset/"
images_path = f"../data/fretboard_dataset/fretboard_frames_{train_str}/"
# Masks are saved into this same folder, so leave out masks from earlier runs;
# otherwise a re-run would produce "annotated_annotated_*" files.
image_fns = [fn for fn in os.listdir(images_path) if "annotated" not in fn]
json_fn = os.path.join(dataset_path, f"fretboard_labels_{train_str}_vgg.json")
with open(json_fn) as f: # context manager closes the file handle
    data = json.load(f) # data is dict of json contents
k = list(data.keys()) # annotation keys, kept as a cell-level name

# Pair frames with annotations by file name rather than by position:
# os.listdir() returns names in arbitrary order, which need not match the
# key order of the JSON file.
# NOTE(review): assumes each entry carries the VIA "filename" field; entries
# without it fall back to their dict key -- confirm against the label file.
labels_by_fn = {entry.get("filename", key): entry for key, entry in data.items()}

for fn in image_fns:
    entry = labels_by_fn.get(fn)
    if entry is None:
        print(f"no annotation found for {fn}, skipping")
        continue
    img_path = os.path.join(images_path, fn)
    out_path = os.path.join(images_path, "annotated_"+fn)
    shape_attrs = entry["regions"]["0"]["shape_attributes"]
    # Build a fresh list of (x, y) corners; the loaded JSON is left unmodified.
    corners = list(zip(shape_attrs["all_points_x"], shape_attrs["all_points_y"]))
    # Drop the closing point only when it really repeats the first corner.
    if len(corners) > 1 and corners[-1] == corners[0]:
        corners = corners[:-1]
    poly_corners = np.asarray([corners], dtype=np.int32) # repackage into np array w/ correct dtype
    with Image.open(img_path) as frame:
        width, height = frame.size # only the frame dimensions are needed
    new_img = np.zeros((height, width), np.uint8) # create empty binary image mask
    cv.fillPoly(new_img, poly_corners, 255) # fill polygon of annotated coordinates in place
    Image.fromarray(new_img).save(out_path) # save output image

In [1]:
%matplotlib inline
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
import os
import json
from scipy.ndimage import median_filter

# Detects line segments in every training frame and stores them in a JSON file.
# Per frame: grayscale -> gamma correction -> unsharp masking -> Canny edges ->
# probabilistic Hough transform. Up to MAX_LINES segments are kept and flattened
# to [x1, y1, x2, y2, x1, y1, ...] under the frame's file name.
dataset_path = "../data/fretboard_dataset/"
garrett_path = "../data/new_dataset/garrett_amogus/"
frames_path = "../data/fretboard_dataset/fretboard_frames_train/"
test_frames_path = "../data/fretboard_dataset/fretboard_frames_test/33_4.png" # canny doesn't find the strings very well for this guitar
out = "../data/fretboard_dataset/train.json"
p = frames_path
frames_fn_list = os.listdir(p)
# Ground-truth masks ("annotated_*") live in the same folder; keep raw frames only.
train_fns = [fn for fn in frames_fn_list if "annotated" not in fn]

MAX_LINES = 400 # upper bound on stored segments per frame

# Gamma-correction lookup table for contrast enhancement. It does not depend on
# the frame, so it is built once here and applied once per frame below.
gamma = 0.5
lookUpTable = np.array(
    [[np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255) for i in range(256)]]
).astype(np.uint8)

data = {}
for frame_fn in train_fns:
    img = cv.imread(os.path.join(p, frame_fn))
    if img is None: # cv.imread returns None for unreadable / non-image files
        print(f"could not read {frame_fn}, skipping")
        continue
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
    grayCE = cv.LUT(gray, lookUpTable)
    blurred = cv.GaussianBlur(grayCE, (0,0), 2.0)
    unsharp = cv.addWeighted(grayCE, 3.0, blurred, -2, 0) # unsharp masking
    canny = cv.Canny(unsharp, 150, 170, apertureSize=3, L2gradient=True)
    linesP = cv.HoughLinesP(canny, 1, np.pi / 180, 50, None, minLineLength=6, maxLineGap=10)
    if linesP is None:
        # No segment detected: record an empty list instead of failing on the slice.
        data[frame_fn] = []
    else:
        # Keep at most MAX_LINES segments and flatten their coordinates.
        data[frame_fn] = linesP[:MAX_LINES].ravel().tolist()

with open(out, "w") as outfile:
    json.dump(data, outfile)


Idea for finding the fretboard:
1. create ideal locations of the fretboard markers
2. detect features using corner detection or blob detection
3. keep top N features
4. feed these features' coordinates into a neural network to find the fretboard?

In [13]:
import cv2 as cv
import json
import numpy as np
import os
import random
from matplotlib import pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

# paths
dataset_path = "../data/fretboard_dataset/"
train_path = os.path.join(dataset_path, "fretboard_frames_train")
json_fn = os.path.join(dataset_path, "fretboard_labels_train_vgg.json")
# List the training folder itself. The previous `images_path` is not defined in
# this cell (it only exists if another cell ran first), and there it may point
# at a different split than `train_path`, from which the images are loaded.
image_fns = os.listdir(train_path)
# Ground-truth masks ("annotated_*") live in the same folder; keep raw frames only.
train_fns = [fn for fn in image_fns if "annotated" not in fn]

def load_random_sample():
    """Pick a random training frame, sharpen it, and detect line segments.

    Pipeline: grayscale -> gamma correction (gamma = 1.5) -> Laplacian
    sharpening -> Canny edges -> probabilistic Hough transform. The sharpened
    image is shown with matplotlib. Up to 400 detected segments are drawn onto
    an RGB copy of the edge map (`cdstP`); that overlay is currently neither
    displayed nor returned.

    Reads the module-level `train_fns` and `train_path`.

    Returns:
        None.

    Raises:
        ValueError: if the chosen file cannot be read as an image.
    """
    f = random.choice(train_fns)
    img = cv.imread(os.path.join(train_path, f))
    if img is None: # cv.imread returns None instead of raising
        raise ValueError(f"could not read image {f!r} from {train_path!r}")
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    # Gamma-correction lookup table for contrast enhancement; built completely
    # first, then applied once.
    gamma = 1.5
    lookUpTable = np.array(
        [[np.clip(pow(i / 255.0, gamma) * 255.0, 0, 255) for i in range(256)]]
    ).astype(np.uint8)
    grayCE = cv.LUT(gray, lookUpTable)

    # Laplacian sharpening. The float result can leave [0, 255], so clip before
    # converting to uint8; a plain cast would wrap out-of-range values.
    lap = cv.Laplacian(gray, cv.CV_64F)
    sharp = np.clip(grayCE - 0.3 * lap, 0, 255).astype(np.uint8)

    plt.figure(figsize=(12,12))
    plt.imshow(sharp, cmap="gray")
    plt.show()

    canny = cv.Canny(sharp, 200, 250, apertureSize=3, L2gradient=True)
    cdstP = cv.cvtColor(canny, cv.COLOR_GRAY2RGB)
    linesP = cv.HoughLinesP(canny, 1, np.pi / 180, 50, None, 2, 15)
    # HoughLinesP yields None when nothing is found, so test before slicing.
    if linesP is not None:
        for segment in linesP[:400]: # keep at most 400 segments
            x1, y1, x2, y2 = (int(v) for v in segment[0])
            cv.line(cdstP, (x1, y1), (x2, y2), (0,0,255), 3, cv.LINE_AA)

[599, 911, 937, 631]
