# In [2]:
import numpy as np
import cv2
import os
from roipoly import roipoly
import sys
from numpy.linalg import inv, det
import math
import matplotlib.pyplot as plt

folder = "trainset"
IMAGE_NUM = 50

# The labeled masks do not depend on the image index, so load them once
# instead of re-reading the file on every loop iteration.
masks = np.load("masks.npy")

# List the directory once rather than once per image.
# NOTE(review): os.listdir returns entries in arbitrary order; masks.npy is
# presumably indexed in this same order -- confirm against the labeling
# script before sorting or filtering this list.
filenames = os.listdir(folder)
if len(filenames) < IMAGE_NUM:
    raise IndexError(
        "expected at least {} files in {!r}, found {}".format(
            IMAGE_NUM, folder, len(filenames)))

# load each training image and convert it from BGR to YCrCb
images = []
for filename in filenames[:IMAGE_NUM]:
    path = os.path.join(folder, filename)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("could not read image: {}".format(path))
    images.append(cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB))

# converting from list to array
images = np.asarray(images)

# sorting training set pixels
# Flatten each image into a list of 3-channel pixels and each mask into a
# matching list of labels. -1 lets numpy infer the per-image pixel count
# instead of hard-coding it.
images_reshaped = np.reshape(images, (IMAGE_NUM, -1, 3))
masks_reshaped = np.reshape(masks, (IMAGE_NUM, -1))

# Boolean indexing selects the labeled / unlabeled pixels in one vectorized
# step, in the same image-major, pixel-minor order as an explicit
# per-pixel loop, but without iterating over every pixel in Python.
is_red = np.equal(masks_reshaped, True)
red_px = images_reshaped[is_red]
non_red_px = images_reshaped[~is_red]

# calculate mean and cov for each gaussian classifier
# (rows of *_px are pixels, so np.cov needs the transpose: one row per channel)
red_px_mean = np.mean(red_px, axis=0)
red_px_conv = np.cov(red_px.T)

non_red_px_mean = np.mean(non_red_px, axis=0)
non_red_px_conv = np.cov(non_red_px.T)



# train linear regression model: barrel distance as a cubic polynomial of
# the barrel's bounding-box area

# load distances from a .npy from previous labeling
distances = np.load("distances.npy")

training_area = []

# collect the area of the minimum-area bounding box of every contour found
# in each image's mask
# NOTE(review): every contour of every mask contributes one area, while
# distances keeps a single value per entry; the two only line up if each
# mask yields exactly one contour -- confirm against the labeled data.
for i in range(IMAGE_NUM):
    # findContours returns (image, contours, hierarchy) in OpenCV 3 but
    # (contours, hierarchy) in OpenCV 2 and 4; the contour list is always
    # the second-to-last element.
    contours = cv2.findContours(masks[i, :, :].astype(np.uint8),
                                cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in contours:
        # minAreaRect returns (center, (width, height), angle)
        width, height = cv2.minAreaRect(contour)[1]
        training_area.append(width * height)

# images with two barrels are labeled "<d1>_<d2>"; keep the first distance
# and discard the additional barrels. Splitting on "_" keeps the whole first
# number (taking only the first character would turn "10_4" into "1").
for i, d in np.ndenumerate(distances):
    if "_" in d:
        distances[i] = d.split("_")[0]

# type cast and convert to numpy array
# (astype works on both Python 2 and 3; wrapping map() in np.asarray does not
# produce a numeric array on Python 3, where map returns an iterator)
distances = distances.astype(float)
training_area = np.asarray(training_area)

# reshape into column vector
x = training_area.reshape(len(training_area), 1)
Y = distances.reshape(len(distances), 1)

# third_order fitting: design matrix columns are [1, x, x^2, x^3].
# Lower-order fits (a line or a quadratic) use the same construction with
# fewer columns.
X = np.hstack((np.ones((len(x), 1)), np.power(x, 1), np.power(x, 2), np.power(x, 3)))
X_t = X.transpose((1, 0))
# Solve the normal equations (X^T X) w = X^T Y directly instead of forming
# the explicit inverse, which is slower and less accurate.
sol_to = np.linalg.solve(np.dot(X_t, X), np.dot(X_t, Y))

# plot training data
plt.scatter(training_area, distances)


# using the trained linear regression model to generate distances
def area2distance_train(area):
    """Evaluate the hard-coded cubic polynomial at ``area``.

    Computes ``w0 + w1*area + w2*area**2 + w3*area**3`` with fixed
    coefficients.

    ``area`` may be a scalar or a numpy array. Because each coefficient is a
    length-1 array, a scalar input yields an array of shape ``(1,)`` and a
    1-D array input yields an array of the same length.

    NOTE(review): the coefficients presumably come from an earlier run of the
    third-order fit in this notebook, and the units of the result follow
    whatever units the distance labels use -- confirm.
    """
    coefficients = np.array([[9.84862396e+00],
                             [-5.62142184e-04],
                             [1.17923139e-08],
                             [-7.27030137e-14]])
    # Unpacking a (4, 1) array gives four length-1 arrays, one per power.
    constant, linear, quadratic, cubic = coefficients

    # Accumulate the terms from lowest to highest power.
    estimate = constant + linear * area
    estimate = estimate + quadratic * (area ** 2)
    estimate = estimate + cubic * (area ** 3)
    return estimate


# evaluate the fixed polynomial model at every training area
y = area2distance_train(training_area)

# plain Python lists of the inputs and the corresponding model outputs
x_list = list(training_area)
y_list = list(y)

# plot the outputs of the trained model
plt.scatter(x_list, y_list)
plt.show()

# KeyboardInterrupt:  (captured notebook output, not code)