In [None]:
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
import os
import time
from skimage.measure import label, regionprops, regionprops_table
import pandas
from IPython.display import clear_output
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle

In [None]:
def fft_padding(img, kernel):
  """Zero-pad an image and a kernel onto one shared, enlarged grid.

  The grid has ``img.shape[0] + kernel.shape[0] - 1`` rows and
  ``img.shape[1] + kernel.shape[1] - 1`` columns. Each input is copied into
  the top-left corner of its own zero-filled array; everything else stays 0.

  Args:
    img: 2-D array holding the image.
    kernel: 2-D array holding the filter kernel.

  Returns:
    Tuple ``(padded_img, padded_kernel)``; both are new arrays created with
    ``np.zeros`` (so they use its default float dtype) and share one shape.
  """
  img_rows, img_cols = img.shape[0], img.shape[1]
  ker_rows, ker_cols = kernel.shape[0], kernel.shape[1]
  padded_shape = (img_rows + ker_rows - 1, img_cols + ker_cols - 1)

  padded_img = np.zeros(padded_shape)
  padded_img[:img_rows, :img_cols] = img

  padded_kernel = np.zeros(padded_shape)
  padded_kernel[:ker_rows, :ker_cols] = kernel

  return padded_img, padded_kernel


def convolve(img, kernel, iter):
  """Convolve ``img`` with ``kernel`` by multiplying their 2-D FFTs.

  Both inputs are first zero-padded by ``fft_padding``. The kernel spectrum is
  raised to the power ``iter`` before the multiplication, which corresponds to
  applying the kernel ``iter`` times in a row.

  NOTE(review): the padded grid is sized for a single kernel application, so
  for ``iter > 1`` the repeated convolution wraps around on that grid. Callers
  should confirm that they discard the affected border.

  Args:
    img: 2-D array holding the image.
    kernel: 2-D array holding the filter kernel.
    iter: exponent applied to the kernel spectrum (number of applications).

  Returns:
    Real part of the inverse FFT, rounded to whole numbers, with the padded
    shape produced by ``fft_padding``.
  """
  padded_img, padded_kernel = fft_padding(img, kernel)
  img_spectrum = np.fft.fft2(padded_img)
  kernel_spectrum = np.fft.fft2(padded_kernel)
  # Multiplication in the frequency domain is convolution in the image domain.
  product_spectrum = img_spectrum * kernel_spectrum ** iter
  return np.round(np.fft.ifft2(product_spectrum).real)


def processing(img):
  """Turn a grayscale image into a binary edge mask.

  Steps: smooth with a 7x7 mean filter applied twice, compute horizontal and
  vertical gradients with 3x3 difference kernels, threshold the gradient
  magnitude at 50, then clean the mask with a dilation followed by an erosion.

  Args:
    img: 2-D grayscale image array.

  Returns:
    Array with the same shape as ``img`` holding 0 or 255 per pixel.
  """
  #noise reduction
  mean_filter = 1/49 * np.ones((7,7))
  smooth_img = convolve(img, mean_filter, 2)

  #edge detection
  kernel_x = np.array([[-5, 0, 5], [-5, 0, 5], [-5, 0, 5]])
  kernel_y = np.array([[-5, -5, -5], [0, 0, 0], [5, 5, 5]])
  grad_x = convolve(smooth_img, kernel_x, 1)
  grad_y = convolve(smooth_img, kernel_y, 1)
  gradient = np.sqrt(np.square(grad_x) + np.square(grad_y))
  # cv2.threshold returns (threshold actually used, thresholded image).
  _, mask = cv2.threshold(gradient, 50, 255, cv2.THRESH_BINARY)

  # The kernel argument must be a structuring-element array. A bare tuple such
  # as (5,5) is converted into a 2x1 matrix holding the values 5 and 5, not
  # into a 5x5 neighbourhood, so build the elements explicitly.
  dilate_kernel = np.ones((5, 5), np.uint8)
  erode_kernel = np.ones((9, 9), np.uint8)
  mask = cv2.dilate(mask, dilate_kernel, iterations=2)
  mask = cv2.erode(mask, erode_kernel, iterations=2)

  # The convolutions grew each axis by 6 (7x7 kernel) + 2 (3x3 kernel) = 8;
  # dropping the first 8 rows/columns restores the input's shape.
  return mask[8:, 8:]

In [None]:
#sphere classification
#use this cell to classify spheres manually
# The directory listing is only taken when no `good_imgs` list exists yet, so a
# list prepared in an earlier cell or run is reused.
if 'good_imgs' not in locals():
  good_imgs = os.listdir(os.getcwd())
# Keep regular files only. A new list is built because removing entries from a
# list while iterating over it skips the element after every removal.
good_imgs = [name for name in good_imgs if os.path.isfile(name)]

properties = ['area', 'area_bbox', 'area_convex', 'area_filled',
              'axis_major_length', 'axis_minor_length']
props_spheres = []  # property tables of candidates approved by the user
props_noise = []    # property tables of candidates rejected by the user

for item in good_imgs:
  img = cv2.imread(item, 0)
  color_img = cv2.imread(item)
  if img is None or color_img is None:
    # cv2.imread returns None for files it cannot decode (e.g. non-image
    # files sitting in the working directory); skip them instead of crashing.
    continue

  processed_img = processing(img)
  contours, hierarchy = cv2.findContours(processed_img.astype('uint8'),
                                         cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
  for contour in contours:
    # Ignore small blobs.
    if cv2.contourArea(contour) > 200:
      # Mark the candidate in green and show only its bounding box.
      cv2.drawContours(color_img, contour, -1, (0,255,0), -1)
      x, y, w, h = cv2.boundingRect(contour)
      cv2_imshow(color_img[y:y+h, x:x+w])
      time.sleep(1)

      # Draw the candidate alone on an empty canvas and measure it.
      blank = np.zeros((processed_img.shape))
      blank = cv2.drawContours(blank.astype('uint8'), contour, -1, (255, 255, 255), 3)
      label_img = label(blank, connectivity=2)
      props = regionprops_table(label_img, properties=properties)

      # Anything other than '1' counts as a rejection.
      verdict = input('approved (1), rejected (0)')
      if verdict == '1':
        props_spheres.append(props)
      else:
        props_noise.append(props)
      clear_output()

In [None]:
#combines and filter the resulting dataframes
# Build one labelled frame per classified candidate, then concatenate once.
# Label '1' marks approved spheres, '0' marks rejected candidates.
labelled_frames = []
for category, props_list in (('1', props_spheres), ('0', props_noise)):
  for props in props_list:
    object_df = pandas.DataFrame(props)
    # Assigning the scalar gives every row its own label. The previous
    # ['1' * n] produced a single string such as '111' for an n-row table.
    # The column key stays 0 so it is written to the CSV header as "0".
    object_df[0] = category
    labelled_frames.append(object_df)

if labelled_frames:
  measures = pandas.concat(labelled_frames, ignore_index=True)
else:
  # Nothing was classified; pandas.concat raises on an empty list.
  measures = pandas.DataFrame()
measures.to_csv('measures.csv')

In [None]:
#training the random forest model
# Always load the table from disk. The former guard tested whether the string
# 'measures.csv' was a variable name, which can never be true, so the file was
# read on every run anyway.
measures = pandas.read_csv('measures.csv')

properties = ['area', 'area_bbox', 'area_convex', 'area_filled',
              'axis_major_length', 'axis_minor_length']

y = measures['0']  # label column as named in the CSV header
X = measures[properties]

# A fixed seed makes every fit deterministic for a given split, so the final
# refit below reproduces the model whose accuracy was reported.
rf_model = RandomForestClassifier(random_state=0)
test_accuracy = []  # (test score, split seed) pairs
for k in range(100):
  X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                      stratify=y,
                                                      test_size=0.2,
                                                      random_state=k)
  rf_model.fit(X_train, y_train)
  train_score = rf_model.score(X_train, y_train)
  test_score = rf_model.score(X_test, y_test)
  test_accuracy.append((test_score, k))
  print(f"Train accuracy - :{train_score:.3f}")
  print(f"Test accuracy - :{test_score:.3f}")

# NOTE(review): picking the split by its test accuracy makes the reported
# score optimistic; consider cross-validation for an unbiased estimate.
best_result = max(test_accuracy, key=lambda x: x[0])
print(best_result)
chosen_k = best_result[1]
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    stratify=y,
                                                    test_size=0.2,
                                                    random_state=chosen_k)
rf_model.fit(X_train, y_train)
# Use a context manager so the file is flushed and closed deterministically.
with open('rf_model.sav', 'wb') as model_file:
  pickle.dump(rf_model, model_file)

In [None]:
from google.colab import files

# Pass each generated file to files.download: the pickled model first, then
# the measurements table.
for artifact in ('rf_model.sav', 'measures.csv'):
  files.download(artifact)