In [1]:
import os
import sys
while not os.getcwd().endswith('ml'):
    os.chdir('..')
sys.path.insert(0, os.getcwd())

In [2]:
import math
import copy
import cv2
import numpy as np
import random
from shutil import copyfile
from pathlib import Path
from matplotlib import pyplot as plt
from kaggle_problems.rosneft_proppant.workspace.helpers import get_random_color
from kaggle_problems.rosneft_proppant.workspace.common import r2prop_size, prop_size2r, bin2low, bin2high, TARGET_SHAPE
import pandas as pd

class GrayCircleContour:
    def __init__(self):
        self.msk = []
        for r in np.arange(0, 100):
            img = np.zeros(shape=(2 * r + 1, 2 * r + 1))
            cv2.circle(img, (r, r), r, 1, -1)

            self.msk.append(img)


    def get_msk(self, r):
        return self.msk[r]
circleContour = GrayCircleContour()


In [3]:
print(bin2low)
print(bin2high)

{'6': 32, '7': 27, '8': 23, '10': 19, '12': 16, '14': 14, '16': 12, '18': 10, '20': 8, '25': 7, '30': 6, '35': 5, '40': 4, '45': None, '50': 3, '60': None, '70': 2, '80': None, '100': 1}
{'6': 37, '7': 31, '8': 26, '10': 22, '12': 18, '14': 15, '16': 13, '18': 11, '20': 9, '25': 7, '30': 6, '35': 5, '40': 4, '45': None, '50': 3, '60': None, '70': 2, '80': None, '100': 1}


In [4]:
DATA_DIR = "kaggle_problems/rosneft_proppant/workspace/data/colored_main_area"
ORIGINAL_IMG_DIR = "kaggle_problems/rosneft_proppant/workspace/data/test"
DEBUG_IMG_DIR = "kaggle_problems/rosneft_proppant/data/debug"
CIRCLE_DIR = "kaggle_problems/rosneft_proppant/data/circles"

In [5]:
def get_circle_unicolor(img, x, y, r):
    x, y = y, x
    sub_img = get_masked_img(img, x, y, r)
    if (sub_img is None):
        return None
    
    sub_img = sub_img.astype(float)
    
    msk = circleContour.get_msk(r)
    
    cnt_in_circle = np.sum(msk[:, :, 0])
    e = np.sum(sub_img, axis=(0, 1)) / cnt_in_circle
    
    sub_img -= e[np.newaxis, np.newaxis, :]
    sub_img *= msk 
    
    d = math.sqrt(np.sum(sub_img ** 2) / cnt_in_circle / 3)
    return d

def is_circle_unicolor(img, x, y, r):
    value = get_circle_unicolor(img, x, y, r)
    if value is None:
        return False
    return value < 50

def in_range(l, s, r):
    return l <= s and s < r

In [6]:
def get_masked_img(img, x, y, r):
    x_min = x - r
    x_max = x + r + 1
    y_min = y - r
    y_max = y + r + 1
    if (not in_range(0, x_min, img.shape[0])) or \
            (not in_range(0, x_max, img.shape[0])) or \
            (not in_range(0, y_min, img.shape[1])) or \
            (not in_range(0, y_max, img.shape[1])):
        return None

    msk = circleContour.get_msk(r)
    sub_img = img[x_min:x_max, y_min:y_max]

    sub_img = (sub_img * msk).astype(dtype=int)
    return sub_img

In [7]:
def is_colored_circle(img, x, y, r):
    x, y = y, x
    sub_img = get_masked_img(img, x, y, r)
    if (sub_img is None):
        return False
    
    sub_img = sub_img.astype(float)
    msk = helpers.circleContour.get_msk(r)
    
    tmp = np.empty(shape=sub_img.shape)
    tmp[:, :, 0] = np.abs(sub_img[:, :, 0] - sub_img[:, :, 1])
    tmp[:, :, 1] = np.abs(sub_img[:, :, 1] - sub_img[:, :, 2])
    tmp[:, :, 2] = np.abs(sub_img[:, :, 2] - sub_img[:, :, 0])
    
    sub_img = tmp
    
    cnt_in_circle = np.sum(msk)
    
    e = np.sum(sub_img) / cnt_in_circle
    
    sub_img -= e
    sub_img *= msk 
    
    d = np.sum(sub_img ** 2) / cnt_in_circle
    #sreturn d
    return d > 3000
    
    

In [8]:
def get_brith_circle(img, hsv_img, x, y, r):
    x, y = y, x
    hsv_sub_img = get_masked_img(hsv_img, x, y, r)
    if (hsv_sub_img is None):
        return False
    
    hsv_sub_img = hsv_sub_img.astype(float)
    msk = circleContour.get_msk(r)
    
    mean = np.sum(hsv_sub_img[:, :, 1]) / np.sum(msk)
    return mean
    
def is_brith_circle(img, hsv_img, x, y, r): # Яркость
    return get_brith_circle(img, hsv_img, x, y, r) > 50


In [9]:
def get_black_circle(th, x, y, r):
    x, y = y, x
    sub_img = get_masked_img(th, x, y, r)
    if (sub_img is None):
        return False
    sub_img = sub_img.astype(float)
    msk = circleContour.get_msk(r)
    
    sub_img2 = get_masked_img(th, x, y, r - 2)
    if (sub_img2 is None):
        return False
    sub_img2 = sub_img2.astype(float)
    msk2 = circleContour.get_msk(r - 2)
    
    return (np.sum(sub_img == 0) - np.sum(sub_img2 == 0)) / (np.sum(msk == 1) - np.sum(msk2 == 1))

def is_black_circle(th, x, y, r):
    return get_black_circle(th, x, y, r) > 0.9


In [10]:
def get_flare_circle(th, x, y, r):
    x, y = y, x
    sub_img2 = get_masked_img(th, x, y, r - 2)
    if (sub_img2 is None):
        return False
    sub_img2 = sub_img2.astype(float)
    msk2 = circleContour.get_msk(r - 2)
    
    return np.sum(sub_img2 == 255) / np.sum(msk2)

def is_flare_circle(th, x, y, r):
    return get_flare_circle(th, x, y, r) > 0.4


In [11]:
def get_not_black_circle(th, x, y, r):
    x, y = y, x
    sub_img = get_masked_img(th, x, y, r)
    if (sub_img is None):
        return False
    sub_img = sub_img.astype(float)
    msk = circleContour.get_msk(r)
    
    sub_img2 = get_masked_img(th, x, y, r + 2)
    if (sub_img2 is None):
        return False
    sub_img2 = sub_img2.astype(float)
    msk2 = circleContour.get_msk(r + 2)
    
    return (np.sum(sub_img2 == 0) - np.sum(sub_img == 0)) / (np.sum(msk2 == 1) - np.sum(msk == 1))
    
def is_not_black_around_circle(th, x, y, r): # Яркость
    return get_not_black_circle(th, x, y, r) < 0.5


In [12]:
def get_gray_around_circle(img, hsv_img, x, y, r):
    x, y = y, x
    hsv_sub_img = get_masked_img(hsv_img, x, y, r)
    hsv_sub_img2 = get_masked_img(hsv_img, x, y, r + 2)
    if (hsv_sub_img is None or hsv_sub_img2 is None):
        return False
    
    hsv_sub_img = hsv_sub_img.astype(float)
    msk = circleContour.get_msk(r)
    msk2 = circleContour.get_msk(r + 2)
    
    cnt = np.sum(msk)
    cnt2 = np.sum(msk2)
    
    _sum = np.sum(hsv_sub_img[:, :, 1])
    _sum2 = np.sum(hsv_sub_img2[:, :, 1])
    return (_sum2 - _sum) / (cnt2 - cnt)
    
def is_gray_around_circle(img, hsv_img, x, y, r): # Яркость
    return get_gray_around_circle(img, hsv_img, x, y, r) < 30


In [13]:
def get_circles(img, hsv_img, gray, min_r, max_r, img_number):
    debug_dir = "{}/{}".format(DEBUG_IMG_DIR, img_number)
    Path(debug_dir).mkdir(exist_ok=True, parents=True) # create debug dir
    
    _, th = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)
    cv2.imwrite("{}/th.jpg".format(debug_dir), th)
    
    
    circles = cv2.HoughCircles(gray ,cv2.HOUGH_GRADIENT,5,minDist=min_r,
                            param1=200,param2=20,minRadius=min_r,maxRadius=max_r)
    img_with_circle = copy.deepcopy(original_img)
    
    circles = circles[0]
    print("Circle count: {}. r: {}-{}".format(len(circles), min_r, max_r))
    
    
#     circles = cv2.HoughCircles(gray,cv2.HOUGH_GRADIENT,1,minDist=bin2low['40'],
#                             param1=200,param2=1,minRadius=bin2low['40'],maxRadius=bin2high['25'])
#     circles = circles[0]
#     print("Circle count: {}. r: {}-{}".format(len(circles), bin2low['40'], bin2high['25']))
    

    filtered_circle = [(int(circle[0]), int(circle[1]), int(circle[2])) for circle in circles]

    filtered_circle = [circle for circle in filtered_circle if is_black_circle(th, circle[0], circle[1], circle[2])]
    print("After first filter: {}.".format(len(filtered_circle)))
    
    filtered_circle = [circle for circle in filtered_circle if 
                       is_flare_circle(th, circle[0], circle[1], circle[2]) or is_not_black_around_circle(th, circle[0], circle[1], circle[2])]
    print("After second filter: {}.".format(len(filtered_circle)))
    

    
    
#     filtered_circle = [circle for circle in filtered_circle if is_not_black_around_circle(img, hsv_img, circle[0], circle[1], circle[2])]
#     print("After second filter: {}.".format(len(filtered_circle)))
    
#     filtered_circle = [circle for circle in filtered_circle if is_brith_circle(img, hsv_img, circle[0], circle[1], circle[2])]
#     print("After first filter: {}.".format(len(filtered_circle)))


#     filtered_circle = [circle for circle in filtered_circle if is_circle_unicolor(img, circle[0], circle[1], circle[2])]
#     print("After second filter: {}.".format(len(filtered_circle)))

#     filtered_circle = [circle for circle in filtered_circle if is_gray_around_circle(img, hsv_img, circle[0], circle[1], circle[2])]
#     print("After third filter: {}.".format(len(filtered_circle)))

    
    th_with_circle = cv2.merge([th, th, th])
    color = get_random_color()
    for circle in filtered_circle:
        cv2.circle(img_with_circle, (circle[0], circle[1]), circle[2], color, 1)  # fill with 0 because of THRESH_BINARY_INV
        cv2.circle(th_with_circle, (circle[0], circle[1]), circle[2], color, 1)  # fill with 0 because of THRESH_BINARY_INV
                      
    cv2.imwrite("{}/img_with_circle.jpg".format(debug_dir), img_with_circle)
    cv2.imwrite("{}/th_with_circle.jpg".format(debug_dir), th_with_circle)
    return filtered_circle
    
def process_color_img(original_img, img_number):
    debug_dir = "{}/{}".format(DEBUG_IMG_DIR, img_number)
    Path(debug_dir).mkdir(exist_ok=True, parents=True) # create debug dir
    cv2.imwrite("{}/original_img.jpg".format(debug_dir), original_img)
    
    result_circles = []

    
    img_with_circle = copy.deepcopy(original_img)
    img = copy.deepcopy(original_img)
    
#     gaussian_3 = cv2.GaussianBlur(img, (3, 3), 20.0)
#     img = cv2.addWeighted(img, 1.5, gaussian_3, -0.5, 0, img)
    
    hsv_img = cv2.cvtColor(original_img, cv2.COLOR_RGB2HSV)
     
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    
    cv2.imwrite("{}/gray.jpg".format(debug_dir), gray)
    big_circles = get_circles(img, hsv_img, gray, bin2low['20'] + 1, bin2high['16'], img_number)
    
    small_circles = get_circles(img, hsv_img, gray, bin2low['40'], bin2high['25'] - 1, img_number)
    
    print("big_circles: {}, small_circles: {}".format(len(big_circles), len(small_circles)))
    
    if (len(big_circles) > len(small_circles)):
        return '16/20'
    return '20/40_pdcpd_bash_lab'
#         # Выделение кругов
#     circles = cv2.HoughCircles(gray,cv2.HOUGH_GRADIENT,1,minDist=bin2low['20'],
#                             param1=200,param2=1,minRadius=bin2low['20'],maxRadius=bin2high['16'])
#     circles = circles[0]
#     print("Circle count: {}. r: {}-{}".format(len(circles), bin2low['20'], bin2high['16']))
    
    
# #     circles = cv2.HoughCircles(gray,cv2.HOUGH_GRADIENT,1,minDist=bin2low['40'],
# #                             param1=200,param2=1,minRadius=bin2low['40'],maxRadius=bin2high['25'])
# #     circles = circles[0]
# #     print("Circle count: {}. r: {}-{}".format(len(circles), bin2low['40'], bin2high['25']))
    

#     filtered_circle = [(int(circle[0]), int(circle[1]), int(circle[2])) for circle in circles]

#     filtered_circle = [circle for circle in filtered_circle if is_brith_circle(img, hsv_img, circle[0], circle[1], circle[2])]
#     print("After first filter: {}.".format(len(filtered_circle)))


#     filtered_circle = [circle for circle in filtered_circle if is_circle_unicolor(img, circle[0], circle[1], circle[2])]
#     print("After second filter: {}.".format(len(filtered_circle)))

#     it += 1
#     result_circles.extend(filtered_circle)
        
#     return result_circles



In [14]:
train = pd.read_csv("kaggle_problems/rosneft_proppant/workspace/data/labels/train.csv")
COLORED_IMG_DIR = "kaggle_problems/rosneft_proppant/workspace/data/colored_main_area"

colored_img = [int(img[:-len(".jpg")]) for img in os.listdir(COLORED_IMG_DIR) if img.endswith('.jpg')]

colored_train = train[train.ImageId.isin(colored_img)]
#colored_train = colored_train[~colored_train.prop_count.isna()]
colored_train = colored_train[~colored_train['18'].isna()]

In [15]:
def increase_brightness(img, value):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    lim = 255 - value
    v[v > lim] = 255
    v[v <= lim] += value

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

def decrease_brightness(img, value):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    lim = 255 - value
    v[v < value] = 0
    v[v >= value] -= value

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

In [16]:
all_img = [str(img) + ".jpg" for img in colored_img]

predicted = {}
real = {}
for img_name in all_img:
    print(img_name)
    img_number, _ = img_name.split('.')
    original_img = cv2.imread("{}/{}".format(DATA_DIR, img_name))
    original_img = cv2.resize(original_img, (int(TARGET_SHAPE[1] / 1.5), int(TARGET_SHAPE[0] / 1.5)))
    
    original_img = decrease_brightness(original_img, 100)
    
    
    predicted_fraction = process_color_img(original_img, img_number)
    predicted[img_name] = predicted_fraction
    real[img_name] = str(colored_train[colored_train.ImageId==int(img_number)].fraction.values[0])
    print("predicted_fraction: {}, real_fraction: {}".
          format(predicted_fraction, str(colored_train[colored_train.ImageId==int(img_number)].fraction.values[0])))
#     for circle, i in zip(circles, range(len(circles))):
#         circle_img = get_masked_img(original_img, circle[1], circle[0], circle[2])
#         if circle_img is not None:
#             cv2.imwrite("{}/{}_{}.jpg".format(CIRCLE_DIR, img_number, i), circle_img)

189.jpg
Circle count: 6051. r: 9-13
After first filter: 18.
After second filter: 3.
Circle count: 652. r: 4-6
After first filter: 81.
After second filter: 31.
big_circles: 3, small_circles: 31
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
77.jpg
Circle count: 4886. r: 9-13
After first filter: 72.
After second filter: 11.
Circle count: 638. r: 4-6
After first filter: 141.
After second filter: 60.
big_circles: 11, small_circles: 60
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
360.jpg
Circle count: 6309. r: 9-13
After first filter: 101.
After second filter: 12.
Circle count: 249. r: 4-6
After first filter: 123.
After second filter: 36.
big_circles: 12, small_circles: 36
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
412.jpg
Circle count: 4699. r: 9-13
After first filter: 28.
After second filter: 1.
Circle count: 342. r: 4-6
After first filter: 106.
After second filter: 28.
big_circles: 1, small

Circle count: 4818. r: 9-13
After first filter: 10.
After second filter: 2.
Circle count: 680. r: 4-6
After first filter: 124.
After second filter: 43.
big_circles: 2, small_circles: 43
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
129.jpg
Circle count: 11726. r: 9-13
After first filter: 169.
After second filter: 30.
Circle count: 794. r: 4-6
After first filter: 185.
After second filter: 46.
big_circles: 30, small_circles: 46
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
673.jpg
Circle count: 5477. r: 9-13
After first filter: 12.
After second filter: 1.
Circle count: 539. r: 4-6
After first filter: 97.
After second filter: 56.
big_circles: 1, small_circles: 56
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
317.jpg
Circle count: 10167. r: 9-13
After first filter: 51.
After second filter: 4.
Circle count: 999. r: 4-6
After first filter: 211.
After second filter: 119.
big_circles: 4, small_circl

Circle count: 3442. r: 9-13
After first filter: 324.
After second filter: 46.
Circle count: 57. r: 4-6
After first filter: 17.
After second filter: 2.
big_circles: 46, small_circles: 2
predicted_fraction: 16/20, real_fraction: 16/20
445.jpg
Circle count: 4902. r: 9-13
After first filter: 12.
After second filter: 2.
Circle count: 1073. r: 4-6
After first filter: 243.
After second filter: 103.
big_circles: 2, small_circles: 103
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
479.jpg
Circle count: 13046. r: 9-13
After first filter: 374.
After second filter: 34.
Circle count: 620. r: 4-6
After first filter: 102.
After second filter: 24.
big_circles: 34, small_circles: 24
predicted_fraction: 16/20, real_fraction: 20/40_pdcpd_bash_lab
135.jpg
Circle count: 4742. r: 9-13
After first filter: 11.
After second filter: 1.
Circle count: 686. r: 4-6
After first filter: 121.
After second filter: 38.
big_circles: 1, small_circles: 38
predicted_fraction: 20/40_pdcpd_bash_

Circle count: 11366. r: 9-13
After first filter: 246.
After second filter: 16.
Circle count: 1046. r: 4-6
After first filter: 145.
After second filter: 36.
big_circles: 16, small_circles: 36
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
157.jpg
Circle count: 3080. r: 9-13
After first filter: 16.
After second filter: 1.
Circle count: 312. r: 4-6
After first filter: 56.
After second filter: 24.
big_circles: 1, small_circles: 24
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
631.jpg
Circle count: 4339. r: 9-13
After first filter: 28.
After second filter: 4.
Circle count: 558. r: 4-6
After first filter: 50.
After second filter: 17.
big_circles: 4, small_circles: 17
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
382.jpg
Circle count: 3837. r: 9-13
After first filter: 87.
After second filter: 6.
Circle count: 60. r: 4-6
After first filter: 9.
After second filter: 6.
big_circles: 6, small_circles: 6


Circle count: 8274. r: 9-13
After first filter: 63.
After second filter: 10.
Circle count: 689. r: 4-6
After first filter: 139.
After second filter: 69.
big_circles: 10, small_circles: 69
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
568.jpg
Circle count: 8899. r: 9-13
After first filter: 81.
After second filter: 4.
Circle count: 1003. r: 4-6
After first filter: 187.
After second filter: 108.
big_circles: 4, small_circles: 108
predicted_fraction: 20/40_pdcpd_bash_lab, real_fraction: 20/40_pdcpd_bash_lab
232.jpg
Circle count: 7228. r: 9-13
After first filter: 192.
After second filter: 19.
Circle count: 124. r: 4-6
After first filter: 14.
After second filter: 6.
big_circles: 19, small_circles: 6
predicted_fraction: 16/20, real_fraction: 16/20
540.jpg
Circle count: 10404. r: 9-13
After first filter: 172.
After second filter: 19.
Circle count: 486. r: 4-6
After first filter: 99.
After second filter: 21.
big_circles: 19, small_circles: 21
predicted_fraction: 

In [17]:
cnt_bad = 0
for img_name in all_img:
    if (predicted[img_name] != real[img_name]):
        cnt_bad += 1
print(cnt_bad / len(all_img))

0.05


In [18]:
predicted

{'189.jpg': '20/40_pdcpd_bash_lab',
 '77.jpg': '20/40_pdcpd_bash_lab',
 '360.jpg': '20/40_pdcpd_bash_lab',
 '412.jpg': '20/40_pdcpd_bash_lab',
 '599.jpg': '20/40_pdcpd_bash_lab',
 '200.jpg': '20/40_pdcpd_bash_lab',
 '573.jpg': '20/40_pdcpd_bash_lab',
 '598.jpg': '16/20',
 '361.jpg': '20/40_pdcpd_bash_lab',
 '74.jpg': '20/40_pdcpd_bash_lab',
 '439.jpg': '20/40_pdcpd_bash_lab',
 '377.jpg': '16/20',
 '411.jpg': '20/40_pdcpd_bash_lab',
 '766.jpg': '20/40_pdcpd_bash_lab',
 '202.jpg': '16/20',
 '558.jpg': '20/40_pdcpd_bash_lab',
 '612.jpg': '20/40_pdcpd_bash_lab',
 '372.jpg': '16/20',
 '428.jpg': '20/40_pdcpd_bash_lab',
 '206.jpg': '20/40_pdcpd_bash_lab',
 '213.jpg': '16/20',
 '398.jpg': '20/40_pdcpd_bash_lab',
 '159.jpg': '20/40_pdcpd_bash_lab',
 '72.jpg': '16/20',
 '99.jpg': '20/40_pdcpd_bash_lab',
 '588.jpg': '20/40_pdcpd_bash_lab',
 '775.jpg': '20/40_pdcpd_bash_lab',
 '748.jpg': '16/20',
 '760.jpg': '20/40_pdcpd_bash_lab',
 '614.jpg': '20/40_pdcpd_bash_lab',
 '166.jpg': '20/40_pdcpd_bash

In [10]:
def increase_brightness(img, value):
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    lim = 255 - value
    v[v > lim] = 255
    v[v <= lim] += value

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    return img

In [17]:
original_img = cv2.imread("{}/{}".format(DATA_DIR, '2.jpg'))

In [19]:
img = increase_brightness(original_img, 0)