### Get dot coordinates using `blob_log` from the skimage library

In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
import skimage.feature
%matplotlib inline

In [2]:
# Class labels, in the column order used for the count table.
classes = [
    "adult_males",
    "subadult_males",
    "adult_females",
    "juveniles",
    "pups",
    "error",
]
# Column layout of the per-dot coordinate records.
coords_cols = ["filename", "x", "y", "category"]


def _numeric_first(name):
    """Sort key for directory entries.

    Names starting with a digit are ordered by the integer before the first
    '.'; all other names sort after them. The name itself breaks ties.
    """
    if name[0].isdigit():
        rank = int(name.partition('.')[0])
    else:
        rank = float('inf')
    return (rank, name)


file_names = sorted(os.listdir("/data/x/sealion/Train"), key=_numeric_first)

# select a subset of files to run on (positions in the sorted listing)
indices = [
    531, 946, 34, 30, 290, 406, 380, 913, 621, 811, 7, 421, 292, 66, 593,
    490, 909, 800, 215, 426, 475, 614, 184, 905, 97, 882, 776, 899, 344,
    473, 510, 234, 291, 331, 433, 712, 741, 767, 912,
]
#indices = [290, 291]
file_names = [file_names[position] for position in indices]

In [17]:
def _classify_dot(b, g, r):
    """Map the BGR colour sampled at a blob centre to a class.

    Returns a tuple ``(class_idx, marker_bgr)`` where ``class_idx`` indexes
    into ``classes`` and ``marker_bgr`` is the colour used to circle the blob
    on the preview image. Colours matching no rule map to index 5 ("error").
    The rules are evaluated in the order written; the first match wins.
    """
    if r > 200 and b < 50 and g < 50:  # RED
        return 0, (0, 0, 255)
    if r > 200 and b > 200 and g < 50:  # MAGENTA
        return 1, (250, 10, 250)
    if r < 100 and b < 100 and 150 < g < 200:  # GREEN
        return 4, (20, 180, 35)
    if r < 100 and 100 < b and g < 100:  # BLUE
        return 3, (180, 60, 30)
    if r < 150 and b < 50 and g < 100:  # BROWN
        return 2, (0, 42, 84)
    return 5, (255, 255, 155)


# dataframe to store results in: one row per file, one integer column per class
count_df = pd.DataFrame(0, index=file_names, columns=classes)
# one record per accepted dot, turned into coords_df after the loop
coords = []

for filename in file_names:
    print(filename)

    # read the Train and Train Dotted images
    image_1 = cv2.imread("/data/x/sealion/TrainDotted/" + filename)
    image_2 = cv2.imread("/data/x/sealion/Train/" + filename)

    # cv2.imread returns None (it does not raise) when a file cannot be read,
    # and absdiff needs both images to have the same shape.
    if image_1 is None or image_2 is None or image_1.shape != image_2.shape:
        print('Warning: Could not load a matching image pair for %s' % filename)
        continue

    # absolute difference between Train and Train Dotted
    image_3 = cv2.absdiff(image_1, image_2)

    # mask out blackened regions from Train Dotted:
    # grey values below 10 become 0, everything else 255
    mask_1 = cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY)
    mask_1[mask_1 < 10] = 0
    mask_1[mask_1 > 0] = 255

    # same binary mask, built from the Train image
    mask_2 = cv2.cvtColor(image_2, cv2.COLOR_BGR2GRAY)
    mask_2[mask_2 < 10] = 0
    mask_2[mask_2 > 0] = 255

    image_4 = cv2.bitwise_or(image_3, image_3, mask=mask_1)

    # Detect bad data. If the Train and Train Dotted images are very different
    # then something is wrong. Skipped files keep an all-zero row in count_df.
    avg_diff = image_4.sum() / (image_4.shape[0] * image_4.shape[1])
    print(avg_diff)
    if avg_diff > 60:
        print('Warning: Bad data for %s' % filename)
        continue

    image_5 = cv2.bitwise_or(image_4, image_4, mask=mask_2)

    # convert to grayscale to be accepted by skimage.feature.blob_log
    image_6 = cv2.cvtColor(image_5, cv2.COLOR_BGR2GRAY)

    # detect blobs
    blobs = skimage.feature.blob_log(image_6, min_sigma=3, max_sigma=4, num_sigma=2, threshold=0.02)

    # prepare the image to plot the results on
    image_7 = cv2.cvtColor(image_6, cv2.COLOR_GRAY2BGR)

    # blob sigmas per class for this file; a comprehension gives every class
    # its own list ([list()] * 6 would alias a single list six times)
    sizes = [[] for _ in classes]
    for blob in blobs:
        # get the coordinates and sigma for each blob
        y, x, s = blob
        # get the color of the pixel from Train Dotted in the center of the blob
        b, g, r = image_1[int(y), int(x)]

        # pick the class of the blob by looking at the color in Train Dotted
        class_idx, marker_bgr = _classify_dot(b, g, r)
        cv2.circle(image_7, (int(x), int(y)), 8, marker_bgr, 2)

        sizes[class_idx].append(s)
        class_name = classes[class_idx]
        # single .loc assignment instead of chained indexing
        count_df.loc[filename, class_name] += 1

        # keep coordinates only for real classes, not for "error" blobs
        if class_idx < 5:
            # NOTE(review): the original stored an undefined name `cls` here;
            # the class label is assumed to be the intended category - confirm.
            coords.append(dict(filename=filename, x=x, y=y, category=class_name))
    # output the results

#    f, ax = plt.subplots(3, 2, figsize=(10,16))
#    (ax1, ax2, ax3, ax4, ax5, ax6) = ax.flatten()
#    plt.title('%s'%filename)

#    ax1.imshow(cv2.cvtColor(image_2[:,:,:], cv2.COLOR_BGR2RGB))
#    ax1.set_title('Train')
#    ax2.imshow(cv2.cvtColor(image_1[:,:,:], cv2.COLOR_BGR2RGB))
#    ax2.set_title('Train Dotted')
#    ax3.imshow(cv2.cvtColor(image_3[:,:,:], cv2.COLOR_BGR2RGB))
#    ax3.set_title('Train Dotted - Train')
#    ax4.imshow(cv2.cvtColor(image_5[:,:,:], cv2.COLOR_BGR2RGB))
#    ax4.set_title('Mask blackened areas of Train Dotted')
#    ax5.imshow(image_6[:,:], cmap='gray')
#    ax5.set_title('Grayscale for input to blob_log')
#    ax6.imshow(cv2.cvtColor(image_7[:,:,:], cv2.COLOR_BGR2RGB))
#    ax6.set_title('Result')

#    plt.show()

# Build the coordinate table once, after all files are processed, so it exists
# even when no dot was recorded and is not rebuilt for every blob.
coords_df = pd.DataFrame.from_records(coords, columns=coords_cols)
coords_df = coords_df.astype({"x": "int", "y": "int"})

531.jpg


163.097349682
946.jpg


113.417159824
34.jpg


154.315978785
30.jpg


31.2134717832


290.jpg


173.808650511
406.jpg


67.2546158589
380.jpg


33.9865956805


913.jpg


25.8726279355


621.jpg


141.90937851
811.jpg


98.1498576208
7.jpg


102.454059496
421.jpg


115.55434783
292.jpg


11.0090252666


66.jpg


10.0156795507


593.jpg


6.46708742401


490.jpg


156.886961145
909.jpg


93.3790219399
800.jpg


156.377165604
215.jpg


120.281212143
426.jpg


4.59449292657


475.jpg


27.8335969079


614.jpg


86.4739226162
184.jpg


92.231638618
905.jpg


146.463161792
97.jpg


7.23316007535


882.jpg


62.3129591392
776.jpg


4.32146405759


899.jpg


6.21177445166


344.jpg


65.4390775954
473.jpg


13.8186259674


510.jpg


8.1044958258


234.jpg


123.899784005
291.jpg


8.37217530004


331.jpg


4.38011716182


433.jpg


8.13684525821


712.jpg


121.709471527
741.jpg


6.15084790713


767.jpg


163.374341254
912.jpg


5.23696682721


### Check count results

In [18]:
# Display the per-file blob counts (one column per entry in `classes`).
count_df

Unnamed: 0,adult_males,subadult_males,adult_females,juveniles,pups,error
531.jpg,0,0,0,0,0,0
946.jpg,0,0,0,0,0,0
34.jpg,0,0,0,0,0,0
30.jpg,2,0,3,3,0,1
290.jpg,0,0,0,0,0,0
406.jpg,0,0,0,0,0,0
380.jpg,2,0,111,46,0,150
913.jpg,1,0,109,33,0,47
621.jpg,0,0,0,0,0,0
811.jpg,0,0,0,0,0,0


### Reference counts

In [16]:
# Load the reference per-class counts that ship with the training set.
reference = pd.read_csv('/data/x/sealion/Train/train.csv')
# Select the rows for the chosen subset by index label. `.ix` was removed in
# pandas 1.0; `.loc` is the label-based replacement.
reference.loc[indices]

Unnamed: 0,train_id,adult_males,subadult_males,adult_females,juveniles,pups
531,531,0,9,5,6,0
946,946,3,1,34,33,0
34,34,4,1,127,237,0
30,30,2,0,1,3,0
290,290,2,0,0,0,0
406,406,10,12,141,47,3
380,380,2,0,83,31,0
913,913,1,0,20,33,0
621,621,15,12,53,36,30
811,811,7,5,12,0,0


In [16]:
# Persist the results: dot coordinates without the row index, counts with
# the index kept (to_csv writes the index by default).
coords_df.to_csv(path_or_buf='coords_notebook-bad2.csv', index=False)
count_df.to_csv(path_or_buf='counts-bad2.csv')