In [1]:
import json
import matplotlib.pyplot as plt
import numpy as np 
import pickle
import PIL.Image

import matplotlib.image as mpimg
import skimage.io
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb
from skimage.transform import resize
import matplotlib.patches as mpatches
from skimage import data
import skimage
from skimage.transform import rotate

import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

In [2]:
# Load the training split: two feature matrices (Xb1, Xb2), the labels (Y)
# and the incidence angles (A).
# NOTE(review): Xb1/Xb2 are presumably the two flattened image bands -- confirm.
Xb1_tr, Xb2_tr, Y_tr, A_tr = (
    np.load(path)
    for path in (
        'aug_data_split/Xb1_tr.npy',
        'aug_data_split/Xb2_tr.npy',
        'aug_data_split/Y_tr.npy',
        'aug_data_split/A_tr.npy',
    )
)

# Same four arrays for the held-out test split.
Xb1_te, Xb2_te, Y_te, A_te = (
    np.load(path)
    for path in (
        'aug_data_split/Xb1_te.npy',
        'aug_data_split/Xb2_te.npy',
        'aug_data_split/Y_te.npy',
        'aug_data_split/A_te.npy',
    )
)

# Sanity check: one shape per line, training arrays first.
print('Shapes: ')
print(
    Xb1_tr.shape, Xb2_tr.shape, Y_tr.shape, A_tr.shape,
    Xb1_te.shape, Xb2_te.shape, Y_te.shape, A_te.shape,
    sep='\n',
)

Shapes: 
(2887, 5625)
(2887, 5625)
(2887, 1)
(2887, 1)
(321, 5625)
(321, 5625)
(321, 1)
(321, 1)


### KNN on the incidence angles to predict the cluster the samples belong to

As can be seen in the image below, the labels fall into a pattern based on the incidence angles.
<img src="inc_angle_labels.png">

The two clusters of interest are:
1. The cluster corresponding to pure icebergs (labelled `0` in the code below) - all the groups that contain only icebergs.
2. The cluster corresponding to a mix of icebergs and ships (labelled `1` in the code below) - all the groups that contain a mixture of ships and icebergs.

To differentiate between the clusters, we use a KNN classifier.

### Predicting cluster using KNN 

The classification is done by choosing an appropriate value of `k_neighbours`. For each sample, we look at the labels of the `k_neighbours` samples whose incidence angles are closest to it (the sample itself is one of them). If at least one of these neighbours is a ship, the sample belongs to cluster 1; if all of them are icebergs, it belongs to cluster 0.

In [136]:
def create_cluster_labelss(A,Y,k_neighbours):
    """Assign every sample to cluster 0 or 1 from the labels of its nearest angles.

    For each row of ``A`` the rows of ``A`` are ranked by absolute difference
    to it, and the labels of the ``k_neighbours`` closest rows are looked up
    in ``Y``. The row itself takes part in the ranking with a difference of
    zero. Labels equal to 0 are counted as ships and labels equal to 1 as
    icebergs.

    Parameters
    ----------
    A : numpy.ndarray
        One row per sample; indexed as ``A.shape[0]`` and ``A.shape[1]``, so
        it needs at least two dimensions. The notebook passes the incidence
        angles with shape ``(n_samples, 1)``.
    Y : numpy.ndarray
        Labels aligned row-by-row with ``A`` (0 = ship, 1 = iceberg).
    k_neighbours : int
        Number of closest samples whose labels are inspected.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(A.shape[0], A.shape[1])``. A row is 0 when
        none of the inspected neighbours is a ship and at least one is an
        iceberg; otherwise it is 1.
    """
    labels_out = np.zeros((A.shape[0], A.shape[1]))

    for row, angle in enumerate(A):
        # Indices of all samples, closest incidence angle first.
        nearest_first = np.argsort(abs(angle - A).squeeze())
        neighbour_labels = Y[nearest_first][0:k_neighbours]

        n_ships = sum(1 for lab in neighbour_labels if lab == 0)
        n_icebergs = sum(1 for lab in neighbour_labels if lab == 1)

        # Only a neighbourhood made of icebergs alone maps to cluster 0;
        # any ship (or no counted label at all) maps to cluster 1.
        if n_ships == 0 and n_icebergs > 0:
            labels_out[row] = 0
        else:
            labels_out[row] = 1

    return labels_out

# Main: derive cluster labels for both splits with the same neighbourhood size.
# NOTE(review): each split is ranked against its own angles and labels, so the
# test-set clusters are computed from Y_te -- confirm this is intended.
cluster_labels_tr, cluster_labels_te = (
    create_cluster_labelss(angles, labels, k_neighbours=40)
    for angles, labels in ((A_tr, Y_tr), (A_te, Y_te))
)

In [137]:
# Persist the cluster labels alongside the other split arrays so they can be
# reloaded without recomputing them.
np.save(file='aug_data_split/C_tr.npy', arr=cluster_labels_tr)
np.save(file='aug_data_split/C_te.npy', arr=cluster_labels_te)

In [141]:
# Reload the saved cluster labels for the training and test splits.
C_tr, C_te = (
    np.load(path)
    for path in ('aug_data_split/C_tr.npy', 'aug_data_split/C_te.npy')
)

# Sanity check: one shape per line, training split first.
print('Shapes: ')
print(C_tr.shape, C_te.shape, sep='\n')

Shapes: 
(2887, 1)
(321, 1)


In [142]:
# Class make-up of each cluster on the training split (label 1 = iceberg,
# label 0 = ship). The printed values are fractions in [0, 1], despite the
# "Percentage" wording in the messages.

# Training labels of the samples assigned to cluster 0.
c0_labels = Y_tr[np.nonzero(C_tr == 0)]

print('Percentage of cluster 0 samples that are icebergs: ',
      np.count_nonzero(c0_labels == 1) / len(c0_labels))
print('Percentage of cluster 0 samples that are ships: ',
      np.count_nonzero(c0_labels == 0) / len(c0_labels))

# Training labels of the samples assigned to cluster 1.
c1_labels = Y_tr[np.nonzero(C_tr == 1)]

print('Percentage of cluster 1 samples that are icebergs: ',
      np.count_nonzero(c1_labels == 1) / len(c1_labels))
print('Percentage of cluster 1 samples that are ships: ',
      np.count_nonzero(c1_labels == 0) / len(c1_labels))

Percentage of cluster 0 samples that are icebergs:  1.0
Percentage of cluster 0 samples that are ships:  0.0
Percentage of cluster 1 samples that are icebergs:  0.30319388214125054
Percentage of cluster 1 samples that are ships:  0.6968061178587495
