In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pickle
import plotly as py
from plotly.offline import plot, iplot
import plotly.graph_objs as go
from sklearn.cluster import DBSCAN
py.offline.init_notebook_mode(connected=True)

%matplotlib notebook

In [2]:
# path in the file
path_to_data = "/home/samba693/DataChallenge/debs2019_dataset2/"
path_to_outliers = "/home/samba693/Documents/DEBS/debs2019/src/ssh-kd/data/outliers.pkl"

In [3]:
# Read in the file
df = pd.read_csv(path_to_data+"in.csv",usecols=[1,2,3,4],names=["laser_id","X","Y","Z"])
num_of_scenes = len(df) / 72000
dataframes = []

for i in range(int(num_of_scenes)):
    start = i * 72000
    end = start + 72000
    dataframes.append(df.iloc[start:end, :])

In [4]:

out_file = open(path_to_data+"out.csv", 'r').readlines()
outfile = [i.rstrip() for i in out_file]
outfile = [i.split(',') for i in outfile]
outfile = [i[1:] for i in outfile]

outfile_list = []
out_cluster = []
for i in outfile:
    a  = {}
    for j in range(0,len(i),2):
        a[i[j]] = int(i[j+1])
    out_cluster.append(sum(a.values()))
    outfile_list.append(a)

In [5]:
# remove the outliers
def remove_out(df,path_out = path_to_outliers):
    pd.options.mode.chained_assignment = None
    outliers = pd.read_pickle(path_out)
    max_rad = outliers[0]
    min_rad = outliers[1]

    df["radius"] = df.X.pow(2).add(df.Y.pow(2).add(df.Z.pow(2))).pow(0.5).round(1)
    rad = np.array(df.radius)
    bool_vec = (rad <= max_rad) & (rad >= min_rad)
#     bool_vec = (rad >= min_rad)
    df = df[~bool_vec]
    df.drop(df[df["radius"] == 0].index, inplace=True)
#     df = df[df['laser_id']>30]
    
    return df

In [7]:
def plot_3d(df):
    fig = plt.figure()
    ax = fig.add_subplot(111,projection='3d')
    ax.scatter(df['X'],df['Z'],df['Y'], marker='o')

In [8]:
def plot_plot(object_data_frame,labels=False):
    if labels:
        labels1 = tuple(object_data_frame['labels'].tolist())
        use_labels = labels1
    else:
        use_labels = 'black'
    x = tuple(object_data_frame['X'].tolist())
    y = tuple(object_data_frame['Y'].tolist())
    z = tuple(object_data_frame['Z'].tolist())
    
    trace = go.Scatter3d(
            x = x,
            y = z,
            z = y,
            mode = 'markers',
            marker = dict(
                        color=use_labels,
                        colorscale='Viridis',
                        size = 1,
                        opacity = 0.8)
            )
    layout = go.Layout(
                    scene = dict(
                    xaxis = dict(
                        nticks=0, range = [-150,150],),
                    yaxis = dict(
                        nticks=0, range = [-100,100],),
                    zaxis = dict(
                        nticks=0, range = [-100,100],),)
                  )
    data=[trace]
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)

In [9]:
temp = dataframes[0]
temp = temp[(temp['X']>=0) & (temp['Y']<=20) & (temp['Z']>=0)]
plot_plot(temp)

In [10]:
temp1 = remove_out(dataframes[0])
temp1 = temp1[(temp1['X']>=0) & (temp1['Y']<=20) & (temp1['Z']>=0)]
plot_plot(temp1)

In [13]:
# clustering with DBSCAN and removing noise
def doClusteringWithDBSCAN(data):    
    clustering = DBSCAN(eps=1, min_samples=16).fit(data)
    labels = clustering.labels_
    return labels

In [12]:
data =  np.array(
                list(
                    zip(
                        np.array(temp1['X']), 
                        np.array(temp1['Y']), 
                        np.array(temp1['Z'])
                    )
                )
            ) 

In [14]:
labels = doClusteringWithDBSCAN(data)

In [15]:
temp1['labels'] = labels

In [16]:
temp2 = temp1[~(temp1['labels']==-1)]

In [18]:
plot_plot(temp2,True)

In [8]:
import math
# calculating degree
calDegrees=lambda x : round(math.degrees(math.atan(x)), 1)
vfunc = np.vectorize(calDegrees)
def tang(x,y):
    """ Get Tan inverse of y/x to get the angle."""
    tag= np.divide(y, x)
    return tag


def get_degrees(c):
    """ Get the degree for the data based upon cordinate plane.
    Tan behaviour in cordinate system.
    1st cord: theta
    2nd cord: 180+theta
    3rd cord: 180+theta
    4th cord: 360+theta
    return: degree of dataframe rows
    """
    
    if c['X']==0 and c['Z']>0:
        return 90
    if c['X']==0 and c['Z']<0:
        return 270
    
    degrees =  vfunc(tang(c['X'],c['Z']))
    if c['X']<0:
        degrees = 180+degrees
    if (c['X']>0 and c['Z']<0):
        degrees = 360+degrees
        
    return degrees

def angle_of_elevation(x,y,z):
    """Get Tan inverse of y/sqrt(x^2+z^2) to get the angle"""
    den = math.sqrt(x**2+z**2)
    etan= np.divide(y,den)
    return etan 


def get_phi(c):
    """ Get the degree for the data based upon cordinate plane.
    return: degree of dataframe rows
    """
    phi =  vfunc(angle_of_elevation(c['X'],c['Y'],c['Z']))
    phi =  90-phi
    return np.float(phi)

def get_r(x,y,z):
    """Get density r"""
    den = math.sqrt(x**2+z**2)
    return den 

def get_den(c):
    r = get_r(c['X'],c['Y'],c['Z'])
    return r

In [9]:
# Using Dataframe apply, a func is run on dataframe, pass the datafram to get_degrees

def get_different_sectors(temp, threshold=0.6):
    temp['angles'] = temp.apply(get_degrees,axis=1)
    temp.angles = temp.angles.astype(np.float)
    unique_angles = sorted(list(set(temp.angles.value_counts().index)))
    indexes_to_split = list(np.where(np.diff(unique_angles)>=threshold)[0]+1)
    start=0
    if len(unique_angles)==0:
        angle_ranges = [unique_angles]
    else:
        angle_ranges = []
    for i in indexes_to_split:
        angle_ranges.append(unique_angles[start:i])
        start = i
    list_of_sectors = []
    angles = np.array(temp.angles)
    for j in angle_ranges:
        max_angle = max(j)
        min_angle = min(j)
        bool_vec = (angles>=min_angle) & (angles<=max_angle)
        list_of_sectors.append(temp[bool_vec])
    list_of_valid = []
    for i,j in enumerate(list_of_sectors):
        if len(j)>10:
            j['labels'] = i
            list_of_valid.append(j)
    return list_of_valid


In [18]:
# plot the different sectors
def plot_sectors(list_of_sec,labels = True):
    df = pd.concat(list_of_sec)
    plot_plot(df, labels)

In [11]:
list_of_valid_sec = get_different_sectors(remove_out(dataframes[3]))

In [20]:
plot_sectors(list_of_valid_sec, False)
print(out_cluster[3])
print(outfile_list[3])

17
{'ToyotaPriusSimple': 4, 'ClothRecyclingContainer': 1, 'DrinkingFountain': 1, 'EmergencyPhone': 1, 'FireHydrant': 2, 'OldBench': 1, 'PhoneBooth': 1, 'PublicBin': 1, 'UndergroundContainer': 2, 'BigSassafras': 1, 'Sassafras': 1, 'ScooterSimple': 1}


In [23]:
plot_plot(remove_out(dataframes[3]))

In [48]:
# return the unique phi and angle
def get_phi_sectors(list_of_valid_sec,thresold = 0.6):
    list_of_sec_phi = []
    for ij in list_of_valid_sec:
        ij['phi'] = ij.apply(get_phi,axis=1)
        unique_phi = sorted(list(set(ij.phi.value_counts().index)))
        indexes_to_split = list(np.where(np.diff(unique_phi)>=thresold)[0]+1)
        start=0
        if len(indexes_to_split)==0:
            phi_ranges = [unique_phi]
        else:
            phi_ranges = []
        for i in indexes_to_split:
            phi_ranges.append(unique_phi[start:i])
            start = i

        angles = np.array(ij.phi)
        for j in phi_ranges:
            max_angle = max(j)
            min_angle = min(j)
            bool_vec = (angles>=min_angle) & (angles<=max_angle)
            list_of_sec_phi.append(ij[bool_vec])

    list_of_valid_sec_phi = []
    for i,j in enumerate(list_of_sec_phi):
        if len(j)>10:
            j['labels'] = i
            list_of_valid_sec_phi.append(j)
    return list_of_valid_sec_phi

In [49]:
list_of_valid_sec_phi = get_phi_sectors(list_of_valid_sec)

In [50]:
plot_sectors([list_of_valid_sec_phi[0]])
# print(list_of_valid_sec_phi[0])

In [51]:
# return the unique phi and angle
def get_valid_r(list_of_sec_phi, thresold=2):
    list_of_r = []
    for ij in list_of_valid_sec_phi:
        ij['r'] = ij.apply(get_den,axis=1)
        temp = ij.r.round(1).value_counts()
        unique_r = sorted(list(set(temp[temp>5].index)))
        indexes_to_split = list(np.where(np.diff(unique_r)>=thresold)[0]+1)
        start=0
        if len(indexes_to_split)==0:
            phi_ranges = [unique_r]
        else:
            phi_ranges = []
        for i in indexes_to_split:
            phi_ranges.append(unique_r[start:i])
            start = i

        angles = np.array(ij.r)
        for j in phi_ranges:
            if len(j)!=0:
#                 print(j)
                max_angle = max(j)
                min_angle = min(j)
                bool_vec = (angles>=min_angle) & (angles<=max_angle)
                list_of_r.append(ij[bool_vec])

    list_of_valid_r = []
    for i,j in enumerate(list_of_r):
        if len(j)>10:
            j['labels'] = i
            list_of_valid_r.append(j)
    return list_of_valid_r

In [52]:
list_of_valid_r = get_valid_r(list_of_valid_sec_phi)

In [53]:
plot_sectors(list_of_valid_r)
# plot_sectors([list_of_valid_r[0]])
print(len(list_of_valid_r[0]))

198


In [44]:
# segmenting intot clusters
def segment(df):
    objects = []
    for i,j in enumerate(df):
        temp = get_valid_r(get_phi_sectors(get_different_sectors(remove_out(j))))
        objects.append(temp)
        print("original clusters = {}, number of detected = {}".format(out_cluster[i],len(temp)))
    return objects

In [45]:
list_of_objects = segment(dataframes)

original clusters = 33, number of detected = 26
original clusters = 25, number of detected = 26
original clusters = 25, number of detected = 26
original clusters = 17, number of detected = 26
original clusters = 16, number of detected = 26
original clusters = 34, number of detected = 26
original clusters = 35, number of detected = 26
original clusters = 20, number of detected = 26
original clusters = 17, number of detected = 26
original clusters = 32, number of detected = 26
original clusters = 39, number of detected = 26
original clusters = 47, number of detected = 26
original clusters = 40, number of detected = 26
original clusters = 12, number of detected = 26


KeyboardInterrupt: 