In [None]:
import pandas as pd
import scipy.stats as st
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from algorithms import *

# Load the chameleon data set, label its two columns x / y, and keep only the
# float64 NumPy matrix for the clustering run.
# NOTE(review): read_csv treats the first CSV row as a header here — confirm
# the file really has one, otherwise the first sample is consumed as names.
cham1_frame = pd.read_csv('chameleon-1.csv', sep=",")
cham1_frame.columns = ['x', 'y']
cham1 = cham1_frame.to_numpy(np.float64)

In [None]:
# Context matrix: row 0 holds the per-column minimum of the data,
# row 1 the per-column maximum.
context = np.vstack((
    np.min(cham1, axis=0),
    np.max(cham1, axis=0),
))

In [None]:
# Instantiate the DyClee clusterer with the data bounds computed above.
# NOTE(review): phi presumably sets the relative hyperbox size — confirm
# against the SerialDyClee definition.
dyclee = SerialDyClee(
    phi=0.06,
    context=context,
)

In [None]:
# Run dataset
%%time
dyclee.run_dataset(data=cham1)

In [None]:
# Gather every micro-cluster into one list: the A_list entries first,
# followed by the O_list entries.
all_uC = [*dyclee.A_list, *dyclee.O_list]

# Report the overall count and the size of each source list.
print('There are', len(all_uC), 'microclusters')
print('Dense & Semi-dense total:', len(dyclee.A_list))
print('Low dense total:', len(dyclee.O_list))

In [None]:
# Plot hyperboxes: scatter the micro-cluster centers and outline the
# hyperbox that belongs to each of them.

# One row per micro-cluster center
cluster_df = pd.DataFrame(
    [uC.get_center() for uC in all_uC],
    columns=['x', 'y'],
)

# Hyperbox extents; sizes[0] / sizes[1] are used below as the rectangle
# width / height.
sizes = dyclee._get_hyperbox_sizes()

print("the hyperbox size is: ", sizes)

# Centers, drawn with equal axis scaling so the boxes are not distorted
ax = sns.scatterplot(x='x', y='y', data=cluster_df)
ax.set_aspect('equal')

# One red, unfilled outline per micro-cluster.
# NOTE(review): hyperbox_coordinates presumably returns the rectangle's
# anchor corner — confirm against its definition.
for uC in all_uC:
    anchor = hyperbox_coordinates(uC, sizes)
    box = Rectangle(
        anchor,
        sizes[0],
        sizes[1],
        linewidth=0.5,
        edgecolor='r',
        facecolor='none',
        clip_on=False,
    )
    ax.add_patch(box)

In [None]:
# Plot density: micro-cluster centers coloured by their density type.

# Graph legend: integer code shown in the plot for each density type
DENSITY_CODES = {
    'Dense': 0,
    'Semi-Dense': 1,
    'Low-Density': 2,
}

# Build one typed record per micro-cluster. Keeping the label out of the
# NumPy array avoids coercing the coordinates to strings and parsing them
# back afterwards.
cluster_df = pd.DataFrame(
    [list(np.ravel(uC.get_center())) + [uC.density_type] for uC in all_uC],
    columns=['x', 'y', 'z'],
)

# Fail with a readable message if a density type is not covered by the legend
unknown_types = set(cluster_df['z']) - set(DENSITY_CODES)
if unknown_types:
    raise ValueError(
        'Unexpected density types: {}'.format(sorted(map(str, unknown_types)))
    )

# Final column types: float coordinates, integer density code
cluster_df = cluster_df.astype({'x': float, 'y': float})
cluster_df['z'] = cluster_df['z'].map(DENSITY_CODES).astype(np.int64)

# plot microclusters
sns.scatterplot(x='x', y='y', hue="z", data=cluster_df)
plt.gca().set_aspect('equal')

In [None]:
# Plot snapshots: one figure row per timestamp, final clusters on the left
# and micro-clusters on the right.

# Flatten {order: {timestamp: cluster_lists}} into {timestamp: cluster_lists}
snapshot_ordered = {}
for timestamps in dyclee.snapshots.values():
    snapshot_ordered.update(timestamps)

timestamp_order = sorted(snapshot_ordered)

# squeeze=False keeps `axes` two-dimensional even when there is only a single
# snapshot, so the axes[row][col] indexing below always works.
# NOTE(review): the figure height of 600 is fixed regardless of the number of
# snapshots — adjust if the rows come out too tall or too cramped.
fig, axes = plt.subplots(
    len(timestamp_order), 2,
    sharex='col', sharey='row',
    figsize=(20, 600),
    squeeze=False,
)
cols = ["FinalClusters", "MicroClusters"]
for ax, col in zip(axes[0], cols):
    ax.set_title(col)

for i, t in enumerate(timestamp_order):
    final_list = snapshot_ordered[t]['final']
    micro_list = snapshot_ordered[t]['all']

    # plot final cluster
    final_df = pd.DataFrame(
        [np.append(uC.center, [uC.label], 0) for uC in final_list],
        columns=['x', 'y', 'class'],
    )
    # Appending a string label turns the coordinates into strings as well;
    # cast them back so they are plotted on numeric axes.
    final_df = final_df.astype({'x': float, 'y': float})
    sns.scatterplot(ax=axes[i][0], x='x', y='y', hue='class', data=final_df).legend(
        loc='lower left', bbox_to_anchor=(1.05, 0), ncol=1)
    # The timestamp doubles as the row label
    axes[i][0].set_ylabel(t, rotation=0, size='xx-large', weight='bold')

    # plot micro clusters
    micro_df = pd.DataFrame(
        [np.append(uC.get_center(), [uC.Classk], 0) for uC in micro_list],
        columns=['x', 'y', 'class'],
    )
    micro_df = micro_df.astype({'x': float, 'y': float})
    sns.scatterplot(ax=axes[i][1], x='x', y='y', hue='class', data=micro_df).legend(
        loc='lower left', bbox_to_anchor=(1.05, 0), ncol=1)

# tight_layout is a method: it must be called to have any effect
fig.tight_layout()

In [None]:
# Plot snapshots with the class displayed next to each point: one figure row
# per timestamp, final clusters on the left and micro-clusters on the right.

# Flatten {order: {timestamp: cluster_lists}} into {timestamp: cluster_lists}
snapshot_ordered = {}
for timestamps in dyclee.snapshots.values():
    snapshot_ordered.update(timestamps)

timestamp_order = sorted(snapshot_ordered)

# squeeze=False keeps `axes` two-dimensional even when there is only a single
# snapshot, so the axes[row][col] indexing below always works.
# NOTE(review): the figure height of 600 is fixed regardless of the number of
# snapshots — adjust if the rows come out too tall or too cramped.
fig, axes = plt.subplots(
    len(timestamp_order), 2,
    sharex='col', sharey='row',
    figsize=(20, 600),
    squeeze=False,
)
cols = ["FinalClusters", "MicroClusters"]
for ax, col in zip(axes[0], cols):
    ax.set_title(col)

# Shared appearance of the per-point class annotations
label_style = dict(horizontalalignment='left', size='medium', color='black', weight='normal')

for i, t in enumerate(timestamp_order):
    final_list = snapshot_ordered[t]['final']
    micro_list = snapshot_ordered[t]['all']

    # plot final cluster
    final_df = pd.DataFrame(
        [np.append(uC.center, [uC.label], 0) for uC in final_list],
        columns=['x', 'y', 'class'],
    )
    # Appending a string label turns the coordinates into strings as well;
    # cast them back so points and annotations use numeric positions.
    final_df = final_df.astype({'x': float, 'y': float})
    f_plot = sns.scatterplot(ax=axes[i][0], x='x', y='y', hue='class', data=final_df)
    f_plot.legend(loc='lower left', bbox_to_anchor=(1.05, 0), ncol=1)
    for x_pos, y_pos, cls in zip(final_df['x'], final_df['y'], final_df['class']):
        f_plot.text(x_pos, y_pos, cls, **label_style)
    # The timestamp doubles as the row label
    axes[i][0].set_ylabel(t, rotation=0, size='xx-large', weight='bold')

    # plot micro clusters
    micro_df = pd.DataFrame(
        [np.append(uC.get_center(), [uC.Classk], 0) for uC in micro_list],
        columns=['x', 'y', 'class'],
    )
    micro_df = micro_df.astype({'x': float, 'y': float})
    m_plot = sns.scatterplot(ax=axes[i][1], x='x', y='y', hue='class', data=micro_df)
    m_plot.legend(loc='lower left', bbox_to_anchor=(1.05, 0), ncol=1)
    for x_pos, y_pos, cls in zip(micro_df['x'], micro_df['y'], micro_df['class']):
        # Unclassed micro-clusters are drawn but not annotated
        if cls != 'Unclassed':
            m_plot.text(x_pos, y_pos, cls, **label_style)

# tight_layout is a method: it must be called to have any effect
fig.tight_layout()