# UrbanGB Dataset

- **Dataset Paper**: C. Baldassi, "Recombinator-k-means: A population based algorithm that exploits k-means++ for recombination", arXiv:1905.00531, 2019
- **Dataset Source**: https://archive.ics.uci.edu/ml/datasets/UrbanGB%2C+urban+road+accidents+coordinates+labelled+by+the+urban+center#

- **UCI**: Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.

In [None]:
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd

from DyClee.algorithms import SerialDyClee
from DyClee.plotting import *

In [None]:
# Load the dataset with read_csv defaults. Later cells select the 'lon' and 'lat'
# columns by name, so the file is expected to provide them in its header row.
df = pd.read_csv('urbanGB.txt')
print(df.shape)
# `head` is a method: it has to be called to get a short preview. Printing the
# bound method itself dumps the repr of the whole frame instead.
print(df.head())

In [None]:
# Restrict to the western region: keep rows whose longitude lies in [-4, -2]
# and whose latitude lies in [51, 52] (bounds inclusive).
in_lon_band = (df['lon'] >= -4) & (df['lon'] <= -2)
in_lat_band = (df['lat'] >= 51) & (df['lat'] <= 52)
df = df[in_lon_band & in_lat_band]
print(df.shape)

In [None]:
# Overview map: coastlines only, no data points are drawn in this cell.
# Extent tuple is ordered (x0, x1, y0, y1).
# NOTE(review): presumably the lon/lat bounding box of the full dataset — confirm.
overview_extent = (-5.55599, 1.75834, 50.0797, 57.6956)

ax = plt.axes(projection=ccrs.EckertVI())
ax.set_extent(overview_extent)
ax.coastlines(resolution='10m')
plt.show()

In [None]:
# Scatter the points of df over the coastline, zoomed to the window
# lon [-4, -2] x lat [51, 52] (extent order is x0, x1, y0, y1).
window_extent = (-4, -2, 51, 52)
lonlat_crs = ccrs.PlateCarree()  # the 'lon'/'lat' columns are passed through this CRS

ax = plt.axes(projection=ccrs.EckertVI())
ax.set_extent(window_extent)
ax.coastlines(resolution='10m')
ax.scatter(df['lon'], df['lat'], transform=lonlat_crs)

# NOTE: the final clustering cell saves to this same file name.
plt.savefig('map.png')
plt.show()

In [None]:
# Optional longitude rescaling described in the README (currently disabled)
# df['lon'] = df['lon'] / 1.7

# All columns of df, converted to a float64 matrix
X = df.to_numpy(dtype=np.float64)

# Context matrix: row 0 holds the per-column minimum, row 1 the per-column maximum
column_min = X.min(axis=0)
column_max = X.max(axis=0)
context = np.vstack([column_min, column_max])

# Instantiate the DyClee clusterer
dyclee = SerialDyClee(context=context, phi=0.03)

In [None]:
%%time

# Run DyClee over the whole matrix X. The return value is kept in `results`;
# the plotting cell reads `dyclee.A_list` / `dyclee.O_list` rather than `results`.
results = dyclee.run_dataset(data=X)

In [None]:
# Plot the centre of every entry in dyclee.A_list and dyclee.O_list on the map,
# coloured by class label, and save the figure to 'map.png'.

# Optional overlays. Both are off by default, so the figure only contains the
# coastline and the coloured centres.
SHOW_CLASS_LABELS = False  # write each uC's Classk next to its centre
SHOW_HYPERBOXES = False    # outline a box of size hyperbox_size around each centre

all_uC = dyclee.A_list + dyclee.O_list

# Map the centres back to data coordinates WITHOUT writing to uC.center.
# Assigning the result to uC.center would make this cell non-idempotent (every
# re-run would apply the transform again) and would alter the model's own state.
# NOTE(review): `context` is built with two rows (min, max); row 2 of
# dyclee.context is assumed to be a per-column range added by SerialDyClee — confirm.
offset = dyclee.context[0]
scale = dyclee.context[2]
centers = [(uC.center * scale) + offset for uC in all_uC]
cluster_df = pd.DataFrame(centers, columns=['x', 'y'])

# Numeric colour value per centre; "Unclassed" entries are mapped to -1.0.
labels = strip_labels([uC.Classk for uC in all_uC])
labels = [float(label) if label != "Unclassed" else -1.0 for label in labels]
hyperbox_size = dyclee._get_hyperbox_sizes()

plt.figure(figsize=(20, 10))
ax = plt.axes(projection=ccrs.EckertVI())
ax.set_extent((-4, -2, 51, 52))
ax.coastlines(resolution='10m')
lonlat_crs = ccrs.PlateCarree()
ax.scatter(cluster_df['x'], cluster_df['y'], c=labels, cmap='hsv', transform=lonlat_crs)

if SHOW_HYPERBOXES:
    # Imported only when needed so the default path adds no new dependency.
    from matplotlib.patches import Rectangle

if SHOW_CLASS_LABELS or SHOW_HYPERBOXES:
    for uC, center in zip(all_uC, centers):
        if SHOW_CLASS_LABELS:
            ax.text(center[0], center[1], uC.Classk, horizontalalignment='left',
                    size='medium', color='black', weight='normal', transform=lonlat_crs)
        if SHOW_HYPERBOXES:
            # Box centred on the point: lower-left corner is centre minus half the size.
            # NOTE(review): assumes hyperbox_size is in the same units as the
            # rescaled centres — confirm.
            lower_left = (center[0] - hyperbox_size[0] / 2,
                          center[1] - hyperbox_size[1] / 2)
            ax.add_patch(Rectangle(lower_left, hyperbox_size[0], hyperbox_size[1],
                                   linewidth=0.5, edgecolor='r', facecolor='none',
                                   clip_on=False, transform=lonlat_crs))

plt.savefig('map.png')
plt.show()