# Spatial clustering analysis

## KDE all data
Kernel density analysis with occupancy data from each time-stamp

In [None]:
import numpy as np
import geopandas as gpd
from scipy.stats import gaussian_kde

In [None]:
# Load the occupancy records from the feather file.
all_data = gpd.read_feather('./occupancy.feather')
print('read')

# Expose the coordinates of each geometry as plain numeric columns.
all_data['x'] = all_data['geometry'].x
all_data['y'] = all_data['geometry'].y

# Work on an independent copy that holds only the two coordinate columns.
td = all_data[['x', 'y']].copy()

# gaussian_kde expects one row per dimension, hence the transpose.
kde = gaussian_kde(td.T)
print('kde done')

# Evaluation grid: x in [-3, 32] and y in [-12, 6], four steps per unit
# (0.25 spacing), i.e. 141 x 73 nodes.
x_axis = np.linspace(-3, 32, 35 * 4 + 1)
y_axis = np.linspace(-12, 6, 18 * 4 + 1)
x_grid, y_grid = np.meshgrid(x_axis, y_axis)

# Flatten the grid into a (2, n_nodes) array of query points.
grid_points = np.stack((x_grid.ravel(), y_grid.ravel()))
density_values = kde.evaluate(grid_points)

# The densities are stored flat, in the row-major order of the meshgrid.
np.save('OUTPUT.npy', density_values)

## Density-based clustering x-y-z Test
DBSCAN test on x, y and count data

In [None]:
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.neighbors import NearestNeighbors
import itertools
from sklearn.metrics import silhouette_samples

# Load the occupancy position/count table.
occupancy_position = pd.read_feather('./occupancy_position.feather')

# Shuffle all rows (frac=1) with a fixed seed, then round the coordinates
# to two decimals so that nearby positions fall on the same (x, y) key.
ro = (
    occupancy_position
    .sample(frac=1, random_state=42)
    .assign(
        x=lambda frame: frame['x'].round(2),
        y=lambda frame: frame['y'].round(2),
    )
)

# Merge rows that now share a coordinate by summing their counts.
ro_1 = ro.groupby(['x', 'y'])['Count'].sum().reset_index()

# Feature matrix for clustering: one row per coordinate, columns x, y, Count.
features = ro_1[['x', 'y', 'Count']].to_numpy()

In [None]:
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.neighbors import NearestNeighbors
import itertools
from sklearn.metrics import silhouette_samples

# Load the occupancy position/count table.
occupancy_position = pd.read_feather('./occupancy_position.feather')

# Shuffle all rows (frac=1) with a fixed seed, then round the coordinates
# to two decimals so that nearby positions fall on the same (x, y) key.
ro = (
    occupancy_position
    .sample(frac=1, random_state=42)
    .assign(
        x=lambda frame: frame['x'].round(2),
        y=lambda frame: frame['y'].round(2),
    )
)

# Merge rows that now share a coordinate by summing their counts.
ro_1 = ro.groupby(['x', 'y'])['Count'].sum().reset_index()

# Feature matrix for clustering: one row per coordinate, columns x, y, Count.
features = ro_1[['x', 'y', 'Count']].to_numpy()

# Parameter testing is finished; this is the run used for the final plot.
dbscan = DBSCAN(eps=2, min_samples=200)
labels = dbscan.fit(features).labels_

# DBSCAN marks noise with the label -1; it is not counted as a cluster.
n_clusters = len(set(labels) - {-1})
n_noise = int((labels == -1).sum())

In [None]:
# Report the DBSCAN outcome computed in the previous cell.
print(f'Number of clusters: {n_clusters}')
print(f'Number of noise points: {n_noise}')

In [None]:
# Sampled silhouette
# The silhouette is computed on a random subset of the clustered points
# rather than on the full feature matrix.

# Upper bound on the number of points used for the silhouette calculation.
# The actual size is capped by the data size, because sampling without
# replacement raises ValueError when more items are requested than exist.
max_silhouette_samples = 100000
sample_size = min(max_silhouette_samples, len(features))

# Fixed seed so the reported score is reproducible between runs.
rng = np.random.default_rng(42)
random_indices = rng.choice(len(features), size=sample_size, replace=False)
sampled_data = features[random_indices]
sampled_labels = labels[random_indices]

# Per-point silhouette values for the sample. The result is stored under its
# own name so the imported `silhouette_samples` function is not overwritten
# and this cell can be re-run.
# NOTE(review): noise points (label -1) are kept in the sample and are thus
# scored as if they formed one more cluster - confirm this is intended.
sample_silhouette_values = silhouette_samples(sampled_data, sampled_labels)

# Mean silhouette over the sampled points.
silhouette_score = np.mean(sample_silhouette_values)

silhouette_score