In [139]:
# https://stackoverflow.com/questions/19412462/getting-distance-between-two-points-based-on-latitude-longitude
# https://www.kdnuggets.com/2020/04/dbscan-clustering-algorithm-machine-learning.html
# https://scikit-learn.org/stable/modules/generated/sklearn.cluster.DBSCAN.html
# https://docs.kepler.gl/docs/keplergl-jupyter

In [140]:

import numpy as np

import pandas as pd
# Lift pandas' display limits so frames are shown without truncating rows,
# columns or long cell values; width=None leaves the line width to pandas.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

from IPython.core.display import HTML
# Inject CSS so <pre> output keeps its whitespace and is not wrapped.
# `display` is not imported here; it is expected to be supplied by the
# IPython/Jupyter runtime.
display(HTML("<style>pre { white-space: pre !important; }</style>"))

from sklearn.cluster import DBSCAN
from geopy import distance
import csv

In [141]:
def load_carvansaras(path, encoding="utf-8"):
    """Read caravanserai records from a CSV file.

    The first line of the file is treated as a header and skipped. For every
    remaining non-blank row only the first three columns are kept, in file
    order.

    Args:
        path: Location of the CSV file.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII names are decoded the same way on every platform
            instead of depending on the locale's default encoding.

    Returns:
        A list of ``[row[0], row[1], row[2]]`` lists, one per data row. The
        list is empty when the file has no data rows.

    Raises:
        IndexError: If a non-blank data row has fewer than three columns.
    """
    # newline="" is what the csv module asks for, so that line breaks inside
    # quoted fields are handled by the reader rather than by open().
    with open(path, encoding=encoding, newline="") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=",")
        # Skip the header; the default keeps an empty file from raising.
        next(csv_reader, None)
        # csv.reader yields [] for blank lines; those carry no record.
        return [[row[0], row[1], row[2]] for row in csv_reader if row]

In [142]:
# Read the raw caravanserai rows; the relative path is resolved against the
# kernel's current working directory.
carvansaras = load_carvansaras("carvansara.csv")

In [143]:
# Index 1 of every record is the name; index 2 is a "latitude,longitude"
# string whose separator may also be written as "-", so "-" is normalised to
# "," before splitting.
names = [record[1] for record in carvansaras]
coordinate_pairs = [
    record[2].replace("-", ",").split(",") for record in carvansaras
]
latitudes = [float(lat_text) for lat_text, _ in coordinate_pairs]
longitudes = [float(lon_text) for _, lon_text in coordinate_pairs]

In [144]:
# One row per caravanserai, built column-wise from the parsed lists.
df = pd.DataFrame({
    'Name': names,
    'Latitude': latitudes,
    'Longitude': longitudes,
})

In [145]:
print(len(df))

205


In [146]:
df.head()

Unnamed: 0,Name,Latitude,Longitude
0,کاروانسرای,38.344328,45.834966
1,کاروانسرای خواجه نظر,38.977452,45.577038
2,کاروانسرای جمال آباد,37.271923,47.843075
3,کاروانسرای رباط شرق,36.26649,60.655253
4,کاروانسرای منظریه,34.891223,50.819861


In [147]:
df.tail()

Unnamed: 0,Name,Latitude,Longitude
200,کاروانسرا_غیب_الله,27.302168,54.472015
201,BaqerAbad_Caravansary,34.930295,50.823524
202,هتل_کاروانسرای_شمسی,32.105542,54.118904
203,کاروانسرای_تاریخی_بلاد_شاپور,30.788056,50.561667
204,کاروانسرا_برکه_سلطان,27.242924,55.510821


In [148]:
def geo_distance(coordinates_from, coordinates_to):
    """Return the distance in kilometres between two coordinate pairs.

    Both arguments are forwarded unchanged to geopy's ``distance.distance``;
    this notebook passes them as (latitude, longitude) pairs.
    """
    separation = distance.distance(coordinates_from, coordinates_to)
    return separation.km

In [149]:
# Latitude first, then longitude: every row becomes one coordinate pair that
# DBSCAN hands to geo_distance, and geopy reads pairs as (latitude, longitude).
locations = df[["Latitude", "Longitude"]].to_numpy()

In [150]:
# Cluster the locations with DBSCAN, using geo_distance as the metric so that
# eps is expressed in kilometres.
EPS_KM = 1
MIN_SAMPLES = 2

db = DBSCAN(eps=EPS_KM, min_samples=MIN_SAMPLES, metric=geo_distance)
db.fit(locations)
labels = db.labels_

# Boolean mask flagging the samples DBSCAN reports as core samples.
core_samples_mask = np.zeros_like(labels, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Label -1 marks noise, so it is counted separately and does not count as a
# cluster.
n_noise_ = int(np.sum(labels == -1))
cluster_ids = set(labels)
cluster_ids.discard(-1)
n_clusters = len(cluster_ids)

In [151]:
print('Estimated number of clusters: %d' % n_clusters)
print('Estimated number of noise points: %d' % n_noise_)

Estimated number of clusters: 4
Estimated number of noise points: 197


In [152]:
labels
# Noisy samples are given the label -1

array([-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1,  2, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, -1,  3, -1, -1, -1,
       -1, -1, -1,  0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  2, -1, -1, -1, -1, -1, -1,
       -1])

In [153]:
# Attach each row's DBSCAN label as a new column (-1 marks noise). This
# modifies df in place.
df["label"] = labels

In [154]:
n_clusters

4

In [155]:
# Start one below the first cluster id: the inspection cell increments this
# before use, so its first run shows cluster 0.
cluster_num = -1

In [156]:

# NOTE: this cell is stateful. Every execution advances cluster_num by one,
# so re-running it steps through the clusters (0, 1, 2, ...) one at a time;
# it is not idempotent.
cluster_num+=1
print("@", cluster_num)
# Rows whose DBSCAN label equals the current cluster number.
check_loc = df[df["label"] == cluster_num]
check_loc

@ 0


Unnamed: 0,Name,Latitude,Longitude,label
23,کاروانسرای شاهی میبد,32.227928,54.009365,0
173,Shah_Abbasi_Caravanserai,32.227965,54.009349,0
