# Clustering Israeli cities by their coordinates

In [8]:
# loading only the packages we need
import os
from os.path import join

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [9]:
# Data directory. Set the DMBI_DATA_DIR environment variable to point at your own
# copy of the data; when it is unset we fall back to the original local folder.
folder = os.environ.get('DMBI_DATA_DIR', r'C:\Users\User\Documents\DMBI_hackathon_2018')
# Full path of the CSV holding one row per city (name, latitude, longitude).
cities_csv = join(folder, r'israel_coordinates.csv')

In [14]:
# Read the coordinates table keyed by city name and discard rows with missing values
coor_df = pd.read_csv(cities_csv, index_col='City').dropna()
# Keep the city names (the frame's index) so cluster members can be labelled later
cities = coor_df.index

The data looks like this:

In [16]:
# Preview the first rows of the coordinates table.
# NOTE(review): head() returns 5 rows by default, but the saved output below
# shows 10 -- re-run this cell to refresh the output.
coor_df.head()

Unnamed: 0_level_0,Latitude,Longitude
City,Unnamed: 1_level_1,Unnamed: 2_level_1
Yerushalayim,31.78,35.22
Tel aviv-yafo,32.07,34.77
Haifa,32.82,34.99
Rishon lez?iyyon,31.96,34.80
Ashdod,31.80,34.64
Be'er sheva,31.25,34.80
Petah? tiqwa,32.09,34.88
Netanya,32.33,34.86
H?olon,32.02,34.76
Bene beraq,32.09,34.85


In [22]:
# Display the city names saved for labelling (the index of coor_df)
cities

Index(['Yerushalayim', 'Tel aviv-yafo', 'Haifa', 'Rishon lez?iyyon', 'Ashdod',
       'Be'er sheva', 'Petah? tiqwa', 'Netanya', 'H?olon', 'Bene beraq',
       ...
       'Shilo', 'Har adar', 'Timrat', 'Sallama', 'Kefar shemaryahu', 'Talmon',
       'Bet yizhaq', 'Nein', 'Na'ura', 'Rumat heib'],
      dtype='object', name='City', length=193)

In [23]:
# fitting function
def kmeans_print_groups(n_clusters, X=coor_df, cities=cities, random_state=None):
    """Fit K-means on X and print the cities that fall into each cluster.

    Parameters
    ----------
    n_clusters : int
        Number of clusters (K) to fit.
    X : array-like of shape (n_samples, n_features)
        Matrix to cluster. Defaults to ``coor_df`` as it exists when this cell
        is executed: default arguments are bound at definition time, so re-run
        this cell after reloading the data.
    cities : pandas.Index (or any sequence supporting boolean-mask indexing)
        One label per row of X, in the same order. Defaults to ``cities`` as it
        exists when this cell is executed.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``KMeans``. Pass an int to get the same groups on every
        run; None keeps the previous behaviour (fresh random initialisation
        on each call).

    Returns
    -------
    None
        One line per cluster is printed: ``Group <number>: [sorted labels]``.

    Notes
    -----
    K-means measures Euclidean distance on the raw columns of X. When those are
    latitude/longitude in degrees this only approximates ground distance.
    Cluster numbers carry no meaning and may differ between runs.
    """
    kmeans = KMeans(
        n_clusters=n_clusters,
        n_init=100,
        max_iter=3000,
        random_state=random_state,
    ).fit(X)
    labels = kmeans.labels_
    # Label ids are 0-based; they are shown 1-based in the printed output.
    for i in range(labels.max() + 1):
        print(f"Group {i+1}: {sorted(cities[labels == i])}")

### Fitting based on number of clusters (K)

In [24]:
# Coarse partition: split the cities into 10 groups
kmeans_print_groups(n_clusters=10)

Group 1: ["'akko", "'atlit", "'ir hahamisha", "Basmat tab'un", "Daliyat al karmel-'isifya", 'Haifa', "I'billin", 'Ibtin', 'Judeide-maker', 'Kabul', 'Kafar manda', 'Kafar yasif', "Mazra'a", 'Nahariyya', 'Nesher', 'Qiryat atta', 'Qiryat bialik', 'Qiryat motzkin', "Qiryat tiv'on", 'Qiryat yam', 'Ramat yishay', 'Rekhasim', 'Segev-shalom', "Sha'ab", "Shefar'am", 'Sheikh dannun', 'Shelomi', 'Tamra', 'Tirat karmel', "Yoqne'am illit"]
Group 2: ["'immanu'el", 'Alfe menashe', 'Azur', 'Bat yam', "Be'er ya'aqov", 'Bene beraq', 'Bet arye', 'Bet dagan', 'Elqana', 'Ganne tiqwa', 'Gedera', "Giv'at shemuel", "Giv'atayim", 'H?olon', 'Herzeliyya', 'Hod hasharon', 'Jaljulye', 'Kafar qasem', 'Kefar habad', 'Kefar sava', 'Kefar shemaryahu', 'Lod', 'Mazkeret batya', 'Nehalim', 'Nes z?iyyona', 'Or yehuda', 'Oranit', 'Petah? tiqwa', 'Qarne shomeron', "Qiryat 'eqron", 'Qiryat ono', "Ra'anana", "Ramat ef'al", 'Ramat gan', 'Ramat hasharon', 'Ramla', 'Reh?ovot', 'Rishon lez?iyyon', 'Rosh haayin', 'Savyon', "Sha'ar

In [25]:
# Finer partition: split the cities into 40 groups
kmeans_print_groups(n_clusters=40)

Group 1: ['Bet yizhaq', 'Even yehuda', 'Kefar yona', 'Netanya', 'Nordiyya', 'Pardesiyya', 'Qalansawe', 'Tayibe', 'Tel mond', 'Tire', 'Z?oran-qadima']
Group 2: ["Binyamina-giv'at ada"]
Group 3: ['Beit jann', 'Deir hanna', 'Mugar', 'Rame', 'Sallama']
Group 4: ['Dimona', 'Yeroham']
Group 5: ["'ein qiniyye", "'ofra", 'Bet el', "Giv'at ze'ev", "Kokhav ya'aqov", 'Talmon']
Group 6: ['Ashdod', 'Bene ayish', 'Gan yavne', 'Gedera', 'Qiryat gat', "Qiryat mal'akhi"]
Group 7: ['Elat']
Group 8: ["I'billin", 'Kabul', 'Kafar manda', 'Sakhnin', 'Segev-shalom', "Sha'ab", 'Shagor', "Shefar'am", 'Tamra']
Group 9: ['Azur', 'Bat yam', 'Bene beraq', 'Bet dagan', 'Ganne tiqwa', "Giv'at shemuel", "Giv'atayim", 'H?olon', 'Kefar habad', 'Nehalim', 'Or yehuda', 'Petah? tiqwa', 'Qiryat ono', "Ramat ef'al", 'Ramat gan', 'Savyon', 'Tel aviv-yafo', 'Yehud-newe efrayim']
Group 10: ['Netivot', 'Ofaqim']
Group 11: ['Merkaz shappira']
Group 12: ["'atlit", 'Fureidis', 'Jisr az-zarqa', "Or 'aqiva", "Zikhron ya'aqov"]
Group

In [None]:
# TODO: add elevation consideration?