In [1]:
import fnmatch
import glob
import os
import random
import time

import lxml
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as Soup
from lxml import etree
from sklearn.cluster import KMeans

In [None]:
# Sketch of the generalized workflow: locate the saved maps, then convert them.

# Folder holding the exported maps, plus a listing of everything inside it.
path = "/Users/KareemAbdo_1/Desktop/presavedMaps"
files = os.listdir(path)
# KML files matched relative to the current working directory (not `path`).
kml_files = glob.glob('*.kml')

def _first_lon_lat(coord_text):
    """Return the first (longitude, latitude) pair of a KML coordinates string, or None if it has none."""
    fields = coord_text.split(',')
    if len(fields) < 2 or not fields[0].strip():
        return None
    # For multi-point geometries the second field runs on into the next tuple
    # ("lat1 lon2" or "0 lon2"), so keep only its first whitespace-separated token.
    lat_tokens = fields[1].split()
    if not lat_tokens:
        return None
    return float(fields[0]), float(lat_tokens[0])


def kml_to_csv(map_kml, n_clusters=3, output_csv=None, random_state=None):
    """Cluster the placemarks of a KML map into "nights" and write them to a CSV file.

    Each ``<Placemark>`` contributes one row per ``<coordinates>`` element it
    contains: the first longitude/latitude pair of that element together with
    the placemark's own ``<name>``.  Reading the name from the placemark itself
    keeps names and points aligned regardless of how many document/folder
    ``<name>`` tags precede them in the file.  The points are grouped with
    k-means and every cluster is labelled ``'Night 1'``, ``'Night 2'``, ...

    Parameters
    ----------
    map_kml : str
        Path of the KML file to read.
    n_clusters : int, default 3
        Number of clusters ("nights") requested.  Capped at the number of
        points found so that small maps do not make k-means fail.
    output_csv : str, optional
        Path of the CSV to write.  Defaults to ``clustered_map_<stem>.csv`` in
        the current working directory, where ``<stem>`` is the KML file name
        without extension, so different maps never overwrite each other.
    random_state : int, optional
        Seed forwarded to ``KMeans`` for reproducible cluster numbering.

    Returns
    -------
    pandas.DataFrame
        Columns ``Longitude``, ``Latitude``, ``Name``, ``Cluster`` and
        ``Night``, ordered by cluster; the same frame that is written to disk.

    Raises
    ------
    ValueError
        If no placemark with usable coordinates is found, or if a coordinate
        field is not numeric.
    """
    # Read bytes so the XML parser can honour the encoding declared in the file
    # instead of relying on the platform's default text encoding.
    with open(map_kml, 'rb') as data:
        kml_soup = Soup(data, 'lxml-xml')  # parse as XML

    rows = []
    for placemark in kml_soup.find_all('Placemark'):
        name_tag = placemark.find('name')
        name = name_tag.get_text().strip() if name_tag is not None else ''
        for coord_tag in placemark.find_all('coordinates'):
            lon_lat = _first_lon_lat(coord_tag.get_text())
            if lon_lat is None:
                continue  # empty <coordinates/> element: nothing to plot
            rows.append((lon_lat[0], lon_lat[1], name))

    if not rows:
        raise ValueError('no placemark coordinates found in {!r}'.format(map_kml))

    # Build the frame in one go rather than appending row by row.
    df_coord = pd.DataFrame(rows, columns=['Longitude', 'Latitude', 'Name'])

    # k-means cannot produce more clusters than there are samples.
    n_clusters = min(n_clusters, len(df_coord))
    kmeans_1 = KMeans(n_clusters=n_clusters, random_state=random_state)
    predictions = kmeans_1.fit_predict(df_coord[['Longitude', 'Latitude']].values)

    clustered = df_coord.assign(Cluster=predictions)
    # Cluster labels are 0-based; nights are presented 1-based.
    clustered['Night'] = ['Night {}'.format(label + 1) for label in predictions]
    # Sort on the numeric label (so 'Night 10' follows 'Night 9'), stably, which
    # keeps the file's placemark order inside each night.
    clustered = clustered.sort_values(by='Cluster', kind='mergesort')

    if output_csv is None:
        stem = os.path.splitext(os.path.basename(map_kml))[0]
        output_csv = 'clustered_map_{}.csv'.format(stem)
    clustered.to_csv(output_csv)
    return clustered
    

# Watch a folder and convert every KML file that appears in it.
# NOTE: the loop below never terminates on its own; interrupt the kernel to stop it.
path = "/Users/KareemAbdo_1/Desktop/presavedMaps/preprocessedMaps"
path_to_watch = path
POLL_INTERVAL_SECONDS = 1  # pause between scans so the loop does not spin a CPU core
print('Your folder path is"',path,'"')

# Names already present when watching starts are treated as known and are not converted.
before = set(os.listdir(path_to_watch))
while True:
    current = os.listdir(path_to_watch)
    after = set(current)
    # Preserve the directory listing order for the newly seen names.
    added = [f for f in current if f not in before]
    if added:
        print("Added: ", ", ".join(added))
        # Handle every new file, not just the first one reported in this scan.
        for new_file in added:
            print(new_file)
            if fnmatch.fnmatch(new_file, '*.kml'):
                # Build the path from the watched folder so the call does not
                # depend on the notebook's current working directory.
                kml_to_csv(os.path.join(path_to_watch, new_file))
                print(1)
    before = after
    time.sleep(POLL_INTERVAL_SECONDS)

    


Your folder path is" /Users/KareemAbdo_1/Desktop/presavedMaps/preprocessedMaps "
Added:  Untitled map.kml
Untitled map.kml
1
Added:  barsClubsHouston.kml
barsClubsHouston.kml
1


In [None]:
# Display the coordinates table.
# NOTE(review): in the visible cells `df_coord` is only assigned inside
# kml_to_csv (a local variable), never at module level, so this presumably
# raises NameError on a fresh kernel - confirm where it is meant to come from.
df_coord

In [None]:
# Scatter plot of the raw placemark coordinates (longitude on x, latitude on y).
# NOTE(review): `df_coord` must already exist in the kernel; in the visible
# cells it is only assigned inside kml_to_csv - confirm where it comes from.
fig = plt.figure(figsize=(16,8))
# No `c=` values are supplied, so a colormap/norm would have no effect here.
plt.scatter(df_coord['Longitude'], df_coord['Latitude'], s=150, edgecolor='none')
# Labels match the plotted columns: x is longitude, y is latitude.
plt.xlabel('Longitude', fontsize=18)
plt.ylabel('Latitude', fontsize=18)
plt.grid()
plt.show()

In [None]:
# Scatter plot of the clustered points, coloured by cluster label, with the
# k-means centres overlaid in translucent black.
# NOTE(review): `clustered` and `kmeans_1` must already exist in the kernel; in
# the visible cells they are only assigned inside kml_to_csv - confirm.
fig = plt.figure(figsize=(16,8))
cmap = plt.cm.rainbow
# Discrete colour bins with boundaries at 0, 1, 2, 3 for the cluster labels.
norm = matplotlib.colors.BoundaryNorm(np.arange(0,4,1), cmap.N)
plt.scatter(clustered['Longitude'], clustered['Latitude'], c=clustered['Cluster'],
            cmap=cmap, norm=norm, s=150, edgecolor='none')
plt.colorbar(ticks=np.linspace(0,3,4))
centers = kmeans_1.cluster_centers_
# Column 0 of the centres is longitude and column 1 latitude, matching the
# column order used when fitting.
plt.scatter(centers[:,0], centers[:,1], c='black', s=100, alpha=0.3);
# Labels match the plotted columns: x is longitude, y is latitude.
plt.xlabel('Longitude', fontsize=14)
plt.ylabel('Latitude', fontsize=14)
plt.title('k-means clustering results (3 clusters)', fontsize=14)
plt.grid()
plt.show()

