# ANC Neighbors

This is a geospatial analysis of ~600 household addresses to connect church members (Austin New Church) across Austin metro in a data-driven fashion.

Project overview: 
- Load and extract addresses from database and convert them to geospatial coordinates 
- Apply k-means clustering to identify geospatial clusters and classify new datapoints 
- Perform basic descriptive statistics and visualizations for geospatial clusters

In [1]:
import pandas as pd
import os
os.environ["PROJ_LIB"] = '/Users/tbw665/anaconda3/share/proj/';
import numpy as np

import folium 

from mpl_toolkits.basemap import Basemap
import matplotlib
from PIL import Image
import matplotlib.pyplot as plt
from pylab import rcParams
%matplotlib inline
rcParams['figure.figsize'] = (16,12)

import os.path

from sklearn.cluster import KMeans
from collections import Counter


In [215]:
# read in data

df = pd.read_excel('ANCPartners07-17-2020.xlsx')

# drop blank Address
df = df[df['Street Address'].notnull()]

# extract first 5 digits of zipcode
df['Zip'] = df['Zip'].str[:5]

# subset dataframe 
sdf = df[['Breeze ID', 
              'Street Address', 
              'City', 
              'State', 
              'Zip']]
sdf.columns

Index(['Breeze ID', 'Street Address', 'City', 'State', 'Zip'], dtype='object')

In [216]:
sdf.head()

Unnamed: 0,Breeze ID,Street Address,City,State,Zip
0,7171383,1233 Strickland Dr,Austin,TX,78748
1,7171382,1233 Strickland Dr,Austin,TX,78748
2,9590190,14710 general williamson drive,austin,TX,78734
3,7170080,7312 Lunar Dr.,Austin,TX,78745
4,17216618,8010 E State Highway 29,Georgetown,TX,78626


In [91]:
sdf.dtypes

Breeze ID          int64
Street Address    object
City              object
State             object
Zip               object
dtype: object

In [217]:
# concatenate to one variable
cols = ['Street Address', 'City', 'State', 'Zip']


sdf.loc[:, ['Address']] = sdf.loc[:, cols].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
# replace double space with single space

sdf.loc[:, ['Address']] = sdf.Address.str.replace('  ', ' ')

sdf.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[k] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Unnamed: 0,Breeze ID,Street Address,City,State,Zip,Address
0,7171383,1233 Strickland Dr,Austin,TX,78748,1233 Strickland Dr Austin TX 78748
1,7171382,1233 Strickland Dr,Austin,TX,78748,1233 Strickland Dr Austin TX 78748
2,9590190,14710 general williamson drive,austin,TX,78734,14710 general williamson drive austin TX 78734
3,7170080,7312 Lunar Dr.,Austin,TX,78745,7312 Lunar Dr. Austin TX 78745
4,17216618,8010 E State Highway 29,Georgetown,TX,78626,8010 E State Highway 29 Georgetown TX 78626


In [218]:
# reduce to unique addresses (households)
unique_add_arr = sdf['Address'].unique()

# convert to df
unique_add_df = pd.DataFrame(data=unique_add_arr.flatten())
unique_add_df.rename(columns={'0':'Address'}, inplace=True)

# rename colname
unique_add_df.columns = unique_add_df.columns.astype(str)
unique_add_df.rename(columns={'0':'Address'}, inplace=True)

unique_add_df.head()

Unnamed: 0,Address
0,1233 Strickland Dr Austin TX 78748
1,14710 general williamson drive austin TX 78734
2,7312 Lunar Dr. Austin TX 78745
3,8010 E State Highway 29 Georgetown TX 78626
4,902 Bluebird Dr. Manchaca TX 78652


In [None]:
# convert street address to geospatial coordinates (latitude, longitude)
from googlemaps import Client as GoogleMaps
from time import sleep
gmaps = GoogleMaps('AIzaSyBKXfvOQ0h7uDf_Utu_xpJlCKMiyk5OrvQ')


for x in range(len(unique_add_df)):
    try:
        sleep(1) # add delay
        geocode_result = gmaps.geocode(unique_add_df.loc[x, 'Address'])
        unique_add_df.loc[x, 'lat'] = geocode_result[0]['geometry']['location'] ['lat']
        unique_add_df.loc[x, 'long'] = geocode_result[0]['geometry']['location']['lng']
    except IndexError:
        print("Address was wrong...")
    except Exception as e:
        print("Unexpected error occurred.", e )

unique_add_df

In [69]:
# save for easier use later
unique_add_df.to_csv('unique_add_lat_long.csv')

In [None]:
# join with Breeze ID info
sdf = sdf.merge(unique_add_df, on = 'Address')

In [35]:
sdf.loc[x, 'lat'] = geocode_result[0]['geometry']['location'] ['lat']

In [79]:
# save for easier use later
sdf.to_csv('ANCPartners07-17-2020_with_lat_long.csv', sep=',')

In [181]:
# exploratory, interactive map
import folium
from folium.plugins import FastMarkerCluster

folium_map = folium.Map(location=[30.2711286, -97.7436995],
                        zoom_start=12)


FastMarkerCluster(data=list(zip(sdf['lat'].values, sdf['long'].values))).add_to(folium_map)
folium.LayerControl().add_to(folium_map)
folium_map.save('geocode.html')
folium_map


# build training classifier

In [289]:
sdf = pd.read_csv('./cb_2018_us_zcta510_500k/ANCPartners07-17-2020_with_cluster_lbls_k8_training.csv')
sdf.head()

Unnamed: 0,Breeze ID,First Name,Last Name,Gender,Status,Marital Status,Race/Ethnicity,Birthdate,Age,Mobile,...,Mobile Phone Carrier,Email,Street Address,City,State,Zip,Address,lat,long,cluster_lbl
0,11480687,Don,Brimberry,,,,White,,,(512) 656-6572,...,ATT,don@brimberrypottery.com,8614 Millway Drive,Austin,TX,78757,8614 Millway Drive Austin TX 78757,30.372528,-97.735653,Northwest
1,11480688,Terra,Brimberry,,,,White,,,(512) 820-7543,...,Verizon,tdbrimberry@gmail.com,8614 Millway Drive,Austin,TX,78757,8614 Millway Drive Austin TX 78757,30.372528,-97.735653,Northwest
2,12017521,Kel,Catterton,,,,White,,,(913) 945-0374,...,ATT,mrscatterton@hotmail.com,5111 Woodrow Avenue,Austin,TX,78756,5111 Woodrow Avenue Austin TX 78756,30.322094,-97.736932,Northwest
3,12017528,Phillip,Catterton,,,,White,,,(817) 505-5148,...,ATT,phillip.catterton@gmail.com,5111 Woodrow Avenue,Austin,TX,78756,5111 Woodrow Avenue Austin TX 78756,30.322094,-97.736932,Northwest
4,7171505,Robert,Deckard,M,Partner,Divorced,White,2/25/70,50.0,(512) 789-7740,...,Other,bdeckard92@gmail.com,12007 N Lamar Blvd,AUSTIN,TX,78753,12007 N Lamar Blvd AUSTIN TX 78753,30.39273,-97.679802,Northwest


In [290]:
X=sdf.loc[:,['lat','long']]

In [291]:
# run initial KMeans clustering

id_n=8
model = KMeans(n_clusters=id_n, random_state=0)
kmeans = model.fit(X)
id_label=kmeans.labels_

In [292]:
kmeans.cluster_centers_

array([[ 30.35266913, -97.72032272],
       [ 30.06238743, -97.87615356],
       [ 30.23534942, -97.88350643],
       [ 29.65515776, -98.30047522],
       [ 30.71863799, -98.24802953],
       [ 30.23962884, -97.39499461],
       [ 30.52666663, -97.75956241],
       [ 30.19006783, -97.79127969]])

In [293]:
sdf.head()

Unnamed: 0,Breeze ID,First Name,Last Name,Gender,Status,Marital Status,Race/Ethnicity,Birthdate,Age,Mobile,...,Mobile Phone Carrier,Email,Street Address,City,State,Zip,Address,lat,long,cluster_lbl
0,11480687,Don,Brimberry,,,,White,,,(512) 656-6572,...,ATT,don@brimberrypottery.com,8614 Millway Drive,Austin,TX,78757,8614 Millway Drive Austin TX 78757,30.372528,-97.735653,Northwest
1,11480688,Terra,Brimberry,,,,White,,,(512) 820-7543,...,Verizon,tdbrimberry@gmail.com,8614 Millway Drive,Austin,TX,78757,8614 Millway Drive Austin TX 78757,30.372528,-97.735653,Northwest
2,12017521,Kel,Catterton,,,,White,,,(913) 945-0374,...,ATT,mrscatterton@hotmail.com,5111 Woodrow Avenue,Austin,TX,78756,5111 Woodrow Avenue Austin TX 78756,30.322094,-97.736932,Northwest
3,12017528,Phillip,Catterton,,,,White,,,(817) 505-5148,...,ATT,phillip.catterton@gmail.com,5111 Woodrow Avenue,Austin,TX,78756,5111 Woodrow Avenue Austin TX 78756,30.322094,-97.736932,Northwest
4,7171505,Robert,Deckard,M,Partner,Divorced,White,2/25/70,50.0,(512) 789-7740,...,Other,bdeckard92@gmail.com,12007 N Lamar Blvd,AUSTIN,TX,78753,12007 N Lamar Blvd AUSTIN TX 78753,30.39273,-97.679802,Northwest


In [294]:
df_train = sdf
df_train['cluster_lbl'] = kmeans.labels_

In [295]:
# count households per cluster
df_train.drop_duplicates(subset=['Address'])[['Address', 'cluster_lbl']].groupby('cluster_lbl').agg(['count'])

Unnamed: 0_level_0,Address
Unnamed: 0_level_1,count
cluster_lbl,Unnamed: 1_level_2
0,22
1,26
2,41
3,3
4,1
5,2
6,10
7,49


In [153]:
from typing import Optional, List
import branca as bc

def add_categorical_legend(
    folium_map: folium.Map,
    title: str,
    colors: List[str],
    labels: List[str],
) -> folium.Map:
    """
    Given a Folium map, add to it a categorical legend with the given title, colors, and corresponding labels.
    The given colors and labels will be listed in the legend from top to bottom.
    Return the resulting map.
    
    Based on `this example <http://nbviewer.jupyter.org/gist/talbertc-usgs/18f8901fc98f109f2b71156cf3ac81cd>`_.
    """
    # Error check
    if len(colors) != len(labels):
        raise ValueError("colors and labels must have the same length.")

    color_by_label = dict(zip(labels, colors))

    # Make legend HTML
    template = f"""
    {{% macro html(this, kwargs) %}}

    <!doctype html>
    <html lang="en">
    <head>
      <meta charset="utf-8">
      <meta name="viewport" content="width=device-width, initial-scale=1">
    </head>
    <body>
    <div id='maplegend' class='maplegend'>
      <div class='legend-title'>{title}</div>
      <div class='legend-scale'>
        <ul class='legend-labels'>
    """

    for label, color in color_by_label.items():
        template += f"<li><span style='background:{color}'></span>{label}</li>"

    template += """
        </ul>
      </div>
    </div>

    </body>
    </html>

    <style type='text/css'>
      .maplegend {
        position: absolute;
        z-index:9999;
        background-color: rgba(255, 255, 255, 1);
        border-radius: 5px;
        border: 2px solid #bbb;
        padding: 10px;
        font-size:12px;
        right: 10px;
        bottom: 20px;
      }
      .maplegend .legend-title {
        text-align: left;
        margin-bottom: 5px;
        font-weight: bold;
        font-size: 90%;
        }
      .maplegend .legend-scale ul {
        margin: 0;
        margin-bottom: 5px;
        padding: 0;
        float: left;
        list-style: none;
        }
      .maplegend .legend-scale ul li {
        font-size: 80%;
        list-style: none;
        margin-left: 0;
        line-height: 18px;
        margin-bottom: 2px;
        }
      .maplegend ul.legend-labels li span {
        display: block;
        float: left;
        height: 16px;
        width: 30px;
        margin-right: 5px;
        margin-left: 0;
        border: 0px solid #ccc;
        }
      .maplegend .legend-source {
        font-size: 80%;
        color: #777;
        clear: both;
        }
      .maplegend a {
        color: #777;
        }
    </style>
    {% endmacro %}
    """

    macro = bc.element.MacroElement()
    macro._template = bc.element.Template(template)
    folium_map.get_root().add_child(macro)

    return folium_map

In [161]:



# Use colors from https://colorbrewer2.org/#type=qualitative&scheme=Dark2&n=5
colors = ['#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0', '#f0027f', '#bf5b17', '#666666' ]
categories = sorted(df_train.cluster_lbl.unique().tolist())


# Color by category
color_dict = { 
    cat: colors[categories.index(cat)]
    for cat in df_train.cluster_lbl.values
}
                  

lat = df_train['lat'].values
lon = df_train['long'].values

m = folium.Map(location=[np.mean(df_train['lat'].values), np.mean(df_train['long'].values)],
                        zoom_start=9, max_zoom=13)

df_train.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color=color_dict[row['cluster_lbl']], popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)
# Add map legend 
m = add_categorical_legend(m, "Category", colors=colors, labels=categories)

m

In [296]:
# after visual inspection, manually change outliers , down to 5 clusters

df_train.cluster_lbl.replace({3: 1, 
                            5: 0,  
                            4: 6}, 
                           inplace=True)


In [184]:

# colors = ['#7fc97f','#beaed4','#fdc086','#ffff99','#386cb0']
categories = sorted(df_train.cluster_lbl.unique().tolist())
cmap = plt.cm.get_cmap('Accent', len(categories))   
colors = []

for i in range(cmap.N):
    rgba = cmap(i)
    colors.append(matplotlib.colors.rgb2hex(rgba))

# Color by category
color_dict = { 
    cat: colors[categories.index(cat)]
    for cat in df_train.cluster_lbl.values
}
                  

lat = df_train['lat'].values
lon = df_train['long'].values

m = folium.Map(location=[np.mean(df_train['lat'].values), np.mean(df_train['long'].values)],
                        zoom_start=9, max_zoom=13)

df_train.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color=color_dict[row['cluster_lbl']], popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)
# Add map legend 
m = add_categorical_legend(m, "Category", colors=colors, labels=categories)

m

In [297]:
# change encoding ascending, south -> north
df_train.cluster_lbl = df_train.cluster_lbl.map({1: 0, 
                                             2: 1,
                                             7: 2,
                                             0: 3,
                                             6: 4})
# name clusters based on geography
df_train.cluster_lbl = df_train.cluster_lbl.map({0: 'Buda/Kyle', 
                                         1: 'Southwest', 
                                         2: 'S Central', 
                                         3: 'N Central/Northwest', 
                                         4: 'Far North'})

In [298]:
categories = sorted(df_train.cluster_lbl.unique().tolist())
cmap = plt.cm.get_cmap('Accent', len(categories))   
colors = []

for i in range(cmap.N):
    rgba = cmap(i)
    colors.append(matplotlib.colors.rgb2hex(rgba))

# Color by category
color_dict = { 
    cat: colors[categories.index(cat)]
    for cat in df_train.cluster_lbl.values
}
                  

lat = df_train['lat'].values
lon = df_train['long'].values

m = folium.Map(location=[np.mean(df_train['lat'].values), np.mean(df_train['long'].values)],
                        zoom_start=9, max_zoom=13)

df_train.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color=color_dict[row['cluster_lbl']], popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)
# Add map legend 
m = add_categorical_legend(m, "Category", colors=colors, labels=categories)

m

# apply classifier to new addresses

In [247]:
df_new = pd.read_csv('ANC-people-080921.csv')

# drop blank Address
df_new = df_new[df_new['Street Address'].notnull()]

# extract first 5 digits of zipcode
df_new['Zip'] = df_new['Zip'].str[:5]

# subset dataframe 
sdf_new = df_new[['Street Address', 
              'City', 
              'State', 
              'Zip']]
sdf_new.columns

Index(['Street Address', 'City', 'State', 'Zip'], dtype='object')

In [248]:
# concatenate to one variable
cols = ['Street Address', 'City', 'State', 'Zip']


sdf_new['Address'] = sdf_new.loc[:, cols].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)
# replace double space with single space

sdf_new['Address'] = sdf_new.Address.str.replace('  ', ' ')

sdf_new.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,Street Address,City,State,Zip,Address
0,3818 Sebastian Cove,Round Rock,TX,78681,3818 Sebastian Cove Round Rock TX 78681
1,2501 Wickersham Lane #932,Austin,TX,78741,2501 Wickersham Lane #932 Austin TX 78741
2,"1809 Social Drive, apt 2237",PFLUGERVILLE,tx,78660,"1809 Social Drive, apt 2237 PFLUGERVILLE tx 78660"
3,1233 Strickland Dr,Austin,TX,78748,1233 Strickland Dr Austin TX 78748
4,1233 Strickland Dr,Austin,TX,78748,1233 Strickland Dr Austin TX 78748


In [249]:
# reduce to unique addresses (households)
unique_add_arr_new = sdf_new['Address'].unique()

# convert to df
unique_add_df_new = pd.DataFrame(data=unique_add_arr_new.flatten())
unique_add_df_new.rename(columns={'0':'Address'}, inplace=True)

# rename colname
unique_add_df_new.columns = unique_add_df_new.columns.astype(str)
unique_add_df_new.rename(columns={'0':'Address'}, inplace=True)

unique_add_df_new.head()

Unnamed: 0,Address
0,3818 Sebastian Cove Round Rock TX 78681
1,2501 Wickersham Lane #932 Austin TX 78741
2,"1809 Social Drive, apt 2237 PFLUGERVILLE tx 78660"
3,1233 Strickland Dr Austin TX 78748
4,14710 general williamson drive austin TX 78734


In [251]:
# get only new addresses
unique_add_df_new = list(set(unique_add_df_new.Address) - set(unique_add_df.Address))

In [262]:
unique_add_df_new = pd.DataFrame({'Address':unique_add_df_new})

In [None]:
# convert street address to geospatial coordinates (latitude, longitude)
from googlemaps import Client as GoogleMaps
from time import sleep
gmaps = GoogleMaps('AIzaSyAGwfZZgqya-PaltuxjnttgrBZ0ov_aJlM')


for x in range(len(unique_add_df)):
    try:
        sleep(1) # add delay
        geocode_result = gmaps.geocode(unique_add_df_new.loc[x, 'Address'])
        unique_add_df_new.loc[x, 'lat'] = geocode_result[0]['geometry']['location'] ['lat']
        unique_add_df_new.loc[x, 'long'] = geocode_result[0]['geometry']['location']['lng']
    except IndexError:
        print("Address was wrong...")
    except Exception as e:
        print("Unexpected error occurred.", e )

unique_add_df_new

In [266]:
# join with Breeze ID info
sdf_new = sdf_new.merge(unique_add_df_new, on = 'Address')

In [333]:
# plot new address without classification

                  
m = folium.Map(location=[np.mean(sdf_new['lat'].values), np.mean(sdf_new['long'].values)],
                        zoom_start=9, max_zoom=13)

sdf_new.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color='black', popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)

m

In [270]:
# apply model to new data

X=sdf_new.loc[:,['lat','long']]
kmeans.predict(X)

array([6, 7, 0, 7, 2, 2, 6, 7, 7, 7, 0, 2, 7, 2, 7, 7, 2, 7, 7, 1, 0, 0,
       7, 7, 0, 0, 7, 0, 7, 0, 7, 7, 7, 2, 0, 0, 6, 2, 0, 3, 2, 2, 7, 7,
       2, 2, 7, 0, 6, 0, 0, 0, 6, 6, 5, 6, 6, 0, 1, 5, 7, 7, 0, 0, 1, 7,
       7, 0, 7, 7, 7, 7, 2, 1, 0, 0, 0, 6, 2, 2, 2, 1, 6, 0, 1, 1, 0, 0,
       0, 2, 2, 2, 7, 7, 0, 0, 0, 0, 0, 0, 2, 0, 0, 7, 7, 0, 0, 6, 0, 2,
       7, 7, 2, 1, 0, 7, 0, 1], dtype=int32)

In [281]:
sdf_new.loc[:, 'cluster_lbl'] = kmeans.predict(X)

In [282]:
sdf_new

Unnamed: 0,Street Address,City,State,Zip,Address,lat,long,cluster_lbl
0,3818 Sebastian Cove,Round Rock,TX,78681,3818 Sebastian Cove Round Rock TX 78681,30.551662,-97.745087,6
1,2501 Wickersham Lane #932,Austin,TX,78741,2501 Wickersham Lane #932 Austin TX 78741,30.223290,-97.725187,7
2,"1809 Social Drive, apt 2237",PFLUGERVILLE,tx,78660,"1809 Social Drive, apt 2237 PFLUGERVILLE tx 78660",30.444158,-97.644009,0
3,"7707 S Interstate 35, Apt. 721",Austin,TX,78744,"7707 S Interstate 35, Apt. 721 Austin TX 78744",30.179265,-97.776098,7
4,7207 Fence Line Dr.,Austin,TX,78749,7207 Fence Line Dr. Austin TX 78749,30.218670,-97.847909,2
...,...,...,...,...,...,...,...,...
113,176 Tranquility Mountain,Buda,TX,78610,176 Tranquility Mountain Buda TX 78610,30.056282,-97.841024,1
114,8216 Briarwood Ln.,Austin,TX,78757,8216 Briarwood Ln. Austin TX 78757,30.365767,-97.733544,0
115,10412 Alemoor Dr.,austin,TX,78747,10412 Alemoor Dr. austin TX 78747,30.135647,-97.764093,7
116,2021 Guadalupe St Apt 812A,Austin,TX,78705,2021 Guadalupe St Apt 812A Austin TX 78705,30.283213,-97.741752,0


In [283]:
# after visual inspection, manually change outliers , down to 5 clusters

sdf_new.cluster_lbl.replace({3: 1, 
                            5: 0,  
                            4: 6}, 
                           inplace=True)

# change encoding ascending, south -> north
sdf_new.cluster_lbl = sdf_new.cluster_lbl.map({1: 0, 
                                             2: 1,
                                             7: 2,
                                             0: 3,
                                             6: 4})

sdf_new.cluster_lbl = sdf_new.cluster_lbl.map({0: 'Buda/Kyle', 
                                         1: 'Southwest', 
                                         2: 'S Central', 
                                         3: 'N Central/Northwest', 
                                         4: 'Far North'})

In [326]:
# plot new addresses after classifying

categories = sorted(sdf_new.cluster_lbl.unique().tolist())
cmap = plt.cm.get_cmap('Accent', len(categories))   
colors = []

for i in range(cmap.N):
    rgba = cmap(i)
    colors.append(matplotlib.colors.rgb2hex(rgba))

# Color by category
color_dict = { 
    cat: colors[categories.index(cat)]
    for cat in sdf_new.cluster_lbl.values
}
                  

m = folium.Map(location=[np.mean(sdf_new['lat'].values), np.mean(sdf_new['long'].values)],
                        zoom_start=9, max_zoom=13)

sdf_new.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color=color_dict[row['cluster_lbl']], popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)
# Add map legend 
m = add_categorical_legend(m, "Category", colors=colors, labels=categories)

m

In [324]:
df_full = df_train[sdf_new.columns].append(sdf_new)

In [325]:
# plot all addresses 

categories = sorted(df_full.cluster_lbl.unique().tolist())
cmap = plt.cm.get_cmap('Accent', len(categories))   
colors = []

for i in range(cmap.N):
    rgba = cmap(i)
    colors.append(matplotlib.colors.rgb2hex(rgba))

# Color by category
color_dict = { 
    cat: colors[categories.index(cat)]
    for cat in df_full.cluster_lbl.values
}
                

m = folium.Map(location=[np.mean(df_full['lat'].values), np.mean(df_full['long'].values)],
                        zoom_start=9, max_zoom=13)

df_full.apply(lambda row:folium.CircleMarker(location=[row["lat"], row["long"]], 
                                              radius=8, fill = True, color = 'black', fill_color=color_dict[row['cluster_lbl']], popup=str(row['cluster_lbl']), fill_opacity=0.6)
                                             .add_to(m), axis=1)
# Add map legend 
m = add_categorical_legend(m, "Category", colors=colors, labels=categories)

m