In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from shapely.geometry import Point
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
import math
import geopy.distance
from datetime import datetime
from tqdm.notebook import tqdm
import glob
import json
from fuzzywuzzy import fuzz, process

%matplotlib inline

ModuleNotFoundError: No module named 'shapely'

## Read in Hub-Violations and Clusters DataFrames (geocodio versions)

In [13]:
hub_covid_violations_df = pd.read_csv('../data/hub_covid_violations.csv')

In [None]:
hub_covid_violations_df.head()

In [None]:
geocodio_df = pd.read_csv('../data/geocodio_input_df_geocodio_d5d27c584d4aa85e57bb523066377b276f89cac3.csv')

In [None]:
geocodio_df.head()

In [None]:
hub_geocodio = hub_covid_violations_df.join(geocodio_df, rsuffix='_geo')

In [None]:
hub_geocodio.columns

In [None]:
hub_geocodio = hub_geocodio.drop(['Latitude', 'Longitude', 'Unit Type', 
                                  'Unit Number', 'Country', 'Source', 'Unnamed: 0'], axis=1)

In [None]:
hub_geocodio.columns = ['Request #', 'Status', 'Date / Time Opened', 'Date / Time Closed',
       'Contact Type', 'State Issue', 'Closed When Created', 'Address', 'City',
       'Council District', 'ZIP', 'Mapped Location', 'Address_geo', 'City_geo', 'ZIP_geo', 'state',
       'Latitude', 'Longitude', 'Accuracy Score', 'Accuracy Type',
       'Number', 'Street', 'City.1', 'State', 'County', 'Zip']

In [None]:
hub_geocodio.T

In [3]:
clusters = pd.read_csv('../data/clusters_corrected-Sheet1-2_geocodio_d08379641999d429faecf949b19c4c6f08c2732c.csv')

In [4]:
clusters.head()

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude_wrong,Longitude_wrong,Latitude,...,Number,Street.1,Unit Type,Unit Number,City.1,State.1,County,Zip,Country,Source
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,36.125891,-86.822863,35.858564,...,,,,,,TN,,,US,US Census Bureau
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.152444,-86.846772,36.1523,...,4710.0,Charlotte Ave,,,Nashville,TN,Davidson County,37209.0,US,TIGER/Line® dataset from the US Census Bureau
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,,,35.858564,...,,,,,,TN,,,US,US Census Bureau
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.12875,-86.819533,36.128791,...,504.0,Elmington Ave,,,Nashville,TN,Davidson County,37205.0,US,TIGER/Line® dataset from the US Census Bureau
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144562,-86.756749,36.144764,...,329.0,Murfreesboro Pike,,,Nashville,TN,Davidson County,37210.0,US,City of Nashville


In [5]:
clusters = clusters.drop([
    'Latitude_wrong', 'Longitude_wrong', 'Number', 'Street.1', 'Unit Type', 'Unit Number', 
    'City.1', 'State.1', 'Country', 'Source'
], axis=1)

In [6]:
clusters.head()

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,Accuracy Score,Accuracy Type,County,Zip
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,35.858564,-86.349357,1.0,state,,
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.1523,-86.843597,1.0,range_interpolation,Davidson County,37209.0
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,35.858564,-86.349357,1.0,state,,
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.128791,-86.818522,1.0,range_interpolation,Davidson County,37205.0
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,rooftop,Davidson County,37210.0


In [7]:
clusters.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cluster Name        62 non-null     object 
 1   Street              51 non-null     object 
 2   City                51 non-null     object 
 3   State               56 non-null     object 
 4   Facility Type       62 non-null     object 
 5   Cluster Start Date  62 non-null     object 
 6   # Cases             62 non-null     int64  
 7   Latitude            62 non-null     float64
 8   Longitude           62 non-null     float64
 9   Accuracy Score      62 non-null     float64
 10  Accuracy Type       56 non-null     object 
 11  County              51 non-null     object 
 12  Zip                 51 non-null     float64
dtypes: float64(4), int64(1), object(8)
memory usage: 6.4+ KB


In [8]:
clusters['Facility Type'].value_counts().sort_values(ascending=False)

LTCF                     20
Congregate Living         8
Bar                       6
Social Gathering          6
Correctional Facility     6
Construction              5
Other                     3
Commercial-Warehouse      2
Gym                       1
Restaurant                1
Church                    1
School                    1
Office                    1
College / University      1
Name: Facility Type, dtype: int64

In [9]:
cluster_info = clusters.groupby(['Facility Type'])['# Cases'].agg(['nunique', 'sum', 'mean'])
cluster_info

Unnamed: 0_level_0,nunique,sum,mean
Facility Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bar,5,109,18.166667
Church,1,10,10.0
College / University,1,10,10.0
Commercial-Warehouse,2,302,151.0
Congregate Living,8,464,58.0
Construction,5,185,37.0
Correctional Facility,6,557,92.833333
Gym,1,14,14.0
LTCF,17,902,45.1
Office,1,18,18.0


In [18]:
clusters_by_type = pd.read_csv('../data/clusters_by_type.csv')

In [19]:
clusters_by_type.head()

Unnamed: 0,Cluster Type,Number of Clusters
0,Bar,7
1,Church,2
2,College / University,5
3,Commercial-Warehouse,12
4,Congregate Living,13


## Merge Clusters and Violations DataFrames
### Using coordinates - best match is 3, but currently rounding is deactivated.

In [20]:
clusters_violations = clusters[clusters['Latitude'].notna()].merge(
    hub_geocodio, 
    on=['Latitude', 'Longitude'], 
    how='inner',
    suffixes=['_clusters', '_violations']
)

In [21]:
clusters_violations['Cluster Name'].value_counts()

Kid Rock's Big Ass Honky Tonk                    47
Tootsie's                                        27
Winner's                                         27
Dogwood                                          24
Event at Clementine Hall                         20
Dawghouse Saloon                                  9
Montgomery Bell Academy job site                  8
Tyson Foods                                       6
Hermitage Hall                                    6
Miss Kelli's                                      6
Good Samaritan Health & Healing                   6
Life Care Center Old Hickory Village              5
Bethany Center for Rehab and Healing              4
Link Systems Electric                             4
The Opal at Music City                            3
Lois DeBerry Special Needs Facility               3
Nashville Rescue Mission - Women's Campus         3
Riverbend Max Security Prison-2                   3
Debra Johnson Rehab/TN Prison for Women           3
Creekside Ce

In [22]:
clusters_violations.columns

Index(['Cluster Name', 'Street_clusters', 'City_clusters', 'State_clusters',
       'Facility Type', 'Cluster Start Date', '# Cases', 'Latitude',
       'Longitude', 'Accuracy Score_clusters', 'Accuracy Type_clusters',
       'County_clusters', 'Zip_clusters', 'Request #', 'Status',
       'Date / Time Opened', 'Date / Time Closed', 'Contact Type',
       'State Issue', 'Closed When Created', 'Address', 'City_violations',
       'Council District', 'ZIP', 'Mapped Location', 'Address_geo', 'City_geo',
       'ZIP_geo', 'state', 'Accuracy Score_violations',
       'Accuracy Type_violations', 'Number', 'Street_violations', 'City.1',
       'State_violations', 'County_violations', 'Zip_violations'],
      dtype='object')

In [23]:
clusters_violations[clusters_violations['Cluster Name'] == "Kid Rock's Big Ass Honky Tonk"]['Street_violations'].value_counts()

Broadway    47
Name: Street_violations, dtype: int64

In [24]:
hub_geocodio[hub_geocodio['Accuracy Score']>.9]

Unnamed: 0,Request #,Status,Date / Time Opened,Date / Time Closed,Contact Type,State Issue,Closed When Created,Address,City,Council District,...,Latitude,Longitude,Accuracy Score,Accuracy Type,Number,Street,City.1,State,County,Zip
0,346122,Closed,2020-04-15 11:33:26,2020-04-16 14:55:01,,False,False,928 6th Ave S,NASHVILLE,17.0,...,36.148719,-86.772077,1.0,rooftop,928,6th Ave S,Nashville,TN,Davidson County,37203
1,339374,Closed,2020-04-04 18:13:59,2020-04-09 13:07:22,,False,False,500 Gallatin Ave,NASHVILLE,5.0,...,36.181520,-86.749629,1.0,rooftop,500,Gallatin Ave,Nashville,TN,Davidson County,37206
2,343513,Closed,2020-04-10 10:40:08,2020-04-14 19:39:09,,False,False,5720 Crossings Blvd,,,...,36.044155,-86.647169,1.0,range_interpolation,5720,Crossings Blvd,Antioch,TN,Davidson County,37013
3,442996,Closed,2020-08-12 02:05:29,2020-08-12 16:03:35,,False,False,7689 Hwy 70 S,NASHVILLE,22.0,...,36.081001,-86.956301,1.0,range_interpolation,7689,Hwy 70 S,Nashville,TN,Davidson County,37221
5,407667,Closed,2020-07-04 19:59:51,2020-07-06 17:55:19,,False,False,945 Allen Rd,NASHVILLE,15.0,...,36.150484,-86.665820,1.0,rooftop,945,Allen Rd,Nashville,TN,Davidson County,37214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11626,492673,Closed,2020-11-01 01:52:56,2020-11-02 16:09:09,,False,False,7102 Charlotte Pike,NASHVILLE,35.0,...,36.128622,-86.906774,1.0,range_interpolation,7102,Charlotte Pike,Nashville,TN,Davidson County,37209
11627,493369,Closed,2020-11-02 17:41:20,2020-11-03 15:46:04,,False,False,305 Manchester Ave,NASHVILLE,6.0,...,36.186782,-86.735788,1.0,rooftop,305,Manchester Ave,Nashville,TN,Davidson County,37206
11628,493733,Closed,2020-11-02 20:43:33,2020-11-03 15:51:51,,False,False,2801 Foster Ave,NASHVILLE,16.0,...,36.113989,-86.743415,1.0,range_interpolation,2801,Foster Ave,Nashville,TN,Davidson County,37210
11629,492999,Closed,2020-11-02 14:36:22,2020-11-02 16:15:55,,False,False,8080 TN-100,NASHVILLE,35.0,...,36.045024,-86.953730,1.0,range_interpolation,8080,Hwy 100,Nashville,TN,Davidson County,37221


### Create DataFrame of matching addresses, using FuzzyWuzzy

In [25]:
clusters_clean = clusters[clusters['Street'].notna()]

In [26]:
def fuzzy_merge(df_1, df_2, key1, key2, threshold=95, limit=2):
    """
    :param df_1: the left table to join
    :param df_2: the right table to join
    :param key1: key column of the left table
    :param key2: key column of the right table
    :param threshold: how close the matches should be to return a match, based on Levenshtein distance
    :param limit: the amount of matches that will get returned, these are sorted high to low
    :return: dataframe with boths keys and matches
    """
    s = df_2[key2].tolist()

    m = df_1[key1].apply(lambda x: process.extract(x, s, limit=limit))    
    df_1['matches'] = m

    m2 = df_1['matches'].apply(lambda x: ', '.join([i[0] for i in x if i[1] >= threshold]))
    df_1['matches'] = m2

    return df_1

In [27]:
clusters_violations_fuzzy = fuzzy_merge(clusters_clean, hub_geocodio, 'Street', 'Address', limit=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['matches'] = m
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['matches'] = m2


In [28]:
clusters_violations_fuzzy.T

Unnamed: 0,1,3,4,5,6,7,8,9,10,11,...,48,49,50,52,53,57,58,59,60,61
Cluster Name,Event at Clementine Hall,The Health Center at Richland Place,Trevecca Center for Rehab and Healing,Tyson Foods,Nashville Center for Rehab and Healing,CDM Jail,Nashville Community Care & Rehabilitation Center,Cargill,Bethany Center for Rehab and Healing,Knowles Assisted Living,...,Lois DeBerry Special Needs Facility,Riverbend Max Security Prison-2,Debra Johnson Rehab/TN Prison for Women,Green Hills Center for Rehab and Healing,Tennessee Titans,One Stone Church Service,Miss Kelli's,Nashville Rescue Mission - Women's Campus,Link Systems Electric,Iron Tribe Belmont
Street,4710 Charlotte Avenue,504 Elmington Avenue,329 Murfreesboro Pike,201 Cartwright Street,832 Wedgewood Avenue,5113 Harding Place,1414 County Hospital Road,2621 Eugenia Avenue,421 Ocala Drive,1010 Camilla Caldwell Lane,...,7575 Cockrill Bend Blvd,7475 Cockrill Bend Blvd,3881 Stewarts Ln,3939 Hillsboro Cir,460 Great Circle Rd,1101 Stainback Ave,207 Printers Alley,1716 Rosa L Parks Blvd,444 McNally Dr,3201 Belmont Blvd
City,Nashville,Nashville,Nashville,Goodlettsville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,...,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville
State,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
Facility Type,Social Gathering,LTCF,LTCF,Commercial-Warehouse,LTCF,Correctional Facility,LTCF,Commercial-Warehouse,LTCF,LTCF,...,Correctional Facility,Correctional Facility,Correctional Facility,LTCF,Other,Church,Bar,Congregate Living,Other,Gym
Cluster Start Date,3/14/20,4/3/20,4/4/20,4/6/20,4/8/20,4/13/20,4/15/20,4/21/20,4/27/20,4/27/20,...,9/1/20,9/1/20,9/1/20,9/7/20,9/12/20,9/22/20,10/1/20,10/1/20,10/16/20,10/20/20
# Cases,23,47,102,280,12,22,47,22,133,41,...,62,60,216,45,23,10,14,70,12,14
Latitude,36.1523,36.1288,36.1448,36.3313,36.1348,36.0896,36.1741,36.116,36.0513,36.1741,...,36.1848,36.1848,36.1994,36.1049,36.1989,36.1898,36.1639,36.1817,36.0926,36.117
Longitude,-86.8436,-86.8185,-86.7557,-86.7108,-86.782,-86.6885,-86.8456,-86.7547,-86.7146,-86.8456,...,-86.8992,-86.8992,-86.8669,-86.8196,-86.8044,-86.7652,-86.7785,-86.7968,-86.7403,-86.7983
Accuracy Score,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,0.9,1


In [29]:
clusters_violations_fuzzy[clusters_violations_fuzzy['matches'] != ''].shape

(28, 14)

In [30]:
clusters.columns

Index(['Cluster Name', 'Street', 'City', 'State', 'Facility Type',
       'Cluster Start Date', '# Cases', 'Latitude', 'Longitude',
       'Accuracy Score', 'Accuracy Type', 'County', 'Zip'],
      dtype='object')

In [31]:
hub_geocodio.columns

Index(['Request #', 'Status', 'Date / Time Opened', 'Date / Time Closed',
       'Contact Type', 'State Issue', 'Closed When Created', 'Address', 'City',
       'Council District', 'ZIP', 'Mapped Location', 'Address_geo', 'City_geo',
       'ZIP_geo', 'state', 'Latitude', 'Longitude', 'Accuracy Score',
       'Accuracy Type', 'Number', 'Street', 'City.1', 'State', 'County',
       'Zip'],
      dtype='object')

## Merge Fuzzied Clusters to Hub Violations DF

In [32]:
fuzzy_merge_df = clusters_violations_fuzzy.merge(
    hub_geocodio, left_on='matches', right_on='Address', suffixes=['_clusters', '_violations'])

In [33]:
fuzzy_merge_df

Unnamed: 0,Cluster Name,Street_clusters,City_clusters,State_clusters,Facility Type,Cluster Start Date,# Cases,Latitude_clusters,Longitude_clusters,Accuracy Score_clusters,...,Latitude_violations,Longitude_violations,Accuracy Score_violations,Accuracy Type_violations,Number,Street_violations,City.1,State_violations,County_violations,Zip_violations
0,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,...,36.144764,-86.755659,1.0,rooftop,329,Murfreesboro Pike,Nashville,TN,Davidson County,37210
1,Montgomery Bell Academy job site,4001 Harding Pike,Nashville,TN,Construction,5/8/20,75,36.093008,-86.845540,0.7,...,36.093008,-86.845540,0.7,range_interpolation,4001,Harding Pl,Nashville,TN,Davidson County,37215
2,Montgomery Bell Academy job site,4001 Harding Pike,Nashville,TN,Construction,5/8/20,75,36.093008,-86.845540,0.7,...,36.093008,-86.845540,0.7,range_interpolation,4001,Harding Pl,Nashville,TN,Davidson County,37215
3,Montgomery Bell Academy job site,4001 Harding Pike,Nashville,TN,Construction,5/8/20,75,36.093008,-86.845540,0.7,...,36.093008,-86.845540,0.7,range_interpolation,4001,Harding Pl,Nashville,TN,Davidson County,37215
4,Montgomery Bell Academy job site,4001 Harding Pike,Nashville,TN,Construction,5/8/20,75,36.093008,-86.845540,0.7,...,36.093008,-86.845540,0.7,range_interpolation,4001,Harding Pl,Nashville,TN,Davidson County,37215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,Link Systems Electric,444 McNally Dr,Nashville,TN,Other,10/16/20,12,36.092556,-86.740288,0.9,...,36.092556,-86.740288,0.9,range_interpolation,444,Mc Nally Dr,Nashville,TN,Davidson County,37211
164,Link Systems Electric,444 McNally Dr,Nashville,TN,Other,10/16/20,12,36.092556,-86.740288,0.9,...,36.092556,-86.740288,0.9,range_interpolation,444,Mc Nally Dr,Nashville,TN,Davidson County,37211
165,Link Systems Electric,444 McNally Dr,Nashville,TN,Other,10/16/20,12,36.092556,-86.740288,0.9,...,36.092556,-86.740288,0.9,range_interpolation,444,Mc Nally Dr,Nashville,TN,Davidson County,37211
166,Iron Tribe Belmont,3201 Belmont Blvd,Nashville,TN,Gym,10/20/20,14,36.116961,-86.798286,1.0,...,36.119203,-86.795683,1.0,street_center,,Belmont Blvd,Nashville,TN,Davidson County,37212


In [34]:
fuzzy_merge_df.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,158,159,160,161,162,163,164,165,166,167
Cluster Name,Trevecca Center for Rehab and Healing,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Montgomery Bell Academy job site,Grand Hyatt Hotel (Power Design Inc) job site,...,Miss Kelli's,Miss Kelli's,Miss Kelli's,Nashville Rescue Mission - Women's Campus,Link Systems Electric,Link Systems Electric,Link Systems Electric,Link Systems Electric,Iron Tribe Belmont,Iron Tribe Belmont
Street_clusters,329 Murfreesboro Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,4001 Harding Pike,1000 Broadway,...,207 Printers Alley,207 Printers Alley,207 Printers Alley,1716 Rosa L Parks Blvd,444 McNally Dr,444 McNally Dr,444 McNally Dr,444 McNally Dr,3201 Belmont Blvd,3201 Belmont Blvd
City_clusters,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,...,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville,Nashville
State_clusters,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN,...,TN,TN,TN,TN,TN,TN,TN,TN,TN,TN
Facility Type,LTCF,Construction,Construction,Construction,Construction,Construction,Construction,Construction,Construction,Construction,...,Bar,Bar,Bar,Congregate Living,Other,Other,Other,Other,Gym,Gym
Cluster Start Date,4/4/20,5/8/20,5/8/20,5/8/20,5/8/20,5/8/20,5/8/20,5/8/20,5/8/20,5/19/20,...,10/1/20,10/1/20,10/1/20,10/1/20,10/16/20,10/16/20,10/16/20,10/16/20,10/20/20,10/20/20
# Cases,102,75,75,75,75,75,75,75,75,23,...,14,14,14,70,12,12,12,12,14,14
Latitude_clusters,36.1448,36.093,36.093,36.093,36.093,36.093,36.093,36.093,36.093,36.1578,...,36.1639,36.1639,36.1639,36.1817,36.0926,36.0926,36.0926,36.0926,36.117,36.117
Longitude_clusters,-86.7557,-86.8455,-86.8455,-86.8455,-86.8455,-86.8455,-86.8455,-86.8455,-86.8455,-86.7842,...,-86.7785,-86.7785,-86.7785,-86.7968,-86.7403,-86.7403,-86.7403,-86.7403,-86.7983,-86.7983
Accuracy Score_clusters,1,0.7,0.7,0.7,0.7,0.7,0.7,0.7,0.7,1,...,1,1,1,1,0.9,0.9,0.9,0.9,1,1


In [35]:
fuzzy_merge_df.to_csv('fuzzy_merge_df.csv')

In [36]:
fuzzy_merge_df['Cluster Name'].value_counts()

Kid Rock's Big Ass Honky Tonk                    47
Tootsie's                                        27
Winner's                                         27
Dawghouse Saloon                                  9
Montgomery Bell Academy job site                  8
Hermitage Hall                                    6
Miss Kelli's                                      6
Good Samaritan Health & Healing                   6
Life Care Center Old Hickory Village              5
Link Systems Electric                             4
Debra Johnson Rehab/TN Prison for Women           3
Grand Hyatt Hotel (Power Design Inc) job site     2
One Stone Church Service                          2
Iron Tribe Belmont                                2
Standing Tall Music City                          1
Nashville Center for Rehab and Healing (2)        1
Trevecca Center for Rehab and Healing             1
Tennessee Titans                                  1
Lakeshore Meadows                                 1
Metro Water 

## Create Buffer Zones around each Cluster Point
### Create a GeoDataFrame with the Clusters DataFrame

In [37]:
clusters_locations = clusters[clusters['Latitude'].notna()]

In [38]:
clusters_locations['Buffer_Geometry'] = clusters_locations.apply(lambda x: Point((float(x.Longitude),
                                              float(x.Latitude))),
                             axis=1)

In [39]:
clusters_locations.head()

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,Accuracy Score,Accuracy Type,County,Zip,Buffer_Geometry
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,35.858564,-86.349357,1.0,state,,,POINT (-86.349357 35.858564)
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.1523,-86.843597,1.0,range_interpolation,Davidson County,37209.0,POINT (-86.843597 36.1523)
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,35.858564,-86.349357,1.0,state,,,POINT (-86.349357 35.858564)
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.128791,-86.818522,1.0,range_interpolation,Davidson County,37205.0,POINT (-86.818522 36.128791)
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,rooftop,Davidson County,37210.0,POINT (-86.75565899999999 36.144764)


In [40]:
geometry = clusters_locations['Buffer_Geometry']
clusters_locations_geodf = gpd.GeoDataFrame(clusters_locations, crs="EPSG:4326", geometry=geometry)

In [41]:
clusters_locations_geodf = clusters_locations_geodf.drop('Buffer_Geometry', axis=1)
clusters_locations_geodf['Buffer_Geometry'] = clusters_locations_geodf['geometry']
clusters_locations_geodf = clusters_locations_geodf.drop('geometry', axis=1)

In [42]:
clusters_locations_geodf.head()

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,Accuracy Score,Accuracy Type,County,Zip,Buffer_Geometry
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,35.858564,-86.349357,1.0,state,,,POINT (-86.34936 35.85856)
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.1523,-86.843597,1.0,range_interpolation,Davidson County,37209.0,POINT (-86.84360 36.15230)
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,35.858564,-86.349357,1.0,state,,,POINT (-86.34936 35.85856)
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.128791,-86.818522,1.0,range_interpolation,Davidson County,37205.0,POINT (-86.81852 36.12879)
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,rooftop,Davidson County,37210.0,POINT (-86.75566 36.14476)


In [43]:
clusters_locations_geodf['Buffer_Zone'] = clusters_locations_geodf['Buffer_Geometry'].buffer(.0001, resolution=20)


  clusters_locations_geodf['Buffer_Zone'] = clusters_locations_geodf['Buffer_Geometry'].buffer(.0001, resolution=20)


In [44]:
clusters_locations_geodf

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,Accuracy Score,Accuracy Type,County,Zip,Buffer_Geometry,Buffer_Zone
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,35.858564,-86.349357,1.0,state,,,POINT (-86.34936 35.85856),"POLYGON ((-86.34926 35.85856, -86.34926 35.858..."
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.152300,-86.843597,1.0,range_interpolation,Davidson County,37209.0,POINT (-86.84360 36.15230),"POLYGON ((-86.84350 36.15230, -86.84350 36.152..."
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,35.858564,-86.349357,1.0,state,,,POINT (-86.34936 35.85856),"POLYGON ((-86.34926 35.85856, -86.34926 35.858..."
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.128791,-86.818522,1.0,range_interpolation,Davidson County,37205.0,POINT (-86.81852 36.12879),"POLYGON ((-86.81842 36.12879, -86.81842 36.128..."
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,rooftop,Davidson County,37210.0,POINT (-86.75566 36.14476),"POLYGON ((-86.75556 36.14476, -86.75556 36.144..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,One Stone Church Service,1101 Stainback Ave,Nashville,TN,Church,9/22/20,10,36.189795,-86.765180,1.0,range_interpolation,Davidson County,37207.0,POINT (-86.76518 36.18980),"POLYGON ((-86.76508 36.18980, -86.76508 36.189..."
58,Miss Kelli's,207 Printers Alley,Nashville,TN,Bar,10/1/20,14,36.163931,-86.778513,1.0,range_interpolation,Davidson County,37201.0,POINT (-86.77851 36.16393),"POLYGON ((-86.77841 36.16393, -86.77841 36.163..."
59,Nashville Rescue Mission - Women's Campus,1716 Rosa L Parks Blvd,Nashville,TN,Congregate Living,10/1/20,70,36.181699,-86.796756,1.0,rooftop,Davidson County,37208.0,POINT (-86.79676 36.18170),"POLYGON ((-86.79666 36.18170, -86.79666 36.181..."
60,Link Systems Electric,444 McNally Dr,Nashville,TN,Other,10/16/20,12,36.092556,-86.740288,0.9,range_interpolation,Davidson County,37211.0,POINT (-86.74029 36.09256),"POLYGON ((-86.74019 36.09256, -86.74019 36.092..."


In [45]:
center = [36.16784, -86.78166]
nashville_buffer_map = folium.Map(location = center, zoom_start=11)

In [46]:
folium.GeoJson(clusters_locations_geodf['Buffer_Zone']).add_to(nashville_buffer_map)

<folium.features.GeoJson at 0x7fe098e8c790>

In [47]:
nashville_buffer_map

## Look at some Folium Maps

## Starting with just looking at all the clusters

COVID Icon - Icons made by <a href="https://www.flaticon.com/authors/freepik" title="Freepik">Freepik</a> from <a href="https://www.flaticon.com/" title="Flaticon"> www.flaticon.com</a>

In [48]:
clusters_locations

Unnamed: 0,Cluster Name,Street,City,State,Facility Type,Cluster Start Date,# Cases,Latitude,Longitude,Accuracy Score,Accuracy Type,County,Zip,Buffer_Geometry,geometry
0,Vanderbilt Parties,,,TN,Social Gathering,3/11/20,49,35.858564,-86.349357,1.0,state,,,POINT (-86.349357 35.858564),POINT (-86.34936 35.85856)
1,Event at Clementine Hall,4710 Charlotte Avenue,Nashville,TN,Social Gathering,3/14/20,23,36.152300,-86.843597,1.0,range_interpolation,Davidson County,37209.0,POINT (-86.843597 36.1523),POINT (-86.84360 36.15230)
2,Religious Retreat,,,TN,Social Gathering,3/25/20,18,35.858564,-86.349357,1.0,state,,,POINT (-86.349357 35.858564),POINT (-86.34936 35.85856)
3,The Health Center at Richland Place,504 Elmington Avenue,Nashville,TN,LTCF,4/3/20,47,36.128791,-86.818522,1.0,range_interpolation,Davidson County,37205.0,POINT (-86.818522 36.128791),POINT (-86.81852 36.12879)
4,Trevecca Center for Rehab and Healing,329 Murfreesboro Pike,Nashville,TN,LTCF,4/4/20,102,36.144764,-86.755659,1.0,rooftop,Davidson County,37210.0,POINT (-86.75565899999999 36.144764),POINT (-86.75566 36.14476)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,One Stone Church Service,1101 Stainback Ave,Nashville,TN,Church,9/22/20,10,36.189795,-86.765180,1.0,range_interpolation,Davidson County,37207.0,POINT (-86.76518 36.189795),POINT (-86.76518 36.18980)
58,Miss Kelli's,207 Printers Alley,Nashville,TN,Bar,10/1/20,14,36.163931,-86.778513,1.0,range_interpolation,Davidson County,37201.0,POINT (-86.778513 36.163931),POINT (-86.77851 36.16393)
59,Nashville Rescue Mission - Women's Campus,1716 Rosa L Parks Blvd,Nashville,TN,Congregate Living,10/1/20,70,36.181699,-86.796756,1.0,rooftop,Davidson County,37208.0,POINT (-86.796756 36.181699),POINT (-86.79676 36.18170)
60,Link Systems Electric,444 McNally Dr,Nashville,TN,Other,10/16/20,12,36.092556,-86.740288,0.9,range_interpolation,Davidson County,37211.0,POINT (-86.74028800000001 36.092556),POINT (-86.74029 36.09256)


In [49]:
center = [36.16784, -86.78166]
nashville_presentation_map_1 = folium.Map(location = center, zoom_start=11)

In [50]:
def color_producer(elevation):
    if elevation < 1000:
        return 'green'
    elif 1000 <= elevation < 3000:
        return 'orange'
    else:
        return 'red'
map = folium.Map(location=[38.58, -99.09], zoom_start=6, tiles="Mapbox Bright")
fg = folium.FeatureGroup(name="My Map")
for lt, ln, el in zip(lat, lon, elev):
    fg.add_child(folium.CircleMarker(location=[lt, ln], radius = 6, popup=str(el)+" m",
    fill_color=color_producer(el), color = 'grey', fill_opacity=0.7))
map.add_child(fg)

NameError: name 'lat' is not defined

In [51]:
def color_producer(elevation):
    if elevation < 1000:
        return 'green'
    elif 1000 <= elevation < 3000:
        return 'orange'
    else:
        return 'red'

for row_index, row_values in clusters_locations.iterrows():
    loc = [row_values['Latitude'], row_values['Longitude']]
    pop = str(row_values['Cluster Name'])
    rad = row_values['# Cases'] * 5
    marker = folium.Circle(
        location = loc,
        radius = rad,
        fill = True,
        fill_color = 'red',
        color = 'red',
        popup = folium.Popup(pop,
                     min_width=200,
                     max_width=500))
    
    marker.add_to(nashville_presentation_map_1)
    
nashville_presentation_map_1

In [None]:
center = [36.16784, -86.78166]
nashville_cluster_map = folium.Map(location = center, zoom_start=11)

In [None]:
for row_index, row_values in clusters_locations.iterrows():
    loc = [row_values['Latitude'], row_values['Longitude']]
    pop = str(row_values['Cluster Name'])
    icon = folium.features.CustomIcon('covid4.png')
    marker = folium.Marker(
        icon = icon,
        location = loc,
        popup = folium.Popup(pop,
                     min_width=200,
                     max_width=500))
    
    marker.add_to(nashville_cluster_map)
    
nashville_cluster_map

## Look at all reports
### Start by creating DataFrame without repeating coordinate locations

In [None]:
hub_covid_violations_df.head()

In [None]:
hub_covid_violations_df['Coordinates'] = list(zip(hub_covid_violations_df.Latitude, hub_covid_violations_df.Longitude))

In [None]:
hub_covid_map_df = hub_covid_violations_df.groupby('Coordinates').count()

In [None]:
center = [36.1672, -86.7816]
nashville_report_map = folium.Map(location = center, zoom_start=11)

In [None]:
#for row_index, row_values in hub_covid_violations_df[
    (hub_covid_violations_df['City'].notna()) & 
    (hub_covid_violations_df['Latitude'].notna())].iterrows():
    
    loc = [row_values['Latitude'], row_values['Longitude']]
    #pop = str(row_values['Cluster Name'])
    icon = folium.features.CustomIcon('covid4.png')
    marker = folium.Marker(
        icon = icon,
        location = loc)
        #popup = folium.Popup(pop,
                     #min_width=200,
                     #max_width=500))
    
    marker.add_to(nashville_report_map)
    
nashville_report_map

In [None]:
center = [36.16784, -86.78166]
nashville_covid_map = folium.Map(location = center, zoom_start=11)

In [None]:
for row_index, row_values in clusters_violations.iterrows():
    loc = [row_values['Latitude'], row_values['Longitude']]
    pop = str(row_values['Cluster Name'])
    icon = folium.features.CustomIcon('covid4.png')
    marker = folium.Marker(
        icon = icon,
        location = loc,
        popup = folium.Popup(pop,
                     min_width=200,
                     max_width=500))
    
    marker.add_to(nashville_covid_map)

In [None]:
nashville_covid_map

Loser's looks like it's located in the wrong spot - so it may not be correct for a cluster??

## Create a circle arounch each cluster, checking location matches that way

In [None]:
clusters_violations_geo['Cluster_Geometry']

In [None]:
for r in clusters_violations_geo:
    p = r['Cluster_Geometry']
    n_points = 20
    d = 10 # meters
    angles = np.linspace(0, 360, n_points)
    r['Cluster_Polygon'] = geog.propagate(p, angles, d)

In [None]:
n_points = 20
d = 10 * 1000  # meters
angles = np.linspace(0, 360, n_points)
polygon = geog.propagate(p, angles, d)
print(json.dumps(shapely.geometry.mapping(shapely.geometry.Polygon(polygon))))

## Import JSON files with business info

In [None]:
business_glob = glob.glob('../data/google_places_results/*.json')

In [None]:
jsons_list = []
for j in business_glob:
    with open(j) as result_file:
        jsons_list.append(json.load(result_file))

In [None]:
jsons_list[0][0]['results']

In [None]:
#Same as business_glob, but with Jason's code
json_files = glob.glob('../data/google_places_results/results_*.json')

In [None]:
def process_json_file(filename):
    with open(filename) as result_file:
        results_json = json.load(result_file)
        results_json_list = []
        for result_json in results_json:
            for result_json_result in result_json['results']:
                result_json_result['orig_mapped_location'] = result_json['mapped_location']
                result_json_result['orig_address'] = result_json['address']
                results_json_list.append(result_json_result)
        return pd.DataFrame(results_json_list)

In [None]:
frames = [process_json_file(file_path) for file_path in json_files]

In [None]:
frames

In [None]:
all_places_results = pd.concat(frames)

In [None]:
all_places_results.iloc[2]

In [None]:
def matcher(column1, column2):
    names_array=[]
    ratio_array=[]    
    if column1 in column2:
        return 100
    else:   
        return fuzz.partial_ratio(column1,column2)

In [None]:
all_places_results['match_ratio'] = all_places_results.apply(
    lambda x: matcher(x['vicinity'].split(',')[0], x['orig_address'].split(',')[0]), axis=1)

In [None]:
all_places_results

In [None]:
for addr, grp in all_places_results[all_places_results['match_ratio'] > 93].groupby('orig_address'):
    print(addr)
    print(grp[grp.match_ratio == grp.match_ratio.max()].iloc[0])
    print('='*30)

In [None]:
# Checking out Veronica's coding:

# Create an empty dataframe with the columns we want
column_names = ['mapped_location', 'address']
result_addresses = pd.DataFrame(columns = column_names)
# Go through and open each json results file
for file in list(glob.glob('../data/google_places_results/*.json')):
    with open(file) as fi:
        result = json.load(fi)
# Write the contents of the 'results' field to a dataframe       
        google_results = pd.json_normalize(result)
# Clean up the dataframe columns
        google_results = google_results.drop(['results'], axis = 1)
        google_results.columns = ['mapped_location', 'address']
# Append the contents of each json file to the results dataframe
        result_addresses = result_addresses.append(google_results)      

In [None]:
result_addresses.head()

In [None]:
with open('data/example.json', "r") as fi:
    person = json.load(fi)

In [None]:
businesses[0]

In [None]:
with open('../data/google_places_results/results_32.json', "r") as fi:
    person = json.load(fi)

In [None]:
print(pd.DataFrame(person[0]['results'][0].T)

In [None]:
person[0]['results'][0].keys()

In [None]:
pd.DataFrame(person[0]['results'][0].T)

In [None]:
person[0]['results']