In [7]:
import os
import warnings

import folium
import pandas as pd
import numpy as np
import statsmodels as sm
import seaborn as sns 
import matplotlib.pyplot as plt
import scipy.stats as stats 

from scipy.stats import powerlaw 
from folium import plugins
from folium.plugins import HeatMap
from folium.plugins import MeasureControl
from folium.plugins import MarkerCluster

#need to install geopy
from geopy.geocoders import Nominatim

from IPython.display import IFrame

from violation_preprocessing import violation_separator, violations_dataframe
from mapping_functions import generateBaseMap, geodataframe, chlorepleth_map, adding_Marker, adding_CircleMarker

# render matplotlib figures directly in the notebook output
%matplotlib inline
# apply seaborn's default plot styling to all subsequent figures
sns.set()
# NOTE(review): this silences every warning category for the rest of the session,
# so deprecation and dtype warnings from the libraries used below are hidden too;
# consider narrowing the filter to the specific warnings that are known noise.
warnings.filterwarnings('ignore')

In [8]:
# Load the cleaned inspections dataset and discard the leftover index column
# that was written to the CSV as 'Unnamed: 0'.
data = (
    pd.read_csv('data/clean_dataset.csv', sep=',')
    .drop(columns=['Unnamed: 0'])
)

# Normalise zip codes to plain strings: cast to str, then keep only the part
# before any '.' so the values can be matched against the zip codes in the
# geojson file during the visualisation step.
# (presumably the column is parsed as float, e.g. "60666.0" - TODO confirm)
data['zip'] = data['zip'].astype(str).map(lambda code: code.split('.')[0])

# preview the first rows
display(data.head(3))

Unnamed: 0,inspection_id,dba_name,aka_name,license,facility_type,risk,address,zip,inspection_date,inspection_type,results,violations,latitude,longitude,location
0,2352734,CHILI'S T-I,CHILI'S (T1-B14),34169.0,Restaurant,Risk 1 (High),11601 W TOUHY AVE,60666,2019-12-04,Canvass,Pass,10. ADEQUATE HANDWASHING SINKS PROPERLY SUPPLI...,42.008536,-87.914428,"{'latitude': '-87.91442843927047', 'longitude'..."
1,2352727,PORTAGE PARK DAY NURSERY,MOSAIC EARLY CHILDHOOD ACADEMY,2215815.0,Children's Services Facility,Risk 1 (High),5332-5334 W ADDISON ST,60641,2019-12-04,Canvass,Pass,,41.946065,-87.760722,"{'latitude': '-87.76072227616888', 'longitude'..."
2,2352738,AMARIT RESTAURANT,AMARIT RESTAURANT,1801618.0,Restaurant,Risk 1 (High),600 S DEARBORN ST,60605,2019-12-04,Canvass Re-Inspection,Pass,,41.874481,-87.629357,"{'latitude': '-87.62935653990546', 'longitude'..."


In [9]:
# One row per facility: keep only the first record encountered for each license.
# NOTE(review): pandas treats missing values as equal when de-duplicating, so all
# rows without a license collapse into a single row - confirm this is intended.
data_unique = data.drop_duplicates(subset=['license'], keep='first')

# Mapping functions (TODO: move them into a class)

In [None]:
# Disabled reference copy of generateBaseMap (builds the base folium map).
# The live implementation is imported from mapping_functions at the top of the
# notebook; this cell contains only a bare string literal, so running it
# defines nothing.
'''def generateBaseMap(default_location = [41.8600, -87.6298], default_zoom_start = 10):
    
    generates a map for the corect area
    
    Parameters
    ----------
    default_location: list of floats
        location of where the map is going to be
    
    default_zoom_start: int
        the amount of zoom on the map in beginning
    
    
    base_map = folium.Map(location=default_location, control_scale = True, zoom_start = default_zoom_start)
    return base_map
     '''

In [None]:
# Disabled reference copy of geodataframe. The live implementation is imported
# from mapping_functions at the top of the notebook; this cell contains only a
# bare string literal, so running it defines nothing.
# NOTE(review): if this text mirrors the module, two points are worth checking there:
#   - `type(df.zip) is not str` inspects the type of the column object rather
#     than of its values, so that branch is taken for any DataFrame column;
#   - the body relies on `os` and `gpd`, neither of which this notebook imports.
'''def geodataframe(df):

    converts a dataframe into a geodataframe
    
    Parameters
    ----------
    df: pandas.dataframe
    
    Returns
    -------
    gdf: geopandas.dataframe


    # path to geoJson file 
    geo = os.path.join('data/Boundaries-ZIPCodes.geojson')

    # creation of a geodataframe using geopandas
    gdf = gpd.read_file(geo)

    # add a column with the x-coordinate of the multipolygon
    gdf['centroid_lon'] = gdf['geometry'].centroid.x

    # add a column with the y-coordinate of the multipolygon
    gdf['centroid_lat'] = gdf['geometry'].centroid.y

    # setting a projection  by assigning the WGS84 latitude-longitude CRS to the crs attribute
    gdf.crs = {'init' :'epsg:4326'}
    
    # counting the number of facilities per zip 
    facility_number_per_zip = pd.DataFrame(df.groupby('zip')['license'].count()).reset_index()
    
    if (type(df.zip) is not str):
        # convert the zip column into an str
        facility_number_per_zip.zip = facility_number_per_zip.zip.astype(str)
    
        # reformat the zip code writing in order to compare it with the zip code in geojson file (for vizualisation step)
        facility_number_per_zip['zip'] = facility_number_per_zip['zip'].apply(lambda x : x.split('.')[0])
    
    # merge with the geodataframe
    gdf = pd.merge(gdf,facility_number_per_zip, on = 'zip')
    gdf.rename(columns = {'license': 'facility_number_per_zip'}, inplace = True)
    
    return gdf
     '''

In [10]:
# Build the per-zip geodataframe from the inspections dataframe using the
# geodataframe helper imported from mapping_functions.
# NOTE(review): `data` has one row per inspection (see its inspection_id column).
# If the helper counts rows per zip the way the disabled copy kept in this
# notebook does, facility_number_per_zip counts inspections rather than distinct
# facilities - confirm whether data_unique was meant to be passed instead.
gdf = geodataframe(data)
gdf.head(3)

Unnamed: 0,objectid,shape_area,shape_len,zip,geometry,centroid_lon,centroid_lat,facility_number_per_zip
0,33,106052287.488,42720.0444058,60647,(POLYGON ((-87.67762151065281 41.9177578010629...,-87.702259,41.921098,4628
1,34,127476050.762,48103.7827213,60639,(POLYGON ((-87.72683253163021 41.9226462671259...,-87.755996,41.920456,3436
2,35,45069038.4783,27288.6096123,60707,(POLYGON ((-87.78500237831095 41.9091478547167...,-87.795738,41.919948,735


In [None]:
# Disabled reference copy of chlorepleth_map. The live implementation is
# imported from mapping_functions at the top of the notebook; this cell contains
# only a bare string literal, so running it defines nothing.
# NOTE(review): this copy passes the literal 'name' to folium.Choropleth instead
# of the `name` argument, and uses `os` which this notebook does not import -
# check whether mapping_functions.chlorepleth_map carries the same slips.
'''def chlorepleth_map (name,gdf,columns,legend_name,color):
    
    mapping cloropleth map
    
    Parameters
    ----------
    name: str 
        name of map
    
    gdf: geopandas.dataframe
        dataframe with data for mapping
    
    columns: list
        columns with data for mapping
    
    legend_name: str
        name on scale 
    
    color: str
        color scheme for map
        
    Returns
    -------
    map_: folium.folium.Map
    

    #creating a basic map of Chicago
    map_ = folium.Map(location=[41.8600, -87.6298], control_scale=True, zoom_start=10)
    
    #geoJson file path
    geo = os.path.join('data/Boundaries-ZIPCodes.geojson')
    
    folium.Choropleth(
        geo_data=geo,
        name='name',
        data=gdf,
        columns=columns,
        key_on='feature.properties.zip',
        fill_color=color,
        fill_opacity=0.8,
        line_opacity=1,
        legend_name=legend_name
    ).add_to(map_)
    
    #if (markers==True):
        #add_markers(map)
    
    return map_
'''
    

In [None]:
# Disabled reference copy of a marker helper; this cell contains only a bare
# string literal, so running it defines nothing. The notebook imports
# `adding_Marker` (lower-case "a") from mapping_functions instead.
# NOTE(review): the `popup` description in the text below mentions beaches and
# e-coli counts, which does not match this facilities notebook - it looks
# carried over from another project and should be corrected wherever the live
# docstring is kept.
'''def Adding_Marker(map_, longitude, latitude, popup, colour):
    
    adds a marker which locates a facility on the map
    
    map_: folium.folium.Map
        basic map
    
    longitude: numpy.float64
    
    latitude: numpy.float64
    
    popup: str
        beach name and count of e-coli consentration higher than limit
    
    colour: str
    
    
    folium.Marker(
        location = [latitude,longitude], # coordinates for the marker 
        popup = popup ,  # pop-up label for the marker
        icon = folium.Icon(color = colour)
    ).add_to(map_)

'''

In [None]:
# Disabled reference copy of a circle-marker helper; this cell contains only a
# bare string literal, so running it defines nothing. The notebook imports
# `adding_CircleMarker` (lower-case "a") from mapping_functions instead.
# NOTE(review): the description in the text below does not match the signature:
# it documents `popup` and `colour`, while the parameters are `color` and `r`,
# and the inline comment on `color` calls it a pop-up label.
'''def Adding_CircleMarker(map_, longitude, latitude, color,r):
    
    adds a marker which locates a facility on the map
    
    map_: folium.folium.Map
        basic map
    
    longitude: numpy.float64
    
    latitude: numpy.float64
    
    popup: str
        beach name and count of e-coli consentration higher than limit
    
    colour: str
   
    
    
    folium.CircleMarker(
        location = [latitude,longitude], # coordinates for the marker 
        color = color ,  # pop-up label for the marker
        fill=True,
        radius = r,
    ).add_to(map_)
 '''

# Mapping with clusters: for the data exploration part of the data story

In [11]:
# Base map of the area with a marker-cluster layer that groups nearby
# facilities into a single bubble until the reader zooms in.
map_clustered = generateBaseMap()
cluster = MarkerCluster(name="cluster name").add_to(map_clustered)

# A marker cannot be placed without coordinates, and a single missing value
# would abort the whole loop, so facilities without a position are skipped.
facilities_located = data_unique.dropna(subset=['latitude', 'longitude'])

# Walk the three columns in lockstep instead of re-materialising `.values`
# for every index; one marker per unique facility, labelled with its zip code.
for lat, lon, zip_code in zip(
    facilities_located['latitude'],
    facilities_located['longitude'],
    facilities_located['zip'],
):
    folium.Marker(
        location=[lat, lon],  # coordinates for the marker
        popup=zip_code,  # pop-up label for the marker
        icon=folium.Icon(color='darkpurple'),
    ).add_to(cluster)


In [6]:
# Create the output folder if needed: saving fails when the target directory
# is missing, e.g. on a fresh checkout or after Restart & Run All.
os.makedirs('maps', exist_ok=True)
map_clustered.save('maps/cluster_map.html')