In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point

# Load data
restaurant_df= pd.read_csv("open_restaurants.csv")
manhattan_restaurant_df=restaurant_df[restaurant_df['Borough']=='Manhattan']



In [2]:
pd.set_option('display.max_columns', None)
#display all columns not truncated

In [3]:
manhattan_restaurant_df.shape

(40710, 20)

In [4]:
manhattan_restaurant_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 40710 entries, 0 to 81552
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Borough                 40710 non-null  object 
 1   RestaurantName          40710 non-null  object 
 2   SeatingChoice           40710 non-null  object 
 3   LegalBusinessName       40710 non-null  object 
 4   BusinessAddress         40710 non-null  object 
 5   RestaurantInspectionID  40710 non-null  int64  
 6   IsSidewayCompliant      0 non-null      float64
 7   IsRoadwayCompliant      40710 non-null  object 
 8   SkippedReason           8873 non-null   object 
 9   InspectedOn             40710 non-null  object 
 10  AgencyCode              34742 non-null  object 
 11  Postcode                40710 non-null  int64  
 12  Latitude                36316 non-null  float64
 13  Longitude               36316 non-null  float64
 14  CommunityBoard          36316 non-null  flo

In [5]:
manhattan_restaurant_df.dtypes

Borough                    object
RestaurantName             object
SeatingChoice              object
LegalBusinessName          object
BusinessAddress            object
RestaurantInspectionID      int64
IsSidewayCompliant        float64
IsRoadwayCompliant         object
SkippedReason              object
InspectedOn                object
AgencyCode                 object
Postcode                    int64
Latitude                  float64
Longitude                 float64
CommunityBoard            float64
CouncilDistrict           float64
CensusTract               float64
BIN                       float64
BBL                       float64
NTA                        object
dtype: object

In [6]:
manhattan_restaurant_df.columns

Index(['Borough', 'RestaurantName', 'SeatingChoice', 'LegalBusinessName',
       'BusinessAddress', 'RestaurantInspectionID', 'IsSidewayCompliant',
       'IsRoadwayCompliant', 'SkippedReason', 'InspectedOn', 'AgencyCode',
       'Postcode', 'Latitude', 'Longitude', 'CommunityBoard',
       'CouncilDistrict', 'CensusTract', 'BIN', 'BBL', 'NTA'],
      dtype='object')

In [7]:
manhattan_restaurant_df.head()

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,BusinessAddress,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA
0,Manhattan,Oscar Wilde,both,Camelot Castle LLC,45 West 27th st,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square
1,Manhattan,LA RUBIA RESTAURANT,both,LA RUBIA RESTAURANT INC,3517 BROADWAY,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1020910000.0,Hamilton Heights
2,Manhattan,Thai Sliders,sidewalk,Silom Thai Inc,150 8th Ave,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1007670000.0,Hudson Yards-Chelsea-Flatiron-Union Square
5,Manhattan,LURE FISHBAR,both,142 MERCER STREET LLC,142 MERCER STREET,72897,,Compliant,,12/20/2021 04:56:14 PM,DOT,10012,40.724837,-73.998402,2.0,1.0,49.0,1080019.0,1005120000.0,SoHo-TriBeCa-Civic Center-Little Italy
6,Manhattan,Mojo omakase,both,Mojo chef inc,177 9th avenue,72898,,Compliant,,12/20/2021 05:00:10 PM,DOT,10011,40.744917,-74.002487,4.0,3.0,89.0,1088184.0,1007188000.0,Hudson Yards-Chelsea-Flatiron-Union Square


In [8]:
#checking for duplicated rows
duplicate_rows = manhattan_restaurant_df[manhattan_restaurant_df.duplicated()]

#display duplicate rows
print("Duplicate Rows:")
print(duplicate_rows)

#Results: there are no duplicated rows

Duplicate Rows:
Empty DataFrame
Columns: [Borough, RestaurantName, SeatingChoice, LegalBusinessName, BusinessAddress, RestaurantInspectionID, IsSidewayCompliant, IsRoadwayCompliant, SkippedReason, InspectedOn, AgencyCode, Postcode, Latitude, Longitude, CommunityBoard, CouncilDistrict, CensusTract, BIN, BBL, NTA]
Index: []


In [9]:
manhattan_restaurant_df.isnull().sum()

Borough                       0
RestaurantName                0
SeatingChoice                 0
LegalBusinessName             0
BusinessAddress               0
RestaurantInspectionID        0
IsSidewayCompliant        40710
IsRoadwayCompliant            0
SkippedReason             31837
InspectedOn                   0
AgencyCode                 5968
Postcode                      0
Latitude                   4394
Longitude                  4394
CommunityBoard             4394
CouncilDistrict            4394
CensusTract                4394
BIN                        4538
BBL                        4538
NTA                        4394
dtype: int64

In [10]:
type(restaurant_df)

pandas.core.frame.DataFrame

In [11]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point


# Convert to GeoDataFrame
try:
    manhattan_restaurant_gdf = gpd.GeoDataFrame(manhattan_restaurant_df, geometry=gpd.points_from_xy(manhattan_restaurant_df.Longitude, manhattan_restaurant_df.Latitude))
except Exception as e:
    print('No longitude or latitude')

# Set a coordinate reference system (CRS), e.g., WGS84
manhattan_restaurant_gdf.set_crs(epsg=4326, inplace=True)

# Export to GeoJSON
manhattan_restaurant_gdf.to_file("manhattan_restaurant.geojson", driver="GeoJSON")


In [12]:
manhattan_restaurant_gdf.tail()


Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,BusinessAddress,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,geometry
81548,Manhattan,subway,sidewalk,1613 second avenue food inc,1613 2nd avenue,111386,,Pre-Suspension,,05/01/2024 03:27:34 PM,DOT,10075,40.776357,-73.952867,8.0,5.0,138.0,1048709.0,1015290000.0,Yorkville,POINT (-73.95287 40.77636)
81549,Manhattan,Ethyl's alcohol and food,both,1629 2nd Restaurant LLC,1629 2 Ave,111387,,Skipped Inspection,No Seating,05/01/2024 03:31:33 PM,DOT,10028,40.776909,-73.952463,8.0,5.0,14601.0,1048741.0,1015300000.0,Yorkville,POINT (-73.95246 40.77691)
81550,Manhattan,Ethyl's alcohol and food,both,1629 2nd Restaurant LLC,1629 2 Ave,111388,,Pre-Suspension,,05/01/2024 03:31:33 PM,DOT,10028,40.776909,-73.952463,8.0,5.0,14601.0,1048741.0,1015300000.0,Yorkville,POINT (-73.95246 40.77691)
81551,Manhattan,LADY CHOW KITCHEN,roadway,KING JADE GARDEN INC,171 HESTER STREET,111389,,For HIQA Review,,05/01/2024 03:35:16 PM,DOT,10013,40.717783,-73.996829,2.0,1.0,41.0,1083322.0,1002380000.0,SoHo-TriBeCa-Civic Center-Little Italy,POINT (-73.99683 40.71778)
81552,Manhattan,MADAME BONTE,both,BEANHOUSE LLC,318 EAST 84 STREET,111390,,For HIQA Review,,05/01/2024 03:51:01 PM,DOT,10028,40.776277,-73.952051,8.0,5.0,138.0,1049941.0,1015460000.0,Yorkville,POINT (-73.95205 40.77628)


In [13]:
manhattan_restaurant_gdf['Latitude'].isnull().value_counts()

Latitude
False    36316
True      4394
Name: count, dtype: int64

In [14]:
manhattan_restaurant_gdf.columns

Index(['Borough', 'RestaurantName', 'SeatingChoice', 'LegalBusinessName',
       'BusinessAddress', 'RestaurantInspectionID', 'IsSidewayCompliant',
       'IsRoadwayCompliant', 'SkippedReason', 'InspectedOn', 'AgencyCode',
       'Postcode', 'Latitude', 'Longitude', 'CommunityBoard',
       'CouncilDistrict', 'CensusTract', 'BIN', 'BBL', 'NTA', 'geometry'],
      dtype='object')

#### Geocoding 

In [15]:
from geopy.geocoders import Nominatim

In [16]:
def geocode_address(x):
    try:
        geolocator = Nominatim(user_agent="Restaurant_geocode")
        location=geolocator.geocode(x)
        if location:
            return location.latitude, location.longitude
        else:
            return None,None
    except Exception as e:
        print(f"Error geocoding {x}: {e}")
        return None, None

In [17]:
import googlemaps
gmaps = googlemaps.Client(key='AIzaSyAvr9o38Xk159Uh4BTvDkKrKLvO7t9VvxU')

In [18]:
def geocode_address_google(x):
    geocode_result=gmaps.geocode(x)
    if geocode_result:
        location=geocode_result[0]['geometry']['location']
        return location['lat'],location['lng']
    else:
        return None,None

In [19]:
manhattan_restaurant_gdf.columns

Index(['Borough', 'RestaurantName', 'SeatingChoice', 'LegalBusinessName',
       'BusinessAddress', 'RestaurantInspectionID', 'IsSidewayCompliant',
       'IsRoadwayCompliant', 'SkippedReason', 'InspectedOn', 'AgencyCode',
       'Postcode', 'Latitude', 'Longitude', 'CommunityBoard',
       'CouncilDistrict', 'CensusTract', 'BIN', 'BBL', 'NTA', 'geometry'],
      dtype='object')

In [20]:
for index,row in manhattan_restaurant_gdf.iterrows():
    if pd.isnull(row['Latitude']) or pd.isnull(row['Longitude']):
        address=row['BusinessAddress']
        if pd.notnull(row['BusinessAddress']):
            latitude,longitude= geocode_address_google(row['BusinessAddress'])
            if latitude is None or longitude is None:
                print(f"Failed to geocode address: {address}")
            else:
                manhattan_restaurant_gdf.at[index,'Latitude']=latitude
                manhattan_restaurant_gdf.at[index,'Longitude']=longitude


Failed to geocode address: 59 54th st
Failed to geocode address: 49W  20th Street
Failed to geocode address: 235237 WEST   48 STREET
Failed to geocode address: 930 8 Ave 55 st
Failed to geocode address: 218 218 Bowery
Failed to geocode address: 384 Grand
Failed to geocode address: 283 3 ave 22 street
Failed to geocode address: 53 72 Nd street
Failed to geocode address: 144 46th
Failed to geocode address: 283 3 ave 22 street
Failed to geocode address: 283 3 ave 22 street
Failed to geocode address: 9 East 37
Failed to geocode address: 283 3 ave 22 street
Failed to geocode address: 229 E 84th
Failed to geocode address: 33 CARMINE
Failed to geocode address: 316E 84 St
Failed to geocode address: 1453 1453 3rd ave
Failed to geocode address: 53 72 Nd street
Failed to geocode address: 228 10th Street
Failed to geocode address: 229 E 84th
Failed to geocode address: 304 E 78
Failed to geocode address: 598 8th
Failed to geocode address: 17West 45th street
Failed to geocode address: 631  9 th Aven

In [21]:
manhattan_restaurant_gdf.columns

Index(['Borough', 'RestaurantName', 'SeatingChoice', 'LegalBusinessName',
       'BusinessAddress', 'RestaurantInspectionID', 'IsSidewayCompliant',
       'IsRoadwayCompliant', 'SkippedReason', 'InspectedOn', 'AgencyCode',
       'Postcode', 'Latitude', 'Longitude', 'CommunityBoard',
       'CouncilDistrict', 'CensusTract', 'BIN', 'BBL', 'NTA', 'geometry'],
      dtype='object')

In [22]:
manhattan_restaurant_gdf = manhattan_restaurant_gdf.drop('Longitutde', axis=1)

KeyError: "['Longitutde'] not found in axis"

In [None]:
manhattan_restaurant_gdf

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,BusinessAddress,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,geometry
0,Manhattan,Oscar Wilde,both,Camelot Castle LLC,45 West 27th st,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,POINT (-73.98966 40.74488)
1,Manhattan,LA RUBIA RESTAURANT,both,LA RUBIA RESTAURANT INC,3517 BROADWAY,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,POINT (-73.95087 40.82586)
2,Manhattan,Thai Sliders,sidewalk,Silom Thai Inc,150 8th Ave,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,POINT (-74.00095 40.74191)
5,Manhattan,LURE FISHBAR,both,142 MERCER STREET LLC,142 MERCER STREET,72897,,Compliant,,12/20/2021 04:56:14 PM,DOT,10012,40.724837,-73.998402,2.0,1.0,49.0,1080019.0,1.005120e+09,SoHo-TriBeCa-Civic Center-Little Italy,POINT (-73.99840 40.72484)
6,Manhattan,Mojo omakase,both,Mojo chef inc,177 9th avenue,72898,,Compliant,,12/20/2021 05:00:10 PM,DOT,10011,40.744917,-74.002487,4.0,3.0,89.0,1088184.0,1.007188e+09,Hudson Yards-Chelsea-Flatiron-Union Square,POINT (-74.00249 40.74492)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81548,Manhattan,subway,sidewalk,1613 second avenue food inc,1613 2nd avenue,111386,,Pre-Suspension,,05/01/2024 03:27:34 PM,DOT,10075,40.776357,-73.952867,8.0,5.0,138.0,1048709.0,1.015290e+09,Yorkville,POINT (-73.95287 40.77636)
81549,Manhattan,Ethyl's alcohol and food,both,1629 2nd Restaurant LLC,1629 2 Ave,111387,,Skipped Inspection,No Seating,05/01/2024 03:31:33 PM,DOT,10028,40.776909,-73.952463,8.0,5.0,14601.0,1048741.0,1.015300e+09,Yorkville,POINT (-73.95246 40.77691)
81550,Manhattan,Ethyl's alcohol and food,both,1629 2nd Restaurant LLC,1629 2 Ave,111388,,Pre-Suspension,,05/01/2024 03:31:33 PM,DOT,10028,40.776909,-73.952463,8.0,5.0,14601.0,1048741.0,1.015300e+09,Yorkville,POINT (-73.95246 40.77691)
81551,Manhattan,LADY CHOW KITCHEN,roadway,KING JADE GARDEN INC,171 HESTER STREET,111389,,For HIQA Review,,05/01/2024 03:35:16 PM,DOT,10013,40.717783,-73.996829,2.0,1.0,41.0,1083322.0,1.002380e+09,SoHo-TriBeCa-Civic Center-Little Italy,POINT (-73.99683 40.71778)


In [None]:
manhattan_restaurant_gdf.isnull().value_counts()

Borough  RestaurantName  SeatingChoice  LegalBusinessName  BusinessAddress  RestaurantInspectionID  IsSidewayCompliant  IsRoadwayCompliant  SkippedReason  InspectedOn  AgencyCode  Postcode  Latitude  Longitude  CommunityBoard  CouncilDistrict  CensusTract  BIN    BBL    NTA    geometry
False    False           False          False              False            False                   True                False               True           False        False       False     False     False      False           False            False        False  False  False  False       23118
                                                                                                                                            False          False        False       False     False     False      False           False            False        False  False  False  False        7808
                                                                                                                         

In [None]:
manhattan_restaurant_gdf.isnull().sum()

Borough                       0
RestaurantName                0
SeatingChoice                 0
LegalBusinessName             0
BusinessAddress               0
RestaurantInspectionID        0
IsSidewayCompliant        40710
IsRoadwayCompliant            0
SkippedReason             31837
InspectedOn                   0
AgencyCode                 5968
Postcode                      0
Latitude                   1935
Longitude                  1935
CommunityBoard             4394
CouncilDistrict            4394
CensusTract                4394
BIN                        4538
BBL                        4538
NTA                        4394
geometry                      0
dtype: int64