In [46]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import pandas as pd
import numpy as np
import html
import matplotlib as plt
import re
from itertools import combinations
import time
pd.set_option('display.max_columns', None)

In [47]:
historic_df=pd.read_csv('cleaned_historic_inspections.csv')

In [48]:
historic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14428 entries, 0 to 14427
Data columns (total 35 columns):
 #   Column                                    Non-Null Count  Dtype  
---  ------                                    --------------  -----  
 0   objectid                                  14428 non-null  int64  
 1   globalid                                  14428 non-null  object 
 2   Seating Interest (Sidewalk/Roadway/Both)  14428 non-null  object 
 3   Restaurant Name                           14428 non-null  object 
 4   Legal Business Name                       14428 non-null  object 
 5   Doing Business As (DBA)                   14427 non-null  object 
 6   Building Number                           14329 non-null  object 
 7   Street                                    14428 non-null  object 
 8   Borough                                   14428 non-null  object 
 9   Postcode                                  14428 non-null  int64  
 10  Business Address                  

In [49]:
historic_df['Doing Business As (DBA)'].fillna(historic_df['Restaurant Name'], inplace=True)
historic_df['Column Of Interest'] = historic_df['Restaurant Name'] + " " + historic_df['Street']
historic_df['Global Restaurant ID'] = ''
historic_df['Latest Inspection Row'] = ''
historic_df['BIN'] = historic_df['BIN'].astype(str).replace('nan', 'undefined').replace('nan', 'undefined').fillna('undefined')

In [50]:
historic_df['BIN'].value_counts()

undefined    1402
3000000.0      29
4000000.0      21
3397861.0      18
3000090.0      17
             ... 
4124422.0       1
2013298.0       1
1035447.0       1
4311574.0       1
1025423.0       1
Name: BIN, Length: 8960, dtype: int64

In [51]:
#Function to fill Global Restaurant ID
global_restaurant_id_counter = 0
def assign_global_restaurant_id(df, unique_column, threshold=85):
    global global_restaurant_id_counter
    # Filter rows where 'Global Restaurant ID' is NaN
    filtered_df = df[df['Global Restaurant ID'] == '']

    # Iterate through rows in the group
    for bin_value in filtered_df['BIN'].unique():
        # Group rows by BIN
        bin_group = filtered_df[filtered_df['BIN'] == bin_value]

        # Iterate through rows in the BIN group
        for index, row in bin_group.iterrows():
            # Check if the 'Column of Interest' values are similar
            if pd.isna(filtered_df.loc[index, 'Global Restaurant ID']) or filtered_df.loc[index, 'Global Restaurant ID'] == '':
                # Check if the 'Column of Interest' values are similar with other rows in the group
                match = bin_group[bin_group.index != index]['Column Of Interest'].apply(lambda x: fuzz.ratio(row['Column Of Interest'], x) > threshold)

                # If there are matches, assign the same global restaurant ID to the current row and matching rows
                if match.any():
                    global_id = f"{global_restaurant_id_counter:08d}"
                    filtered_df.at[index, 'Global Restaurant ID'] = global_id
                    filtered_df.loc[match.index[match==True], 'Global Restaurant ID'] = global_id
                    global_restaurant_id_counter += 1
                else:
                    # If no match, assign the next global restaurant ID to the single row
                    filtered_df.at[index, 'Global Restaurant ID'] = f"{global_restaurant_id_counter:08d}"
                    global_restaurant_id_counter += 1
        
        # Merge the result_df with the original df
    df = pd.merge(df, filtered_df[[unique_column, 'Global Restaurant ID']], on=unique_column, how='left', suffixes=('', '_filtered'))
    df['Global Restaurant ID'] = df['Global Restaurant ID_filtered'].combine_first(df['Global Restaurant ID'])
    df.drop(columns=['Global Restaurant ID_filtered'], inplace=True)

    return df



historic_df = assign_global_restaurant_id(historic_df,'globalid',threshold=85)

In [52]:
#Function to find the latest Inspection Row
def find_latest_inspection_row(df, time_column):
    df[time_column] = pd.to_datetime(df[time_column], errors='coerce')
    latest_indices = df.groupby('Global Restaurant ID')[time_column].idxmax()
    df.loc[latest_indices, 'Latest Inspection Row'] = 1
    return df

historic_df = find_latest_inspection_row(historic_df,'Time of Submission')

In [53]:
historic_df

Unnamed: 0,objectid,globalid,Seating Interest (Sidewalk/Roadway/Both),Restaurant Name,Legal Business Name,Doing Business As (DBA),Building Number,Street,Borough,Postcode,Business Address,Food Service Establishment Permit #,Sidewalk Dimensions (Length),Sidewalk Dimensions (Width),Sidewalk Dimensions (Area),Roadway Dimensions (Length),Roadway Dimensions (Width),Roadway Dimensions (Area),Approved for Sidewalk Seating,Approved for Roadway Seating,Qualify Alcohol,SLA Serial Number,SLA License Type,Landmark District or Building,landmarkDistrict_terms,healthCompliance_terms,Time of Submission,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,13610,FD87ABAA-860E-4762-845D-8F0403D0246B,Roadway,Madame Bonte,Beanhouse LLC,Madame Bonte,318,East 84th Street,Manhattan,10028,"318 East 84th Street, Manhattan, NY",50114174,,,,10.0,8.0,80.0,no,yes,yes,1346965.0,TW,no,,yes,2022-06-09 16:26:00,40.776277,-73.952051,8.0,5.0,138.0,1049941.0,1.015460e+09,Yorkville,Madame Bonte East 84th Street,00000000,1
1,5900,3B07E4C0-07B7-4079-8333-64446CC3EE03,Sidewalk,Seasoned Vegan,"Seasoned Vegan, LLC","Seasoned Vegan, LLC",55,St. Nicholas Avenue,Manhattan,10026,"55 St. Nicholas Avenue, Manhattan, NY",50003337,47.0,3.0,141.0,,,,yes,no,yes,1289526.0,RW,no,,yes,2020-06-26 20:38:00,40.800500,-73.952507,10.0,9.0,216.0,1054995.0,1.018220e+09,Central Harlem South,Seasoned Vegan St. Nicholas Avenue,00000001,1
2,13018,137C575D-DC14-4F9D-83D9-A3FFE513B3B8,Sidewalk,Americas Cafe & Grill,68th Grill Inc.,Americas Cafe & Grill,1159,3rd Avenue,Manhattan,10065,"1159 3rd Avenue, Manhattan, NY",40797684,8.0,8.0,64.0,,,,yes,no,no,,,no,,yes,2021-10-22 11:01:00,40.766845,-73.962708,8.0,4.0,118.0,1043896.0,1.014220e+09,Lenox Hill-Roosevelt Island,Americas Cafe & Grill 3rd Avenue,00000002,1
3,11630,15270732-2A78-4C24-89DD-BE8DD916F115,Roadway,Sushi Seki,Seki Inc.,Sushi Seki,208,West 23rd Street,Manhattan,10011,"208 West 23rd Street, Manhattan, NY",50005983,,,,44.0,8.0,352.0,no,yes,yes,0.0,OP,no,,yes,2020-12-14 19:54:00,40.744338,-73.996240,4.0,3.0,91.0,1014129.0,1.007720e+09,Hudson Yards-Chelsea-Flatiron-Union Square,Sushi Seki West 23rd Street,00000004,
4,13137,EF9C8173-91D1-496E-8BD4-B02BEADC2A21,Roadway,Sami & Susu,Amir Nathan,Sami & Susu,190,Orchard Street,Manhattan,10002,"190 Orchard Street, Manhattan, NY",50112624,,,,22.0,8.0,176.0,no,yes,yes,0.0,TW,no,,yes,2021-12-22 12:46:00,40.722124,-73.988160,3.0,1.0,3001.0,1005393.0,1.004120e+09,Chinatown,Sami & Susu Orchard Street,00000005,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14429,9799,108C6784-54DB-480B-8627-CF0495D27A0E,Both,King Of Spades Inc.,King Of Spades Inc.,King Of Spades Inc.,1425,College Point Boulevard,Queens,11356,"1425 College Point Boulevard, Queens, NY",50072483,22.0,4.0,88.0,22.0,8.0,176.0,yes,yes,yes,0.0,OP,no,,yes,2020-08-04 15:27:00,40.784739,-73.845776,7.0,19.0,929.0,4098349.0,4.040850e+09,College Point,King Of Spades Inc. College Point Boulevard,00012038,1
14430,8215,7B8EC275-D470-4106-819E-03FC0E1F9717,Both,La Queen Tea House Inc.,La Queen Tea House Inc.,La Queen Tea House Inc.,752B,61st Street,Brooklyn,11220,"752B 61st Street, Brooklyn, NY",50105060,26.0,7.0,182.0,26.0,8.0,208.0,yes,yes,no,,,no,,yes,2020-07-13 15:53:00,40.635543,-74.011220,7.0,38.0,118.0,3143885.0,3.057940e+09,Sunset Park East,La Queen Tea House Inc. 61st Street,00011494,1
14431,2895,476F88D2-D3E8-4E83-8686-8BAC2F752CF5,Sidewalk,Aahar Indina Cuisine,Vidhan Bhatt Inc.,Vidhan Bhatt Inc.,10,Murray Street,Manhattan,10007,"10 Murray Street, Manhattan, NY",50049997,25.0,14.0,350.0,,,,yes,no,yes,1298882.0,RW,no,,yes,2020-06-21 13:46:00,40.713298,-74.007773,1.0,1.0,21.0,1001399.0,1.001240e+09,SoHo-TriBeCa-Civic Center-Little Italy,Aahar Indina Cuisine Murray Street,00002415,1
14432,607,FF0D04EF-1D9A-47D6-8F20-8BDBDBCE2792,Sidewalk,Le Cafe Coffee,Le Cafe Coffee LLC,Le Cafe Coffee LLC,145,4th Avenue,Manhattan,10003,"145 4th Avenue, Manhattan, NY",50042698,10.0,10.0,100.0,,,,yes,no,no,,,no,,yes,2020-06-19 13:19:00,40.733916,-73.989872,3.0,2.0,42.0,1077569.0,1.005590e+09,East Village,Le Cafe Coffee 4th Avenue,00002047,1


In [54]:
historic_df['Global Restaurant ID'].value_counts()

00002253    8
00003640    7
00001620    7
00004557    6
00003164    6
           ..
00001331    1
00006398    1
00000505    1
00006399    1
00012039    1
Name: Global Restaurant ID, Length: 12040, dtype: int64

In [55]:
historic_df[historic_df['Global Restaurant ID']=='00000000']

Unnamed: 0,objectid,globalid,Seating Interest (Sidewalk/Roadway/Both),Restaurant Name,Legal Business Name,Doing Business As (DBA),Building Number,Street,Borough,Postcode,Business Address,Food Service Establishment Permit #,Sidewalk Dimensions (Length),Sidewalk Dimensions (Width),Sidewalk Dimensions (Area),Roadway Dimensions (Length),Roadway Dimensions (Width),Roadway Dimensions (Area),Approved for Sidewalk Seating,Approved for Roadway Seating,Qualify Alcohol,SLA Serial Number,SLA License Type,Landmark District or Building,landmarkDistrict_terms,healthCompliance_terms,Time of Submission,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,13610,FD87ABAA-860E-4762-845D-8F0403D0246B,Roadway,Madame Bonte,Beanhouse LLC,Madame Bonte,318,East 84th Street,Manhattan,10028,"318 East 84th Street, Manhattan, NY",50114174,,,,10.0,8.0,80.0,no,yes,yes,1346965.0,TW,no,,yes,2022-06-09 16:26:00,40.776277,-73.952051,8.0,5.0,138.0,1049941.0,1015460000.0,Yorkville,Madame Bonte East 84th Street,0,1.0
1253,13165,CCC23E5F-7C57-43AA-869D-071A348FABFF,Sidewalk,Madame Bonte,Beanhouse LLC,Madame Bonte,318,East 84th Street,Manhattan,10028,"318 East 84th Street, Manhattan, NY",50114174,10.0,5.0,50.0,,,,yes,no,no,,,no,,yes,2022-01-12 13:10:00,40.776277,-73.952051,8.0,5.0,138.0,1049941.0,1015460000.0,Yorkville,Madame Bonte East 84th Street,0,
7528,13416,190B3F21-87A1-4A42-8F5D-EA69A6B42CE5,Sidewalk,Madame Bonte,Beanhouse LLC,Madame Bonte,318,East 84th Street,Manhattan,10028,"318 East 84th Street, Manhattan, NY",50114174,15.0,5.0,75.0,,,,yes,no,yes,0.0,TW,no,,yes,2022-04-20 10:10:00,40.776277,-73.952051,8.0,5.0,138.0,1049941.0,1015460000.0,Yorkville,Madame Bonte East 84th Street,0,


In [56]:
inspections_df=pd.read_csv('cleaned_current_open_inspections.csv')

In [57]:
inspections_df

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY"
1,Manhattan,La Rubia Restaurant,Both,La Rubia Restaurant Inc.,Broadway,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,3517.0,"3517 Broadway, Manhattan, NY"
2,Manhattan,Thai Sliders,Sidewalk,Silom Thai Inc.,8th Avenue,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,150.0,"150 8th Avenue, Manhattan, NY"
3,Brooklyn,Otway,Both,St James 930 LLC,Fulton Street,72894,,Cease and Desist,,12/20/2021 04:38:45 PM,DOT,11238,40.682833,-73.963833,2.0,35.0,201.0,3335112.0,3.020130e+09,Clinton Hill,930.0,"930 Fulton Street, Brooklyn, NY"
4,Brooklyn,Williamsburg Thai Cuisine,Both,Williamsburg Thai Cuisine Ny Inc.,Bedford Avenue,72896,,Compliant,,12/20/2021 04:52:41 PM,DOT,11249,40.716913,-73.958728,1.0,33.0,553.0,3062192.0,3.023350e+09,North Side-South Side,212.0,"212 Bedford Avenue, Brooklyn, NY"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79198,Brooklyn,Sunshine Co,Both,780 Washington LLC,Wahington Avenue,107873,,Pre-Removal,,12/01/2023 03:44:49 PM,DOT,11238,,,,,,,,,780.0,"780 Wahington Avenue, Brooklyn, NY"
79199,Manhattan,Moustache,Sidewalk,Bedford Pitza Corp.,7th Avenue South,107874,,Cease and Desist,,12/01/2023 04:13:28 PM,DOT,10014,40.730812,-74.004395,2.0,3.0,67.0,1087324.0,1.005860e+09,West Village,29.0,"29 7th Avenue South, Manhattan, NY"
79200,Brooklyn,Mekelburg's,Both,"Hop, Stock & Barrel Ii LLC",Kent Avenue,107875,,Skipped Inspection,No Seating,12/01/2023 04:42:51 PM,DOT,11238,40.713757,-73.967200,1.0,33.0,551.0,3424711.0,3.024280e+09,North Side-South Side,319.0,"319 Kent Avenue, Brooklyn, NY"
79201,Brooklyn,Sunday In Brooklyn,Both,Sunday In Brooklyn LLC,Wythe Avenue,107876,,Compliant,,12/01/2023 04:48:20 PM,DOT,11249,40.714171,-73.965208,1.0,33.0,551.0,3321284.0,3.024150e+09,North Side-South Side,348.0,"348 Wythe Avenue, Brooklyn, NY"


In [58]:
inspections_df['LegalBusinessName'].fillna(inspections_df['RestaurantName'], inplace=True)
inspections_df['Column Of Interest'] = inspections_df['LegalBusinessName'] + " " + inspections_df['Street']
inspections_df['Global Restaurant ID'] = ''
inspections_df['Latest Inspection Row'] = ''
inspections_df['BIN'] = inspections_df['BIN'].astype(str).replace('nan', 'undefined').replace('nan', 'undefined').fillna('undefined')

In [59]:
inspections_df

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,,
1,Manhattan,La Rubia Restaurant,Both,La Rubia Restaurant Inc.,Broadway,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,3517.0,"3517 Broadway, Manhattan, NY",La Rubia Restaurant Inc. Broadway,,
2,Manhattan,Thai Sliders,Sidewalk,Silom Thai Inc.,8th Avenue,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,150.0,"150 8th Avenue, Manhattan, NY",Silom Thai Inc. 8th Avenue,,
3,Brooklyn,Otway,Both,St James 930 LLC,Fulton Street,72894,,Cease and Desist,,12/20/2021 04:38:45 PM,DOT,11238,40.682833,-73.963833,2.0,35.0,201.0,3335112.0,3.020130e+09,Clinton Hill,930.0,"930 Fulton Street, Brooklyn, NY",St James 930 LLC Fulton Street,,
4,Brooklyn,Williamsburg Thai Cuisine,Both,Williamsburg Thai Cuisine Ny Inc.,Bedford Avenue,72896,,Compliant,,12/20/2021 04:52:41 PM,DOT,11249,40.716913,-73.958728,1.0,33.0,553.0,3062192.0,3.023350e+09,North Side-South Side,212.0,"212 Bedford Avenue, Brooklyn, NY",Williamsburg Thai Cuisine Ny Inc. Bedford Avenue,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79198,Brooklyn,Sunshine Co,Both,780 Washington LLC,Wahington Avenue,107873,,Pre-Removal,,12/01/2023 03:44:49 PM,DOT,11238,,,,,,undefined,,,780.0,"780 Wahington Avenue, Brooklyn, NY",780 Washington LLC Wahington Avenue,,
79199,Manhattan,Moustache,Sidewalk,Bedford Pitza Corp.,7th Avenue South,107874,,Cease and Desist,,12/01/2023 04:13:28 PM,DOT,10014,40.730812,-74.004395,2.0,3.0,67.0,1087324.0,1.005860e+09,West Village,29.0,"29 7th Avenue South, Manhattan, NY",Bedford Pitza Corp. 7th Avenue South,,
79200,Brooklyn,Mekelburg's,Both,"Hop, Stock & Barrel Ii LLC",Kent Avenue,107875,,Skipped Inspection,No Seating,12/01/2023 04:42:51 PM,DOT,11238,40.713757,-73.967200,1.0,33.0,551.0,3424711.0,3.024280e+09,North Side-South Side,319.0,"319 Kent Avenue, Brooklyn, NY","Hop, Stock & Barrel Ii LLC Kent Avenue",,
79201,Brooklyn,Sunday In Brooklyn,Both,Sunday In Brooklyn LLC,Wythe Avenue,107876,,Compliant,,12/01/2023 04:48:20 PM,DOT,11249,40.714171,-73.965208,1.0,33.0,551.0,3321284.0,3.024150e+09,North Side-South Side,348.0,"348 Wythe Avenue, Brooklyn, NY",Sunday In Brooklyn LLC Wythe Avenue,,


In [60]:
#inspections_df = assign_global_restaurant_id(inspections_df,'RestaurantInspectionID',threshold=85)

In [61]:
#inspections_df['Global Restaurant ID'].value_counts()

In [62]:
#inspections_df[inspections_df['Global Restaurant ID']=='00013455']

In [63]:
def match_and_assign_global_ids(df1, df2, unique_column, threshold=85):
    filtered_df1 = df1.groupby('Global Restaurant ID').first().reset_index()
    filtered_df2 = df2[df2['Global Restaurant ID'] == ''] #Newly Added
    
    # Group both dataframes by BIN
    grouped_df1 = filtered_df1.groupby('BIN')
    grouped_df2 = filtered_df2.groupby('BIN')

    # Iterate through BIN groups
    for bin_value in grouped_df1.groups.keys():
        if bin_value in grouped_df2.groups:
            # Get rows for the current BIN from both dataframes
            bin_group_df1 = grouped_df1.get_group(bin_value)
            bin_group_df2 = grouped_df2.get_group(bin_value)

            # Iterate through rows in both BIN groups
            for _, row_df1 in bin_group_df1.iterrows():
                # Compare the 'Column Of Interest' values with rows in df2 using fuzzy matching
                match = bin_group_df2['Column Of Interest'].apply(lambda x: fuzz.ratio(row_df1['Column Of Interest'], x) > threshold)

                # If a match is found, assign the global restaurant ID from df1 to df2
                if match.any():
                    filtered_df2.loc[match.index[match == True], 'Global Restaurant ID'] = row_df1['Global Restaurant ID']
    
    df2 = pd.merge(df2, filtered_df2[[unique_column, 'Global Restaurant ID']], on=unique_column, how='left', suffixes=('', '_filtered'))
    df2['Global Restaurant ID'] = df2['Global Restaurant ID_filtered'].combine_first(df2['Global Restaurant ID'])
    df2.drop(columns=['Global Restaurant ID_filtered'], inplace=True)

    return df2

inspections_df = match_and_assign_global_ids(historic_df, inspections_df, 'RestaurantInspectionID', threshold=85)
inspections_df

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,,
1,Manhattan,La Rubia Restaurant,Both,La Rubia Restaurant Inc.,Broadway,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,3517.0,"3517 Broadway, Manhattan, NY",La Rubia Restaurant Inc. Broadway,00001882,
2,Manhattan,Thai Sliders,Sidewalk,Silom Thai Inc.,8th Avenue,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,150.0,"150 8th Avenue, Manhattan, NY",Silom Thai Inc. 8th Avenue,,
3,Brooklyn,Otway,Both,St James 930 LLC,Fulton Street,72894,,Cease and Desist,,12/20/2021 04:38:45 PM,DOT,11238,40.682833,-73.963833,2.0,35.0,201.0,3335112.0,3.020130e+09,Clinton Hill,930.0,"930 Fulton Street, Brooklyn, NY",St James 930 LLC Fulton Street,,
4,Brooklyn,Williamsburg Thai Cuisine,Both,Williamsburg Thai Cuisine Ny Inc.,Bedford Avenue,72896,,Compliant,,12/20/2021 04:52:41 PM,DOT,11249,40.716913,-73.958728,1.0,33.0,553.0,3062192.0,3.023350e+09,North Side-South Side,212.0,"212 Bedford Avenue, Brooklyn, NY",Williamsburg Thai Cuisine Ny Inc. Bedford Avenue,00009896,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79198,Brooklyn,Sunshine Co,Both,780 Washington LLC,Wahington Avenue,107873,,Pre-Removal,,12/01/2023 03:44:49 PM,DOT,11238,,,,,,undefined,,,780.0,"780 Wahington Avenue, Brooklyn, NY",780 Washington LLC Wahington Avenue,,
79199,Manhattan,Moustache,Sidewalk,Bedford Pitza Corp.,7th Avenue South,107874,,Cease and Desist,,12/01/2023 04:13:28 PM,DOT,10014,40.730812,-74.004395,2.0,3.0,67.0,1087324.0,1.005860e+09,West Village,29.0,"29 7th Avenue South, Manhattan, NY",Bedford Pitza Corp. 7th Avenue South,,
79200,Brooklyn,Mekelburg's,Both,"Hop, Stock & Barrel Ii LLC",Kent Avenue,107875,,Skipped Inspection,No Seating,12/01/2023 04:42:51 PM,DOT,11238,40.713757,-73.967200,1.0,33.0,551.0,3424711.0,3.024280e+09,North Side-South Side,319.0,"319 Kent Avenue, Brooklyn, NY","Hop, Stock & Barrel Ii LLC Kent Avenue",,
79201,Brooklyn,Sunday In Brooklyn,Both,Sunday In Brooklyn LLC,Wythe Avenue,107876,,Compliant,,12/01/2023 04:48:20 PM,DOT,11249,40.714171,-73.965208,1.0,33.0,551.0,3321284.0,3.024150e+09,North Side-South Side,348.0,"348 Wythe Avenue, Brooklyn, NY",Sunday In Brooklyn LLC Wythe Avenue,00002501,


In [64]:
inspections_df['Global Restaurant ID'].value_counts()

            57518
00005188       44
00002191       42
00000494       38
00006486       38
            ...  
00000473        1
00008721        1
00000819        1
00000505        1
00011084        1
Name: Global Restaurant ID, Length: 2796, dtype: int64

In [65]:
inspections_df[inspections_df['Global Restaurant ID']=='00005188']

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
364,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,73291,,Cease and Desist,,12/28/2021 04:20:08 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
365,Brooklyn,Cafe Argentino,Both,Cafe Argentino Inc.,Grand Street,73292,,Cease and Desist,,12/28/2021 04:23:52 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
1730,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,74734,,Cease and Desist,,02/10/2022 12:38:21 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
1768,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,74779,,Under Review,,02/11/2022 07:41:11 AM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
2023,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,75062,,For HIQA Review,,02/19/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
3214,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,76300,,Under Review,,03/08/2022 02:49:49 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
3689,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,76784,,For HIQA Review,,03/16/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
5245,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,77997,,Under Review,,04/08/2022 12:09:43 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
5558,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,79335,,For HIQA Review,,04/16/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
7898,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,81420,,Non-Compliant,,05/24/2022 02:02:42 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,


In [66]:
historic_df[historic_df['Global Restaurant ID']=='00005188']

Unnamed: 0,objectid,globalid,Seating Interest (Sidewalk/Roadway/Both),Restaurant Name,Legal Business Name,Doing Business As (DBA),Building Number,Street,Borough,Postcode,Business Address,Food Service Establishment Permit #,Sidewalk Dimensions (Length),Sidewalk Dimensions (Width),Sidewalk Dimensions (Area),Roadway Dimensions (Length),Roadway Dimensions (Width),Roadway Dimensions (Area),Approved for Sidewalk Seating,Approved for Roadway Seating,Qualify Alcohol,SLA Serial Number,SLA License Type,Landmark District or Building,landmarkDistrict_terms,healthCompliance_terms,Time of Submission,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Column Of Interest,Global Restaurant ID,Latest Inspection Row
3610,1544,9B8D3570-2C05-4189-8C95-A1369C042139,Both,Cafe Argentino Inc.,Cafe Argentino Inc.,Cafe Argentino Inc.,499,Grand Street,Brooklyn,11211,"499 Grand Street, Brooklyn, NY",41456654,43.0,7.0,301.0,53.0,8.0,424.0,yes,yes,yes,1225456.0,OP,no,,yes,2020-06-19 15:41:00,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,Cafe Argentino Inc. Grand Street,5188,1.0
13914,1229,D98BA9BF-3B7A-4CCE-879C-0FAA4E4EA285,Both,Cafe Argentino,Cafe Argentino Inc.,Cafe Argentino Inc.,499,Grand Street,Brooklyn,11211,"499 Grand Street, Brooklyn, NY",41456654,24.0,7.0,168.0,52.0,8.0,416.0,yes,yes,yes,1225456.0,OP,no,,yes,2020-06-19 14:12:00,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,Cafe Argentino Grand Street,5188,


In [67]:
inspections_df = assign_global_restaurant_id(inspections_df,'RestaurantInspectionID',threshold=85)

In [68]:
inspections_df

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,12/20/2021 04:06:58 PM,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,00012040,
1,Manhattan,La Rubia Restaurant,Both,La Rubia Restaurant Inc.,Broadway,72892,,For HIQA Review,,12/20/2021 04:18:42 PM,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,3517.0,"3517 Broadway, Manhattan, NY",La Rubia Restaurant Inc. Broadway,00001882,
2,Manhattan,Thai Sliders,Sidewalk,Silom Thai Inc.,8th Avenue,72893,,Non-Compliant,,12/20/2021 04:35:41 PM,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,150.0,"150 8th Avenue, Manhattan, NY",Silom Thai Inc. 8th Avenue,00012041,
3,Brooklyn,Otway,Both,St James 930 LLC,Fulton Street,72894,,Cease and Desist,,12/20/2021 04:38:45 PM,DOT,11238,40.682833,-73.963833,2.0,35.0,201.0,3335112.0,3.020130e+09,Clinton Hill,930.0,"930 Fulton Street, Brooklyn, NY",St James 930 LLC Fulton Street,00012042,
4,Brooklyn,Williamsburg Thai Cuisine,Both,Williamsburg Thai Cuisine Ny Inc.,Bedford Avenue,72896,,Compliant,,12/20/2021 04:52:41 PM,DOT,11249,40.716913,-73.958728,1.0,33.0,553.0,3062192.0,3.023350e+09,North Side-South Side,212.0,"212 Bedford Avenue, Brooklyn, NY",Williamsburg Thai Cuisine Ny Inc. Bedford Avenue,00009896,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79198,Brooklyn,Sunshine Co,Both,780 Washington LLC,Wahington Avenue,107873,,Pre-Removal,,12/01/2023 03:44:49 PM,DOT,11238,,,,,,undefined,,,780.0,"780 Wahington Avenue, Brooklyn, NY",780 Washington LLC Wahington Avenue,00012239,
79199,Manhattan,Moustache,Sidewalk,Bedford Pitza Corp.,7th Avenue South,107874,,Cease and Desist,,12/01/2023 04:13:28 PM,DOT,10014,40.730812,-74.004395,2.0,3.0,67.0,1087324.0,1.005860e+09,West Village,29.0,"29 7th Avenue South, Manhattan, NY",Bedford Pitza Corp. 7th Avenue South,00016133,
79200,Brooklyn,Mekelburg's,Both,"Hop, Stock & Barrel Ii LLC",Kent Avenue,107875,,Skipped Inspection,No Seating,12/01/2023 04:42:51 PM,DOT,11238,40.713757,-73.967200,1.0,33.0,551.0,3424711.0,3.024280e+09,North Side-South Side,319.0,"319 Kent Avenue, Brooklyn, NY","Hop, Stock & Barrel Ii LLC Kent Avenue",00014253,
79201,Brooklyn,Sunday In Brooklyn,Both,Sunday In Brooklyn LLC,Wythe Avenue,107876,,Compliant,,12/01/2023 04:48:20 PM,DOT,11249,40.714171,-73.965208,1.0,33.0,551.0,3321284.0,3.024150e+09,North Side-South Side,348.0,"348 Wythe Avenue, Brooklyn, NY",Sunday In Brooklyn LLC Wythe Avenue,00002501,


In [69]:
inspections_df[inspections_df['Global Restaurant ID']=='00005188']

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
364,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,73291,,Cease and Desist,,12/28/2021 04:20:08 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
365,Brooklyn,Cafe Argentino,Both,Cafe Argentino Inc.,Grand Street,73292,,Cease and Desist,,12/28/2021 04:23:52 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
1730,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,74734,,Cease and Desist,,02/10/2022 12:38:21 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
1768,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,74779,,Under Review,,02/11/2022 07:41:11 AM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
2023,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,75062,,For HIQA Review,,02/19/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
3214,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,76300,,Under Review,,03/08/2022 02:49:49 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
3689,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,76784,,For HIQA Review,,03/16/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
5245,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,77997,,Under Review,,04/08/2022 12:09:43 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
5558,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,79335,,For HIQA Review,,04/16/2022 12:00:01 AM,,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,
7898,Brooklyn,Cafe Argentino Inc.,Both,Cafe Argentino Inc.,Grand Street,81420,,Non-Compliant,,05/24/2022 02:02:42 PM,DOT,11211,40.710957,-73.951197,1.0,34.0,513.0,3062729.0,3023870000.0,North Side-South Side,499.0,"499 Grand Street, Brooklyn, NY",Cafe Argentino Inc. Grand Street,5188,


In [70]:
inspections_df = find_latest_inspection_row(inspections_df,'InspectedOn')

In [71]:
inspections_df

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,2021-12-20 16:06:58,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1.008290e+09,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,00012040,
1,Manhattan,La Rubia Restaurant,Both,La Rubia Restaurant Inc.,Broadway,72892,,For HIQA Review,,2021-12-20 16:18:42,DOT,10031,40.825863,-73.950874,9.0,7.0,229.0,1062369.0,1.020910e+09,Hamilton Heights,3517.0,"3517 Broadway, Manhattan, NY",La Rubia Restaurant Inc. Broadway,00001882,
2,Manhattan,Thai Sliders,Sidewalk,Silom Thai Inc.,8th Avenue,72893,,Non-Compliant,,2021-12-20 16:35:41,DOT,10011,40.741906,-74.000945,4.0,3.0,81.0,1013845.0,1.007670e+09,Hudson Yards-Chelsea-Flatiron-Union Square,150.0,"150 8th Avenue, Manhattan, NY",Silom Thai Inc. 8th Avenue,00012041,
3,Brooklyn,Otway,Both,St James 930 LLC,Fulton Street,72894,,Cease and Desist,,2021-12-20 16:38:45,DOT,11238,40.682833,-73.963833,2.0,35.0,201.0,3335112.0,3.020130e+09,Clinton Hill,930.0,"930 Fulton Street, Brooklyn, NY",St James 930 LLC Fulton Street,00012042,
4,Brooklyn,Williamsburg Thai Cuisine,Both,Williamsburg Thai Cuisine Ny Inc.,Bedford Avenue,72896,,Compliant,,2021-12-20 16:52:41,DOT,11249,40.716913,-73.958728,1.0,33.0,553.0,3062192.0,3.023350e+09,North Side-South Side,212.0,"212 Bedford Avenue, Brooklyn, NY",Williamsburg Thai Cuisine Ny Inc. Bedford Avenue,00009896,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79198,Brooklyn,Sunshine Co,Both,780 Washington LLC,Wahington Avenue,107873,,Pre-Removal,,2023-12-01 15:44:49,DOT,11238,,,,,,undefined,,,780.0,"780 Wahington Avenue, Brooklyn, NY",780 Washington LLC Wahington Avenue,00012239,
79199,Manhattan,Moustache,Sidewalk,Bedford Pitza Corp.,7th Avenue South,107874,,Cease and Desist,,2023-12-01 16:13:28,DOT,10014,40.730812,-74.004395,2.0,3.0,67.0,1087324.0,1.005860e+09,West Village,29.0,"29 7th Avenue South, Manhattan, NY",Bedford Pitza Corp. 7th Avenue South,00016133,1
79200,Brooklyn,Mekelburg's,Both,"Hop, Stock & Barrel Ii LLC",Kent Avenue,107875,,Skipped Inspection,No Seating,2023-12-01 16:42:51,DOT,11238,40.713757,-73.967200,1.0,33.0,551.0,3424711.0,3.024280e+09,North Side-South Side,319.0,"319 Kent Avenue, Brooklyn, NY","Hop, Stock & Barrel Ii LLC Kent Avenue",00014253,1
79201,Brooklyn,Sunday In Brooklyn,Both,Sunday In Brooklyn LLC,Wythe Avenue,107876,,Compliant,,2023-12-01 16:48:20,DOT,11249,40.714171,-73.965208,1.0,33.0,551.0,3321284.0,3.024150e+09,North Side-South Side,348.0,"348 Wythe Avenue, Brooklyn, NY",Sunday In Brooklyn LLC Wythe Avenue,00002501,1


In [72]:
inspections_df[inspections_df['Global Restaurant ID']=='00012040']

Unnamed: 0,Borough,RestaurantName,SeatingChoice,LegalBusinessName,Street,RestaurantInspectionID,IsSidewayCompliant,IsRoadwayCompliant,SkippedReason,InspectedOn,AgencyCode,Postcode,Latitude,Longitude,CommunityBoard,CouncilDistrict,CensusTract,BIN,BBL,NTA,Building Number,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,72891,,Non-Compliant,,2021-12-20 16:06:58,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
3220,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,76306,,Cease and Desist,,2022-03-08 15:17:22,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
5021,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,77759,,Cease and Desist,,2022-04-01 14:23:34,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
33463,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,33896,,Reset,,2020-12-22 09:05:24,,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
41103,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,41069,,Non-Compliant,,2021-01-21 12:55:11,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
47469,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,48870,,Cease and Desist,,2021-03-07 14:57:51,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
53941,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,57569,,Compliant,,2021-06-28 18:40:55,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
63047,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,7,,Non-Compliant,,2020-07-01 22:46:49,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
64085,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,7108,,Compliant,,2020-07-11 15:18:38,DOB,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,
71952,Manhattan,Oscar Wilde,Both,Camelot Castle LLC,West 27th Street,93019,,Cease and Desist,,2022-12-07 11:59:18,DOT,10001,40.744876,-73.989657,5.0,3.0,58.0,1015677.0,1008290000.0,Hudson Yards-Chelsea-Flatiron-Union Square,45.0,"45 West 27th Street, Manhattan, NY",Camelot Castle LLC West 27th Street,12040,


In [73]:
dohmh_df = pd.read_csv('cleaned_dohmh.csv')

In [74]:
dohmh_df

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Location Point1,Business Address
0,50106271,Sour Mouse,Manhattan,110,Delancey Street,10002,6462567220,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.718640,-73.988481,103.0,1.0,1800.0,1087565.0,1.004100e+09,Chinatown,,"110 Delancey Street, Manhattan, NY"
1,50117820,Modello Bar - Barclays Center,Brooklyn,620,Atlantic Avenue,11217,9174174384,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.683447,-73.975691,302.0,35.0,12902.0,3398156.0,3.011180e+09,Park Slope-Gowanus,,"620 Atlantic Avenue, Brooklyn, NY"
2,50141790,Nan,Manhattan,30,East 20th Street,10003,9176670036,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.738929,-73.989221,105.0,2.0,5200.0,1016181.0,1.008480e+09,Hudson Yards-Chelsea-Flatiron-Union Square,,"30 East 20th Street, Manhattan, NY"
3,50134862,Fine And Raw Chocolate,Brooklyn,70,Scott Avenue,11237,6462440734,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.709853,-73.922706,301.0,34.0,44900.0,3070597.0,3.029900e+09,East Williamsburg,,"70 Scott Avenue, Brooklyn, NY"
4,50127238,The Deluxe Party Ktv,Queens,3420,Linden Pl,11354,9172957843,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.766275,-73.831338,407.0,20.0,86900.0,4112007.0,4.049500e+09,Flushing,,"3420 Linden Pl, Queens, NY"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209767,50129459,Blueberry,Brooklyn,1849,Coney Island Avenue,11230,3473719836,Bakery Products/Desserts,08/03/2023,Violations were cited in the following area(s).,02A,Time/Temperature Control for Safety (TCS) food...,Critical,50.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.611980,-73.962744,314.0,48.0,54200.0,3181708.0,3.067580e+09,Midwood,,"1849 Coney Island Avenue, Brooklyn, NY"
209768,41444866,Pies-N-Thighs,Brooklyn,166,South 4th Street,11211,3475296090,American,08/09/2023,Violations were cited in the following area(s).,04N,Filth flies or food/refuse/sewage associated w...,Critical,18.0,B,08/09/2023,12/01/2023,Cycle Inspection / Re-inspection,40.711289,-73.961300,301.0,34.0,52300.0,3063458.0,3.024460e+09,North Side-South Side,,"166 South 4th Street, Brooklyn, NY"
209769,50123805,Qk Restaurant Inc.,Queens,4626,Kissena Boulevard,11355,7189990806,Chinese,08/01/2022,Violations were cited in the following area(s).,06D,"Food contact surface not properly washed, rins...",Critical,29.0,C,08/01/2022,12/01/2023,Pre-permit (Operational) / Re-inspection,40.750040,-73.818520,407.0,20.0,84500.0,4314742.0,4.051490e+09,Flushing,,"4626 Kissena Boulevard, Queens, NY"
209770,50056510,Ginbo's Hamburger House,Bronx,118,East 170th Street,10452,3479634644,Hamburgers,09/28/2022,Violations were cited in the following area(s).,08A,Establishment is not free of harborage or cond...,Not Critical,21.0,,,12/01/2023,Cycle Inspection / Compliance Inspection,40.839304,-73.915334,204.0,16.0,22102.0,2008084.0,2.028420e+09,West Concourse,,"118 East 170th Street, Bronx, NY"


In [76]:
dohmh_df['Column Of Interest'] = dohmh_df['DBA'] + " " + dohmh_df['STREET']
dohmh_df['Global Restaurant ID'] = ''
dohmh_df['Latest Inspection Row'] = ''
dohmh_df['BIN'] = dohmh_df['BIN'].astype(str).replace('nan', 'undefined').replace('nan', 'undefined').fillna('undefined')

In [77]:
dohmh_df = match_and_assign_global_ids(historic_df, dohmh_df, 'CAMIS', threshold=85)
dohmh_df

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Location Point1,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
0,50106271,Sour Mouse,Manhattan,110,Delancey Street,10002,6462567220,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.718640,-73.988481,103.0,1.0,1800.0,1087565.0,1.004100e+09,Chinatown,,"110 Delancey Street, Manhattan, NY",Sour Mouse Delancey Street,,
1,50117820,Modello Bar - Barclays Center,Brooklyn,620,Atlantic Avenue,11217,9174174384,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.683447,-73.975691,302.0,35.0,12902.0,3398156.0,3.011180e+09,Park Slope-Gowanus,,"620 Atlantic Avenue, Brooklyn, NY",Modello Bar - Barclays Center Atlantic Avenue,,
2,50141790,Nan,Manhattan,30,East 20th Street,10003,9176670036,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.738929,-73.989221,105.0,2.0,5200.0,1016181.0,1.008480e+09,Hudson Yards-Chelsea-Flatiron-Union Square,,"30 East 20th Street, Manhattan, NY",Nan East 20th Street,,
3,50134862,Fine And Raw Chocolate,Brooklyn,70,Scott Avenue,11237,6462440734,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.709853,-73.922706,301.0,34.0,44900.0,3070597.0,3.029900e+09,East Williamsburg,,"70 Scott Avenue, Brooklyn, NY",Fine And Raw Chocolate Scott Avenue,,
4,50127238,The Deluxe Party Ktv,Queens,3420,Linden Pl,11354,9172957843,,01/01/1900,,,,Not Applicable,,,,12/01/2023,,40.766275,-73.831338,407.0,20.0,86900.0,4112007.0,4.049500e+09,Flushing,,"3420 Linden Pl, Queens, NY",The Deluxe Party Ktv Linden Pl,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495043,41112524,Adrienne's Pizza Bar,Manhattan,54,Stone Street,10004,2122483838,Italian,03/10/2022,Violations were cited in the following area(s).,04L,Evidence of mice or live mice present in facil...,Critical,21.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.704403,-74.010211,101.0,1.0,900.0,1088620.0,1.000298e+09,Battery Park City-Lower Manhattan,,"54 Stone Street, Manhattan, NY",Adrienne's Pizza Bar Stone Street,00007944,
2495044,41112524,Adrienne's Pizza Bar,Manhattan,54,Stone Street,10004,2122483838,Italian,03/10/2022,Violations were cited in the following area(s).,04L,Evidence of mice or live mice present in facil...,Critical,21.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.704403,-74.010211,101.0,1.0,900.0,1088620.0,1.000298e+09,Battery Park City-Lower Manhattan,,"54 Stone Street, Manhattan, NY",Adrienne's Pizza Bar Stone Street,00007944,
2495045,41112524,Adrienne's Pizza Bar,Manhattan,54,Stone Street,10004,2122483838,Italian,03/10/2022,Violations were cited in the following area(s).,04L,Evidence of mice or live mice present in facil...,Critical,21.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.704403,-74.010211,101.0,1.0,900.0,1088620.0,1.000298e+09,Battery Park City-Lower Manhattan,,"54 Stone Street, Manhattan, NY",Adrienne's Pizza Bar Stone Street,00007944,
2495046,41112524,Adrienne's Pizza Bar,Manhattan,54,Stone Street,10004,2122483838,Italian,03/10/2022,Violations were cited in the following area(s).,04L,Evidence of mice or live mice present in facil...,Critical,21.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.704403,-74.010211,101.0,1.0,900.0,1088620.0,1.000298e+09,Battery Park City-Lower Manhattan,,"54 Stone Street, Manhattan, NY",Adrienne's Pizza Bar Stone Street,00007944,


In [78]:
dohmh_df['Global Restaurant ID'].value_counts()

            1819821
00010423       2304
00009119       2209
00008237       2116
00008161       1681
             ...   
00003829          1
00006820          1
00010552          1
00003943          1
00009189          1
Name: Global Restaurant ID, Length: 6555, dtype: int64

In [79]:
dohmh_df[dohmh_df['Global Restaurant ID']=='00010423']

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Location Point1,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
67664,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/14/2022,Violations were cited in the following area(s).,20F,Current letter grade sign not posted.,Not Critical,,,,12/01/2023,Administrative Miscellaneous / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
67665,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/14/2022,Violations were cited in the following area(s).,20F,Current letter grade sign not posted.,Not Critical,,,,12/01/2023,Administrative Miscellaneous / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
67666,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/14/2022,Violations were cited in the following area(s).,20F,Current letter grade sign not posted.,Not Critical,,,,12/01/2023,Administrative Miscellaneous / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
67667,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/14/2022,Violations were cited in the following area(s).,20F,Current letter grade sign not posted.,Not Critical,,,,12/01/2023,Administrative Miscellaneous / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
67668,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/14/2022,Violations were cited in the following area(s).,20F,Current letter grade sign not posted.,Not Critical,,,,12/01/2023,Administrative Miscellaneous / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2475767,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/23/2020,Violations were cited in the following area(s).,04H,"Raw, cooked or prepared food is adulterated, c...",Critical,25.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
2475768,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/23/2020,Violations were cited in the following area(s).,04H,"Raw, cooked or prepared food is adulterated, c...",Critical,25.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
2475769,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/23/2020,Violations were cited in the following area(s).,04H,"Raw, cooked or prepared food is adulterated, c...",Critical,25.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,
2475770,40400544,The Arch Diner,Brooklyn,1866,Ralph Avenue,11236,7185313718,American,01/23/2020,Violations were cited in the following area(s).,04H,"Raw, cooked or prepared food is adulterated, c...",Critical,25.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.631196,-73.918521,318.0,46.0,72000.0,3214959.0,3.077630e+09,Flatlands,,"1866 Ralph Avenue, Brooklyn, NY",The Arch Diner Ralph Avenue,00010423,


In [81]:
historic_df[historic_df['Global Restaurant ID']=='00010423']

Unnamed: 0,objectid,globalid,Seating Interest (Sidewalk/Roadway/Both),Restaurant Name,Legal Business Name,Doing Business As (DBA),Building Number,Street,Borough,Postcode,Business Address,Food Service Establishment Permit #,Sidewalk Dimensions (Length),Sidewalk Dimensions (Width),Sidewalk Dimensions (Area),Roadway Dimensions (Length),Roadway Dimensions (Width),Roadway Dimensions (Area),Approved for Sidewalk Seating,Approved for Roadway Seating,Qualify Alcohol,SLA Serial Number,SLA License Type,Landmark District or Building,landmarkDistrict_terms,healthCompliance_terms,Time of Submission,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Column Of Interest,Global Restaurant ID,Latest Inspection Row
11010,9309,08945D44-7605-4054-8BEF-31AA6973E769,Sidewalk,Arch Diner,Kanoni Inc.,Arch Diner,1866,Ralph Avenue,Brooklyn,11234,"1866 Ralph Avenue, Brooklyn, NY",40400544,55.0,15.0,825.0,,,,yes,no,yes,0.0,OP,no,,yes,2020-07-24 22:01:00,40.631203,-73.918521,18.0,46.0,720.0,3214959.0,3077630000.0,Flatlands,Arch Diner Ralph Avenue,10423,1


In [82]:
dohmh_df[dohmh_df['Global Restaurant ID']=='00009119']

Unnamed: 0,CAMIS,DBA,BORO,BUILDING,STREET,ZIPCODE,PHONE,CUISINE DESCRIPTION,INSPECTION DATE,ACTION,VIOLATION CODE,VIOLATION DESCRIPTION,CRITICAL FLAG,SCORE,GRADE,GRADE DATE,RECORD DATE,INSPECTION TYPE,Latitude,Longitude,Community Board,Council District,Census Tract,BIN,BBL,NTA,Location Point1,Business Address,Column Of Interest,Global Restaurant ID,Latest Inspection Row
164854,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,04L,Evidence of mice or live mice in establishment...,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
164855,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,04L,Evidence of mice or live mice in establishment...,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
164856,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,04L,Evidence of mice or live mice in establishment...,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
164857,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,04L,Evidence of mice or live mice in establishment...,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
164858,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,04L,Evidence of mice or live mice in establishment...,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2425911,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,02B,Hot TCS food item not held at or above 140 °F.,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
2425912,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,02B,Hot TCS food item not held at or above 140 °F.,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
2425913,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,02B,Hot TCS food item not held at or above 140 °F.,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,
2425914,41658324,Mi Casa Restaurant,Queens,11620,Jamaica Avenue,11418,7188499636,Latin American,02/01/2023,Violations were cited in the following area(s).,02B,Hot TCS food item not held at or above 140 °F.,Critical,32.0,,,12/01/2023,Cycle Inspection / Initial Inspection,40.699204,-73.833095,409.0,29.0,12200.0,4195822.0,4.093260e+09,Richmond Hill,,"11620 Jamaica Avenue, Queens, NY",Mi Casa Restaurant Jamaica Avenue,00009119,


In [83]:
dohmh_df = match_and_assign_global_ids(inspections_df, dohmh_df, 'CAMIS', threshold=85)
dohmh_df

MemoryError: Unable to allocate 101. GiB for an array with shape (21, 647595212) and data type object

In [None]:
import pandas as pd

# Define chunk size
chunk_size = 10000  # Adjust the chunk size based on your system's memory capacity

# Determine the number of chunks needed for df2
num_chunks = len(df2) // chunk_size + 1

# Create an empty DataFrame to store the result
result_df = pd.DataFrame()

# Process the data in chunks
for i in range(num_chunks):
    start_idx = i * chunk_size
    end_idx = min((i + 1) * chunk_size, len(df2))

    # Get a chunk of df2
    chunk_df2 = df2.iloc[start_idx:end_idx]

    # Perform the merge and other operations on the chunk
    chunk_df2 = pd.merge(chunk_df2, filtered_df2[[unique_column, 'Global Restaurant ID']],
                         on=unique_column, how='left', suffixes=('', '_filtered'))
    
    chunk_df2['Global Restaurant ID'] = chunk_df2['Global Restaurant ID_filtered'].combine_first(chunk_df2['Global Restaurant ID'])
    
    # Append the chunk result to the final result DataFrame
    result_df = pd.concat([result_df, chunk_df2], ignore_index=True)

# Drop unnecessary columns
result_df.drop(columns=['Global Restaurant ID_filtered'], inplace=True)

# Now result_df contains the merged result in chunks
