# Data cleaning for RestaurantAI

Cleaning zomato kolkata dataset for AI model training

## Importing packages

In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
plt.style.use('dark_background')

## Read CSV

In [2]:
df = pd.read_csv('Zkolkata.csv')
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun)
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, ...","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun)
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri..."
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Stre...",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)"
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun)


## Analyze

In [3]:
# Shape
df.shape

(7388, 7)

In [4]:
# What are the different columns?
df.columns

Index(['name', 'voteCount', 'rating', 'address', 'cusine', 'cost', 'timing'], dtype='object')

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7388 entries, 0 to 7387
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   name       7388 non-null   object
 1   voteCount  7388 non-null   object
 2   rating     7104 non-null   object
 3   address    7387 non-null   object
 4   cusine     7388 non-null   object
 5   cost       7387 non-null   object
 6   timing     7086 non-null   object
dtypes: object(7)
memory usage: 404.2+ KB


### Dropping duplicates

In [7]:
df.drop_duplicates(inplace=True)
df.shape

(7388, 7)

### Cleaning rate column

In [8]:
df['rating'].unique

<bound method Series.unique of 0       4.2
1       4.9
2       4.4
3       4.6
4       4.2
       ... 
7383    NEW
7384    2.8
7385      -
7386      -
7387    NEW
Name: rating, Length: 7388, dtype: object>

In [10]:
df['rating'].value_counts()

rating
-      1520
3.2     538
3.3     526
3.4     476
3.1     461
3.5     438
3.0     423
3.6     417
NEW     385
3.7     338
2.9     282
3.8     259
3.9     204
4.0     156
2.8     151
4.1     119
2.7      92
4.2      72
4.3      53
2.6      49
4.4      36
2.5      25
4.6      18
2.4      18
4.5      17
2.3       9
2.2       7
4.8       6
4.7       5
4.9       1
2.1       1
1.9       1
2.0       1
Name: count, dtype: int64

In [12]:
def handlerate(value):
    if(value=='NEW' or value == '-'):
        return np.nan
    else:
        return float(value)
    
df['rating'] = df['rating'].apply(handlerate)
df['rating'].head()

0    4.2
1    4.9
2    4.4
3    4.6
4    4.2
Name: rating, dtype: float64

In [20]:
# Total no. of null values
df.rating.isnull().sum()

2189

### Filling Null Values in Rate Column with Mean

In [21]:
df['rating'].fillna(df['rating'].mean(), inplace=True)
df['rating'].isnull().sum()

0

In [24]:
df['rating'].value_counts()

rating
3.389075    2189
3.200000     538
3.300000     526
3.400000     476
3.100000     461
3.500000     438
3.000000     423
3.600000     417
3.700000     338
2.900000     282
3.800000     259
3.900000     204
4.000000     156
2.800000     151
4.100000     119
2.700000      92
4.200000      72
4.300000      53
2.600000      49
4.400000      36
2.500000      25
4.600000      18
2.400000      18
4.500000      17
2.300000       9
2.200000       7
4.800000       6
4.700000       5
4.900000       1
2.100000       1
1.900000       1
2.000000       1
Name: count, dtype: int64

In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7388 entries, 0 to 7387
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       7388 non-null   object 
 1   voteCount  7388 non-null   object 
 2   rating     7388 non-null   float64
 3   address    7387 non-null   object 
 4   cusine     7388 non-null   object 
 5   cost       7387 non-null   object 
 6   timing     7086 non-null   object 
dtypes: float64(1), object(6)
memory usage: 404.2+ KB


### Dropping null values

In [26]:
# Drop Null Values
df.dropna(inplace=True)
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun)
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, ...","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun)
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri..."
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Stre...",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)"
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun)


In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 7084 entries, 0 to 7387
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       7084 non-null   object 
 1   voteCount  7084 non-null   object 
 2   rating     7084 non-null   float64
 3   address    7084 non-null   object 
 4   cusine     7084 non-null   object 
 5   cost       7084 non-null   object 
 6   timing     7084 non-null   object 
dtypes: float64(1), object(6)
memory usage: 442.8+ KB


### Address Column
Extracting location and city from the address column

In [28]:
def extract_parts_of_loc(address, index):
    """
    Extracts parts from location

    Args:
        address (string): Takes in the whole string address
        index (int): The index to determine which part of the address to extract.

    Returns:
        str: The extracted location part of the address.

    Example: 
        address = "1, Middleton Row, Near Loreto House, Park Street Area, Kolkata"
        
        overall_location = extract_location(address,-1)
        -> Kolkata

        overall_location = extract_location(address,-2)
        ->Park Street Area
    """
    # Split the address into parts
    address_parts = address.split(', ')

    #print(address_parts)

    if index==-1:
        loc = "".join(address_parts[index:])
    else:
        # Identify the relevant part (e.g., the last two elements)
        loc = "".join(address_parts[index:index+1])
    
    return loc

In [34]:
df['City'] = ""
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing,City
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun),
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, ...","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun),
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri...",
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Stre...",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)",
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun),


In [41]:
# Update the 'City' column with values
df['City'] = df['address'].apply(lambda x: extract_parts_of_loc(x, -1))
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing,City
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun),Kolkata
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, ...","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun),Kolkata
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri...",Kolkata
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Stre...",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)",Kolkata
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun),Kolkata


In [46]:
df['City'].value_counts(),len(df['City'].unique())

(City
 Kolkata                                                       6345
 Howrah                                                         458
 Delivery Only                                                  186
 Salt Lake                                                       38
 kolkata                                                          5
 Kolkata.                                                         3
 Kolkata,                                                         3
 West Bengal                                                      2
 Sodepur                                                          1
 Rajarhat                                                         1
 Baranagar                                                        1
 Tollygunge Kolkata                                               1
 Kolkara                                                          1
 opp Spencer and beside Arsanal.                                  1
 Barasat                                  

In [47]:
# Replacing variations with the correct city name
df['City'] = df['City'].replace(['kolkata', 'Kolkara', 'Kolkata.','Kolkata,','Koata'], 'Kolkata')



City
Kolkata                                                       6358
Howrah                                                         458
Delivery Only                                                  186
Salt Lake                                                       38
West Bengal                                                      2
Behala                                                           1
Rajarhat                                                         1
Baranagar                                                        1
Tollygunge Kolkata                                               1
opp Spencer and beside Arsanal.                                  1
Barasat                                                          1
Sodepur Kolkata                                                  1
Sodepur                                                          1
Santragachi,Kolkata                                              1
Garia                                                    

In [51]:
# Display the updated value counts
print(df['City'].value_counts())

City
Kolkata                                                       6358
Howrah                                                         458
Delivery Only                                                  186
Salt Lake                                                       38
West Bengal                                                      2
Behala                                                           1
Rajarhat                                                         1
Baranagar                                                        1
Tollygunge Kolkata                                               1
opp Spencer and beside Arsanal.                                  1
Barasat                                                          1
Sodepur Kolkata                                                  1
Sodepur                                                          1
Santragachi,Kolkata                                              1
Garia                                                    

In [53]:
pd.set_option('display.max_colwidth', None)

# # Revert to the default display options
# pd.reset_option('display.max_colwidth')

In [56]:
df[df['City'] == 'West Bengal']['address']

2694        38C, New Santoshpur Main Rd, Santoshpur, Kolkata, West Bengal
5998    Shop 88, Kabi Guru Rabindra Path, Bijoy, Kanchrapara, West Bengal
Name: address, dtype: object

In [57]:
# Update the 'Location' column with values
df['Location'] = df['address'].apply(lambda x: extract_parts_of_loc(x, -2))

In [58]:
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing,City,Location
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun),Kolkata,Park Street Area
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, Park Street Area, Kolkata","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun),Kolkata,Park Street Area
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri...",Kolkata,Park Street Area
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Street Area, Kolkata",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)",Kolkata,Park Street Area
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun),Kolkata,Park Street Area


In [59]:
# Display the updated value counts
print(df['Location'].value_counts())

Location
Salt Lake              329
New Town               327
Tollygunge             246
Behala                 235
Dum Dum                233
                      ... 
Fazir Bazar Shibpur      1
Ambedkar Sarani          1
Kalikapur Road           1
Bali                     1
Dum Dum Road             1
Name: count, Length: 285, dtype: int64


In [67]:
df[df['City'] == 'Salt Lake'][['Location', 'address']]

Unnamed: 0,Location,address
1113,Sector 1,"CD 18, Near Kalyan Jewelers, City Centre 1, Sector 1, Salt Lake"
2133,Sector 1,"DD 30A, Sector 1, Salt Lake"
2156,Sector 4,"Near Nicco Park, Sector 4, Salt Lake"
2280,Sector 4,"R.A. 308, Nabapally, Sector 4, Salt Lake"
2282,Sector 4,"J 240, Sukanta Nagar, Sector 4, Salt Lake"
2495,Sector 3,"FD Park, Near Poura Bhawan, Sector 3, Salt Lake"
2612,Sector 4,"G1/727, Sukanta Nagar, Sector 4, Salt Lake"
2910,Sector 1,"The Sonnet, DD 8, Sector 1, Salt Lake"
3159,Sector 1,"CA Block, Sector 1, Salt Lake"
3330,Sector 1,"1st Floor, CE 217, Sector 1, Salt Lake"


In [110]:
df[df['Location'] == 'New Town'][['Location', 'address']]

Unnamed: 0,Location,address
841,New Town,"39 Darga Road, Near Don Bosco School, New Town, Kolkata"
1001,New Town,"Chhapna, Rajarhat, New Town, Kolkata"
3839,New Town,"Noapara, Sukantapolly, Rajarhat, New Town, Kolkata"
3841,New Town,"Post Patharghata, Near Natun Pukur Bridge, New Town, Kolkata"
3851,New Town,"Ground Floor, 7B, Solua, Dasha Drone, Bidhannagar, New Town, Kolkata"
...,...,...
6993,New Town,"Location Varies, New Town, Kolkata"
6994,New Town,"Shop 23 & 25 Ground Floor, Opposite Helabattalla Bus Stop, Rajendra Super Market, Helabattala Bazar, Hatiara Road, New Town, Kolkata"
7029,New Town,"Street 0152, Jatragachhi, Deshbandhu Nagar, New Town, Kolkata"
7036,New Town,"19, Purbayan, Rajarhat, Near Derozio Memorial, New Town, Kolkata"


In [102]:
# df.to_csv('checkpoint1_data_cleaning.csv',index=False)
# Load the CSV file into a pandas DataFrame
df = pd.read_csv('checkpoint1_data_cleaning.csv')

In [105]:
# Update the 'Location' column with values for rows where 'location' is 'Salt Lake'
df.loc[df['Location'] == 'Salt Lake', 'City'] = df.loc[df['Location'] == 'Salt Lake', 'address'].apply(lambda x: extract_parts_of_loc(x, -2))
df.loc[df['Location'] == 'Salt Lake', 'Location'] = df.loc[df['Location'] == 'Salt Lake', 'address'].apply(lambda x: extract_parts_of_loc(x, -3))


In [231]:
df['Location'].value_counts()

Location
New Town                                                 341
Tollygunge                                               250
Behala                                                   239
Dum Dum                                                  238
Delivery Only                                            186
Barasat                                                  174
Park Circus Area                                         160
Sector 5                                                 153
Jadavpur                                                 153
Baguihati                                                144
Ballygunge                                               138
Garia                                                    138
Kasba                                                    117
Park Street Area                                         116
Sodepur                                                  114
Shibpur                                                  113
Baranagar      

Removing "near" wherever present


In [111]:
# Replace 'Near' with an empty string in the 'address' column
df['Location'] = df['Location'].str.replace('Near ', '')
df['Location'] = df['Location'].str.replace('Near', '')
df['Location'] = df['Location'].str.replace('near ', '')

Removing leading spaces

In [112]:
# Remove leading and trailing spaces in the 'address' column
df['Location'] = df['Location'].str.strip()


In [228]:
df['Location'].value_counts(), df['City'].value_counts()

(Location
 New Town                                                 341
 Tollygunge                                               250
 Behala                                                   239
 Dum Dum                                                  238
 Delivery Only                                            186
 Barasat                                                  174
 Park Circus Area                                         160
 Sector 5                                                 153
 Jadavpur                                                 153
 Baguihati                                                144
 Ballygunge                                               138
 Garia                                                    138
 Kasba                                                    117
 Park Street Area                                         116
 Sodepur                                                  114
 Shibpur                                                  11

Remeber: There are 21 more restaurants in howrah 

In [117]:
# Update the 'Location' column with values for rows where 'location' is 'Howrah'
df.loc[df['Location'] == 'Howrah', 'City'] = df.loc[df['Location'] == 'Howrah', 'address'].apply(lambda x: extract_parts_of_loc(x, -2))
df.loc[df['Location'] == 'Howrah', 'Location'] = df.loc[df['Location'] == 'Howrah', 'address'].apply(lambda x: extract_parts_of_loc(x, -3))

In [119]:
#df.to_csv('checkpoint3_data_cleaning.csv',index=False)

In [121]:
df['City'].value_counts()

City
Kolkata                                                       6008
Howrah                                                         479
Salt Lake                                                      367
Delivery Only                                                  186
West Bengal                                                      2
Behala                                                           1
Rajarhat                                                         1
Baranagar                                                        1
Tollygunge Kolkata                                               1
opp Spencer and beside Arsanal.                                  1
Barasat                                                          1
Sodepur Kolkata                                                  1
Sodepur                                                          1
Santragachi,Kolkata                                              1
Garia                                                    

In [122]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7084 entries, 0 to 7083
Data columns (total 9 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   name       7084 non-null   object 
 1   voteCount  7084 non-null   object 
 2   rating     7084 non-null   float64
 3   address    7084 non-null   object 
 4   cusine     7084 non-null   object 
 5   cost       7084 non-null   object 
 6   timing     7084 non-null   object 
 7   City       7084 non-null   object 
 8   Location   6895 non-null   object 
dtypes: float64(1), object(8)
memory usage: 498.2+ KB


In [137]:
# Null location addresses
null_loc_add = df[df['Location']=='Delivery Only']['City']
null_loc_add#.value_counts()

72      Delivery Only
98      Delivery Only
103     Delivery Only
107     Delivery Only
332     Delivery Only
494     Delivery Only
651     Delivery Only
662     Delivery Only
664     Delivery Only
716     Delivery Only
746     Delivery Only
751     Delivery Only
871     Delivery Only
904     Delivery Only
977     Delivery Only
1000    Delivery Only
1022    Delivery Only
1258    Delivery Only
1267    Delivery Only
1296    Delivery Only
1314    Delivery Only
1317    Delivery Only
1434    Delivery Only
1466    Delivery Only
1520    Delivery Only
1547    Delivery Only
1693    Delivery Only
1765    Delivery Only
1769    Delivery Only
1888    Delivery Only
1922    Delivery Only
1923    Delivery Only
1935    Delivery Only
1965    Delivery Only
1990    Delivery Only
2124    Delivery Only
2260    Delivery Only
2262    Delivery Only
2279    Delivery Only
2305    Delivery Only
2469    Delivery Only
2486    Delivery Only
2494    Delivery Only
2500    Delivery Only
2564    Delivery Only
2671    De

In [128]:
# Update 'location' for the specified index
df.at[209, 'Location'] = 'Rippon Street'
# Update 'City' for the specified index
df.at[209, 'City'] = 'Kolkata'

In [131]:
# Set 'location' to 'Delivery Only' where it is null
df.loc[df['Location'].isnull(), 'Location'] = 'Delivery Only'
# Set 'City' to 'Delivery Only' where 'location' it is null
df.loc[df['Location'].isnull(), 'City'] = 'Delivery Only'


In [136]:
# Drop the row with index 923
df = df.drop(index=923)
df = df.drop(index=4031)

Removing locations which have less than 10 occurances and grouping them into other

In [138]:
df['Location'].value_counts()

Location
New Town                                                 329
Tollygunge                                               248
Behala                                                   237
Dum Dum                                                  235
Delivery Only                                            186
Barasat                                                  174
Park Circus Area                                         160
Jadavpur                                                 153
Sector 5                                                 152
Baguihati                                                144
Ballygunge                                               137
Garia                                                    136
Park Street Area                                         116
Kasba                                                    116
Sodepur                                                  114
Shibpur                                                  113
Kankurgachi    

In [1]:
import pandas as pd

# Specify the path to your CSV file
file_path = 'checkpoint3_locations_cleaned.csv'

# Load the CSV file into a pandas DataFrame
df = pd.read_csv(file_path)

In [61]:
locations = df['Location'].value_counts(ascending=False)
locations

Location
New Town                                                 343
Tollygunge                                               252
Behala                                                   240
Dum Dum                                                  239
Delivery Only                                            186
Barasat                                                  176
Park Circus Area                                         162
Sector 5                                                 155
Jadavpur                                                 154
Baguihati                                                144
Garia                                                    139
Ballygunge                                               138
Kasba                                                    119
Sodepur                                                  117
Park Street Area                                         116
Shibpur                                                  113
Baranagar      

In [62]:
loc_less_than_8 = locations[locations<8]
loc_less_than_8

Location
Falta                                                    7
Ichapur                                                  7
Kalindi                                                  5
Baguiati                                                 4
Shalimar                                                 2
Kolkata Municipal Corporation                            1
Russel Street Area                                       1
Belgharia                                                1
Dankuni                                                  1
Santragachi                                              1
Metiabruz                                                1
Thakur Pukur Bazar                                       1
Ripon Street                                             1
Mirza Ghalib Street Park Street Area                     1
Kona Express Way                                         1
Shop No. 32                                              1
Bhagjatin                                      

In [296]:
# Update 'location' to 'New Town' where it is 'Newtown'
#df.loc[df['Location'] == 'Shantipally Kasba', 'Location'] = 'Kasba'

In [58]:
df['City'].value_counts()

City
Kolkata      6235
Howrah        479
Salt Lake     367
Bangalore       1
Name: count, dtype: int64

In [12]:
# Set display option to show all rows
pd.set_option('display.max_rows', None)

# Your DataFrame display command goes here

# # Reset display option to the default value if needed
#pd.reset_option('display.max_rows')

Renaming all the fields to Kolkata except Kolkata, Howrah, Salt Lake and Bangalore

In [57]:
# Update Location to 'Kolkata' for specified conditions
df.loc[~df['City'].isin(['Kolkata', 'Howrah', 'Salt Lake', 'Bangalore']), 'City'] = 'Kolkata'


Renaming all the locations to other which have less than 8 restaurants in that location

In [63]:
def handle_loc(value):
    if(value in loc_less_than_8):
        return 'others'
    else:
        return value
    
df['Location'] = df['Location'].apply(handle_loc)
df['Location'].value_counts()

Location
New Town                  343
Tollygunge                252
Behala                    240
Dum Dum                   239
Delivery Only             186
Barasat                   176
Park Circus Area          162
Sector 5                  155
Jadavpur                  154
Baguihati                 144
Garia                     139
Ballygunge                138
Kasba                     119
Sodepur                   117
Park Street Area          116
Shibpur                   113
Baranagar                 106
Kankurgachi               105
Kestopur                  104
Barrackpore               101
New Market Area            97
Bara Bazar                 95
Sector 1                   93
Chinar Park                92
Bhawanipur                 92
others                     90
Serampore                  81
Kalyani                    81
Picnic Garden              79
Lake Town                  78
Hatibagan                  73
Prince Anwar Shah Road     73
New Alipore                71
T

In [64]:
#df.to_csv('checkpoint3_locations_cleaned.csv',index=False)

In [65]:
df.head()

Unnamed: 0,name,voteCount,rating,address,cusine,cost,timing,City,Location
0,Peter Cat,12404 votes,4.2,"18A, Park Street, Park Street Area, Kolkata","North Indian, Continental","₹1,000",11am – 11:20pm (Mon-Sun),Kolkata,Park Street Area
1,Naturals Ice Cream,2498 votes,4.9,"77/1/A, Ground Floor, Near West Bengal Board, ...","North Indian, Continental",₹200,11am – 12midnight (Mon-Sun),Kolkata,Park Street Area
2,Carpe Diem,4083 votes,4.4,"18M, Park Street Area, Kolkata",Ice Cream,"₹1,000","12noon – 12midnight (Mon, Tue, Wed, Thu, Fri...",Kolkata,Park Street Area
3,Barbeque Nation,5455 votes,4.6,"1st Floor, 24, Park Center Building, Park Stre...",Ice Cream,"₹1,800","12noon – 3:30pm, 6:30pm – 10:45pm (Mon-Sun)",Kolkata,Park Street Area
4,Flurys,4709 votes,4.2,"15, Apeejay House, Park Street Area, Kolkata","Chinese, North Indian, Continental",₹400,7:30am – 11pm (Mon-Sun),Kolkata,Park Street Area


In [71]:
df['timing'].value_counts()

timing
11am – 11pm (Mon-Sun)                                    981
12noon – 11pm (Mon-Sun)                                  426
11am – 10pm (Mon-Sun)                                    258
10am – 10pm (Mon-Sun)                                    251
12noon – 10pm (Mon-Sun)                                  227
12noon – 10:30pm (Mon-Sun)                               196
12noon – 12midnight (Mon-Sun)                            175
7am – 10pm (Mon-Sun)                                     128
11am – 12midnight (Mon-Sun)                              128
8am – 10pm (Mon-Sun)                                     113
8am – 11pm (Mon-Sun)                                     105
11am – 10:30pm (Mon-Sun)                                 100
10am – 11pm (Mon-Sun)                                     91
9am – 10pm (Mon-Sun)                                      77
11am – 9pm (Mon-Sun)                                      68
9am – 9pm (Mon-Sun)                                       67
7am – 11pm (Mon-S