In [3]:
# Import required libraries
import pandas as pd

# Load the datasets
zomato_data = pd.read_csv('zomato_data.csv')
geographical_data = pd.read_csv('Geographical Coordinates.csv')

# Display the first few rows of each dataset to understand their structure
print("Zomato Data:")
print(zomato_data.head())

print("\nGeographical Data:")
print(geographical_data.head())


Zomato Data:
  online_order book_table   rate  votes            rest_type  \
0          Yes        Yes  4.1/5    775        Casual Dining   
1          Yes         No  4.1/5    787        Casual Dining   
2          Yes         No  3.8/5    918  Cafe, Casual Dining   
3           No         No  3.7/5     88          Quick Bites   
4           No         No  3.8/5    166        Casual Dining   

                                          dish_liked  \
0  Pasta, Lunch Buffet, Masala Papad, Paneer Laja...   
1  Momos, Lunch Buffet, Chocolate Nirvana, Thai G...   
2  Churros, Cannelloni, Minestrone Soup, Hot Choc...   
3                                        Masala Dosa   
4                                Panipuri, Gol Gappe   

                         cuisines approx_costfor_two_people listed_intype  \
0  North Indian, Mughlai, Chinese                       800        Buffet   
1     Chinese, North Indian, Thai                       800        Buffet   
2          Cafe, Mexican, Italian 

In [5]:
# Step 2.1: Clean the rating column
# Replace '-' and 'NEW' with NaN
zomato_data['rate'] = zomato_data['rate'].replace(['-', 'NEW'], pd.NA)

# Remove '/5' and convert to float
zomato_data['rate'] = zomato_data['rate'].str.replace('/5', '', regex=False)

# Convert to numeric, coercing errors will convert non-numeric values to NaN
zomato_data['rate'] = pd.to_numeric(zomato_data['rate'], errors='coerce')

# Fill missing values in the rating column with the median rating
zomato_data['rate'] = zomato_data['rate'].fillna(zomato_data['rate'].median())

# Verify the changes
print(zomato_data['rate'].head())


0    4.1
1    4.1
2    3.8
3    3.7
4    3.8
Name: rate, dtype: float64


In [11]:
# Step 2.2: Clean the cost column
# Ensure the column is in string format, then remove commas, and convert to numeric
zomato_data['approx_costfor_two_people'] = zomato_data['approx_costfor_two_people'].astype(str)  # Convert to string
zomato_data['approx_costfor_two_people'] = zomato_data['approx_costfor_two_people'].str.replace(',', '')  # Remove commas

# Convert the cleaned column to numeric
zomato_data['approx_costfor_two_people'] = pd.to_numeric(zomato_data['approx_costfor_two_people'], errors='coerce')

# Fill missing values in the cost column with the median cost
zomato_data['approx_costfor_two_people'] = zomato_data['approx_costfor_two_people'].fillna(zomato_data['approx_costfor_two_people'].median())

# Verify the changes
print(zomato_data['approx_costfor_two_people'].head())



0    800.0
1    800.0
2    800.0
3    300.0
4    600.0
Name: approx_costfor_two_people, dtype: float64


In [15]:
# Step 2.3: Handle categorical columns
zomato_data['dish_liked'] = zomato_data['dish_liked'].fillna('Not Available')
zomato_data['cuisines'] = zomato_data['cuisines'].fillna('Other')
zomato_data['rest_type'] = zomato_data['rest_type'].fillna('Unknown')

In [16]:
# Step 2.4: Handle votes column
zomato_data['votes'] = zomato_data['votes'].fillna(zomato_data['votes'].median())

In [17]:
# Step 2.5: Binary encoding
zomato_data['online_order'] = zomato_data['online_order'].map({'Yes': 1, 'No': 0})
zomato_data['book_table'] = zomato_data['book_table'].map({'Yes': 1, 'No': 0})

In [18]:
# Step 2.6: Data type conversion
zomato_data['rate'] = zomato_data['rate'].astype(float)
zomato_data['votes'] = zomato_data['votes'].astype(int)
zomato_data['approx_costfor_two_people'] = zomato_data['approx_costfor_two_people'].astype(int)


In [19]:
# Check the result
print(zomato_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               0 non-null      float64
 1   book_table                 0 non-null      float64
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  int64  
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(3), int64(2), object(5)
memory usage: 3.9+ MB
None


In [22]:
# Step 6: Convert Binary Columns to Numeric (Yes = 1, No = 0)
# Replace NaN values in 'online_order' and 'book_table' columns with 'No' if not available
zomato_data['online_order'] = zomato_data['online_order'].fillna('No').map({'Yes': 1, 'No': 0})
zomato_data['book_table'] = zomato_data['book_table'].fillna('No').map({'Yes': 1, 'No': 0})

# Verify the changes
print(zomato_data.info())  # Checking if the 'online_order' and 'book_table' columns are now cleaned


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  int64  
 1   book_table                 51717 non-null  int64  
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  int64  
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(1), int64(4), object(5)
memory usage: 3.9+ MB
None


In [23]:
# Ensure that 'rate' is a float and 'approx_costfor_two_people' is an integer
zomato_data['rate'] = zomato_data['rate'].astype(float)
zomato_data['approx_costfor_two_people'] = zomato_data['approx_costfor_two_people'].astype(int)

# Verify the changes
print(zomato_data.dtypes)  # Check the data types of the columns


online_order                   int64
book_table                     int64
rate                         float64
votes                          int64
rest_type                     object
dish_liked                    object
cuisines                      object
approx_costfor_two_people      int64
listed_intype                 object
listed_incity                 object
dtype: object


In [24]:
# Load the Geographical Coordinates data
geo_data = pd.read_csv('Geographical Coordinates.csv')

# Merge the datasets on 'listed_incity' column
merged_data = pd.merge(zomato_data, geo_data, on='listed_incity', how='left')

# Verify the merge
print(merged_data.head())  # Check the first few rows of the merged data
print(merged_data.info())  # Check the data types and non-null counts of the merged data


   online_order  book_table  rate  votes            rest_type  \
0             0           0   4.1    775        Casual Dining   
1             0           0   4.1    787        Casual Dining   
2             0           0   3.8    918  Cafe, Casual Dining   
3             0           0   3.7     88          Quick Bites   
4             0           0   3.8    166        Casual Dining   

                                          dish_liked  \
0  Pasta, Lunch Buffet, Masala Papad, Paneer Laja...   
1  Momos, Lunch Buffet, Chocolate Nirvana, Thai G...   
2  Churros, Cannelloni, Minestrone Soup, Hot Choc...   
3                                        Masala Dosa   
4                                Panipuri, Gol Gappe   

                         cuisines  approx_costfor_two_people listed_intype  \
0  North Indian, Mughlai, Chinese                        800        Buffet   
1     Chinese, North Indian, Thai                        800        Buffet   
2          Cafe, Mexican, Italian     

In [25]:
import folium
from folium.plugins import HeatMap

# Create a base map centered around Bangalore
bangalore_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Drop rows with missing Latitude and Longitude values
merged_data_clean = merged_data.dropna(subset=['Latitude', 'Longitude'])

# Prepare the data for heat map (latitude and longitude)
heat_data = [[row['Latitude'], row['Longitude']] for index, row in merged_data_clean.iterrows()]

# Add the heat map to the map
HeatMap(heat_data).add_to(bangalore_map)

# Display the map
bangalore_map


In [27]:
# Filter for Italian restaurants
italian_restaurants = merged_data[merged_data['cuisines'].str.contains('Italian', case=False, na=False)]

# Drop rows with missing Latitude or Longitude values
italian_restaurants = italian_restaurants.dropna(subset=['Latitude', 'Longitude'])

# Create a map centered around Bangalore
italian_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Add markers for each Italian restaurant
for index, row in italian_restaurants.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=row['rest_type'] + ": " + row['dish_liked'],
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(italian_map)

# Display the map
italian_map


In [None]:
import folium
from folium.plugins import HeatMap

# Initialize the map at the center of Bangalore
bangalore_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Filter out rows where Latitude and Longitude might be NaN
restaurants_with_coords = merged_data.dropna(subset=['Latitude', 'Longitude'])

# If you'd like to visualize restaurant density with a HeatMap
heat_data = [[row['Latitude'], row['Longitude']] for index, row in restaurants_with_coords.iterrows()]
HeatMap(heat_data).add_to(bangalore_map)

# If you want to visualize with markers (optional, can get crowded)
for index, row in restaurants_with_coords.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['rest_type']}<br>{row['dish_liked']}",
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(bangalore_map)

# Save the map to an HTML file to view it in a browser
bangalore_map.save('bangalore_restaurant_density_map.html')

# Inform where the map has been saved
print("Restaurant Density Map saved as 'bangalore_restaurant_density_map.html'. Open it in your browser.")


In [None]:
# Filter for Italian restaurants
italian_restaurants = merged_data[merged_data['cuisines'].str.contains('Italian', case=False, na=False)]

# Create the map for Italian restaurants
italian_map = folium.Map(location=[12.9716, 77.5946], zoom_start=12)

# Add markers for each Italian restaurant
for index, row in italian_restaurants.iterrows():
    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup=f"{row['rest_type']}<br>{row['dish_liked']}",
        icon=folium.Icon(color='green', icon='info-sign')
    ).add_to(italian_map)

# Save the Italian restaurant map to an HTML file
italian_map.save('italian_restaurant_map.html')

# Inform where the map has been saved
print("Italian Restaurant Map saved as 'italian_restaurant_map.html'. Open it in your browser.")
