In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
#from IPython.core.display import display, HTML
import time


In [55]:

zomato = pd.read_csv('zomato_data.csv')
geo = pd.read_csv('Geo.csv')


print(zomato.head())
print(geo.head())

  online_order book_table   rate  votes            rest_type  \
0          Yes        Yes  4.1/5    775        Casual Dining   
1          Yes         No  4.1/5    787        Casual Dining   
2          Yes         No  3.8/5    918  Cafe, Casual Dining   
3           No         No  3.7/5     88          Quick Bites   
4           No         No  3.8/5    166        Casual Dining   

                                          dish_liked  \
0  Pasta, Lunch Buffet, Masala Papad, Paneer Laja...   
1  Momos, Lunch Buffet, Chocolate Nirvana, Thai G...   
2  Churros, Cannelloni, Minestrone Soup, Hot Choc...   
3                                        Masala Dosa   
4                                Panipuri, Gol Gappe   

                         cuisines approx_costfor_two_people listed_intype  \
0  North Indian, Mughlai, Chinese                       800        Buffet   
1     Chinese, North Indian, Thai                       800        Buffet   
2          Cafe, Mexican, Italian              

In [56]:

# Convert 'rate' to string
zomato['rate'] = zomato['rate'].astype(str)

# Replace '-' with NaN and remove '/5'
zomato['rate'] = zomato['rate'].replace('-', np.nan)
zomato['rate'] = zomato['rate'].str.replace('/5', '', regex=False)

# Convert to numeric
zomato['rate'] = pd.to_numeric(zomato['rate'], errors='coerce')

# Fill missing values with the median rating
median_rating = zomato['rate'].median()
zomato['rate'] = zomato['rate'].fillna(median_rating)

# Optional: View result
print(zomato['rate'].describe())


count    51717.000000
mean         3.700362
std          0.395391
min          1.800000
25%          3.500000
50%          3.700000
75%          3.900000
max          4.900000
Name: rate, dtype: float64


In [57]:
# Convert to string and remove commas
zomato['approx_costfor_two_people'] = zomato['approx_costfor_two_people'].astype(str)
zomato['approx_costfor_two_people'] = zomato['approx_costfor_two_people'].str.replace(',', '', regex=False)

# Convert to numeric (float)
zomato['approx_costfor_two_people'] = pd.to_numeric(zomato['approx_costfor_two_people'], errors='coerce')

# Fill missing values with the median cost
median_cost = zomato['approx_costfor_two_people'].median()
zomato['approx_costfor_two_people'] = zomato['approx_costfor_two_people'].fillna(median_cost)

# Optional: View result
print(zomato['approx_costfor_two_people'].describe())


count    51717.000000
mean       554.391689
std        437.563723
min         40.000000
25%        300.000000
50%        400.000000
75%        650.000000
max       6000.000000
Name: approx_costfor_two_people, dtype: float64


In [58]:
# Replace NaN values in 'dish_liked' with "Not Available"
zomato['dish_liked'] = zomato['dish_liked'].fillna('Not Available')

# Replace NaN values in 'cuisines' with "Other"
zomato['cuisines'] = zomato['cuisines'].fillna('Other')

# Replace NaN values in 'rest_type' with "Unknown"
zomato['rest_type'] = zomato['rest_type'].fillna('Unknown')


In [59]:
# Count remaining missing values in these columns
print(zomato[['dish_liked', 'cuisines', 'rest_type']].isnull().sum())


dish_liked    0
cuisines      0
rest_type     0
dtype: int64


In [60]:
# View a few rows to confirm the replacements
print(zomato[['dish_liked', 'cuisines', 'rest_type']].sample(5))


                                      dish_liked  \
7224                               Not Available   
1191                               Not Available   
24483  Rolls, Chicken Momo, Biryani, Paneer Roll   
47571                           Nutella Pancakes   
8789                              Mutton Biryani   

                                            cuisines           rest_type  
7224   North Indian, South Indian, Chinese, Desserts  Takeaway, Delivery  
1191                           Chinese, North Indian       Casual Dining  
24483                        Fast Food, Rolls, Momos         Quick Bites  
47571                            Desserts, Ice Cream      Dessert Parlor  
8789                           Biryani, North Indian       Casual Dining  


In [61]:
# Convert to numeric just in case (optional safety step)
zomato['votes'] = pd.to_numeric(zomato['votes'], errors='coerce')

# Fill missing values with the median
median_votes = zomato['votes'].median()
zomato['votes'] = zomato['votes'].fillna(median_votes)

# Optional: Confirm changes
print(zomato['votes'].isnull().sum())
print(zomato['votes'].describe())


0
count    51717.000000
mean       283.697527
std        803.838853
min          0.000000
25%          7.000000
50%         41.000000
75%        198.000000
max      16832.000000
Name: votes, dtype: float64


In [62]:
# Binary encoding: Yes → 1, No → 0
zomato['online_order'] = zomato['online_order'].map({'Yes': 1, 'No': 0})
zomato['book_table'] = zomato['book_table'].map({'Yes': 1, 'No': 0})

# Optional: Confirm the changes
print(zomato[['online_order', 'book_table']].head())


   online_order  book_table
0             1           1
1             1           0
2             1           0
3             0           0
4             0           0


In [63]:
# Convert 'rate' to float
zomato['rate'] = zomato['rate'].astype(float)

# Convert 'votes' to integer
zomato['votes'] = zomato['votes'].astype(int)

# Convert 'approx_costfor_two_people' to integer
zomato['approx_costfor_two_people'] = zomato['approx_costfor_two_people'].astype(int)

# Optional: Confirm data types
print(zomato.dtypes[['rate', 'votes', 'approx_costfor_two_people']])


rate                         float64
votes                          int32
approx_costfor_two_people      int32
dtype: object


In [64]:
print(zomato.shape)  # Outputs: (rows, columns)


(51717, 10)


In [65]:
print(zomato.shape)  


(51717, 10)


In [66]:
# Make sure to replace 'cuisines' with the actual column name in your dataset
north_indian_count = zomato[zomato['cuisines'].str.contains('North Indian', case=False, na=False)].shape[0]

print(f"Number of restaurants serving North Indian cuisine: {north_indian_count}")


Number of restaurants serving North Indian cuisine: 21085


In [67]:
import pandas as pd

# Load the dataset
df = pd.read_csv('zomato_data.csv')

# List of Bangalore neighborhoods
bangalore_neighborhoods = [
    'Banashankari', 'Bannerghatta Road', 'Basavanagudi', 'Bellandur', 'Brigade Road', 'Brookefield', 'BTM',
    'Church Street', 'Electronic City', 'Frazer Town', 'HSR', 'Indiranagar', 'Jayanagar', 'JP Nagar', 'Kalyan Nagar',
    'Kammanahalli', 'Koramangala 4th Block', 'Koramangala 5th Block', 'Koramangala 6th Block', 'Koramangala 7th Block',
    'Lavelle Road', 'Malleshwaram', 'Marathahalli', 'MG Road', 'New BEL Road', 'Old Airport Road', 'Rajajinagar',
    'Residency Road', 'Sarjapur Road', 'Whitefield'
]

# Step 1: Filter for restaurants in the specified Bangalore neighborhoods
bangalore_df = df[df['listed_incity'].str.lower().isin([neigh.lower() for neigh in bangalore_neighborhoods])]

# Step 2: Check if any data was found
if bangalore_df.empty:
    raise ValueError("No restaurants found in the specified Bangalore neighborhoods.")

# Step 3: Make sure 'cuisines' column exists
if 'cuisines' not in bangalore_df.columns:
    raise ValueError("Column 'cuisines' not found in the dataset.")

# Step 4: Drop rows with missing cuisines
bangalore_df = bangalore_df.dropna(subset=['cuisines'])

# Step 5: Split cuisines, clean and count the occurrences of all cuisines
cuisine_series = bangalore_df['cuisines'].str.split(',').explode().str.strip()
cuisine_counts = cuisine_series.value_counts()

# Step 6: Get the most common cuisine across all neighborhoods
most_common_cuisine = cuisine_counts.idxmax()
count = cuisine_counts.max()

# Step 7: Output result
print(f"\n✅ The most commonly offered cuisine across all neighborhoods in Bangalore is: **{most_common_cuisine}** ({count} restaurants)")



✅ The most commonly offered cuisine across all neighborhoods in Bangalore is: **North Indian** (21085 restaurants)


In [68]:
import pandas as pd

# Load the dataset
df = pd.read_csv('zomato_data.csv')

# Step 1: Print column names to inspect the available columns
print("Columns in the dataset:")
print(df.columns)

# Step 2: Print the first few rows of the dataset to inspect the data
print("\nFirst few rows of the dataset:")
print(df.head())


Columns in the dataset:
Index(['online_order', 'book_table', 'rate', 'votes', 'rest_type',
       'dish_liked', 'cuisines', 'approx_costfor_two_people', 'listed_intype',
       'listed_incity'],
      dtype='object')

First few rows of the dataset:
  online_order book_table   rate  votes            rest_type  \
0          Yes        Yes  4.1/5    775        Casual Dining   
1          Yes         No  4.1/5    787        Casual Dining   
2          Yes         No  3.8/5    918  Cafe, Casual Dining   
3           No         No  3.7/5     88          Quick Bites   
4           No         No  3.8/5    166        Casual Dining   

                                          dish_liked  \
0  Pasta, Lunch Buffet, Masala Papad, Paneer Laja...   
1  Momos, Lunch Buffet, Chocolate Nirvana, Thai G...   
2  Churros, Cannelloni, Minestrone Soup, Hot Choc...   
3                                        Masala Dosa   
4                                Panipuri, Gol Gappe   

                         cuis

In [69]:
import pandas as pd

# Load the dataset
df = pd.read_csv('zomato_data.csv')

# List of Bangalore neighborhoods
bangalore_neighborhoods = [
    'Banashankari', 'Bannerghatta Road', 'Basavanagudi', 'Bellandur', 'Brigade Road', 'Brookefield', 'BTM',
    'Church Street', 'Electronic City', 'Frazer Town', 'HSR', 'Indiranagar', 'Jayanagar', 'JP Nagar', 'Kalyan Nagar',
    'Kammanahalli', 'Koramangala 4th Block', 'Koramangala 5th Block', 'Koramangala 6th Block', 'Koramangala 7th Block',
    'Lavelle Road', 'Malleshwaram', 'Marathahalli', 'MG Road', 'New BEL Road', 'Old Airport Road', 'Rajajinagar',
    'Residency Road', 'Sarjapur Road', 'Whitefield'
]

# Step 1: Filter for restaurants in the specified Bangalore neighborhoods
bangalore_df = df[df['listed_incity'].str.lower().isin([neigh.lower() for neigh in bangalore_neighborhoods])]

# Step 2: Check how many rows remain after filtering by neighborhood
print(f"Rows after filtering by Bangalore neighborhoods: {bangalore_df.shape[0]}")

# Step 3: Ensure the 'approx_costfor_two_people' column exists
if 'approx_costfor_two_people' not in bangalore_df.columns:
    raise ValueError("Column 'approx_costfor_two_people' not found in the dataset.")

# Step 4: Clean the 'approx_costfor_two_people' column
# Remove any non-numeric characters (such as commas) and convert to numeric
bangalore_df['approx_costfor_two_people'] = bangalore_df['approx_costfor_two_people'].replace({',': ''}, regex=True)
bangalore_df['approx_costfor_two_people'] = pd.to_numeric(bangalore_df['approx_costfor_two_people'], errors='coerce')

# Step 5: Check for missing values in the 'approx_costfor_two_people' column
missing_cost_values = bangalore_df['approx_costfor_two_people'].isnull().sum()
print(f"\nMissing values in 'approx_costfor_two_people' column: {missing_cost_values}")

# Step 6: Drop rows where 'approx_costfor_two_people' is missing
bangalore_df = bangalore_df.dropna(subset=['approx_costfor_two_people'])

# Step 7: Check if data exists after dropping NaNs
print(f"\nRows after dropping missing values: {bangalore_df.shape[0]}")

# Step 8: Group by neighborhood and calculate the average cost for each
avg_cost_per_neigh = bangalore_df.groupby('listed_incity')['approx_costfor_two_people'].mean()

# Step 9: Check if the grouping resulted in any data
print("\nAverage cost per neighborhood:")
print(avg_cost_per_neigh)

# Step 10: Find the neighborhood with the highest average cost
if not avg_cost_per_neigh.empty:
    highest_avg_cost_neigh = avg_cost_per_neigh.idxmax()
    highest_avg_cost = avg_cost_per_neigh.max()

    print(f"\n✅ The locality in Bangalore with the highest average cost for dining for two people is: {highest_avg_cost_neigh} with an average cost of ₹{highest_avg_cost:.2f}")
else:
    print("No data found after grouping by neighborhood.")


Rows after filtering by Bangalore neighborhoods: 51717

Missing values in 'approx_costfor_two_people' column: 346

Rows after dropping missing values: 51371

Average cost per neighborhood:
listed_incity
BTM                      495.485145
Banashankari             401.551564
Bannerghatta Road        452.534077
Basavanagudi             445.137549
Bellandur                551.098361
Brigade Road             767.091115
Brookefield              521.246702
Church Street            771.990104
Electronic City          496.955102
Frazer Town              558.237288
HSR                      487.814302
Indiranagar              654.753655
JP Nagar                 459.774904
Jayanagar                469.077053
Kalyan Nagar             479.753657
Kammanahalli             470.659591
Koramangala 4th Block    528.353924
Koramangala 5th Block    522.979026
Koramangala 6th Block    521.340524
Koramangala 7th Block    520.497598
Lavelle Road             753.584873
MG Road                  763.987696
Malle

In [70]:
import pandas as pd

# Load the dataset
df = pd.read_csv('zomato_data.csv')

# Step 1: Filter the dataset for restaurants with over 1000 votes
df_filtered = df[df['votes'] > 1000]

# Step 2: Ensure the 'rate' column exists and clean the ratings if needed
# Convert the 'rate' column to numeric (remove '/5' or any other non-numeric characters)
df_filtered['rate'] = df_filtered['rate'].str.extract('([0-9.]+)').astype(float)

# Step 3: Group by restaurant type and calculate the average rating
avg_rating_by_rest_type = df_filtered.groupby('rest_type')['rate'].mean()

# Step 4: Find the restaurant type with the highest average rating
top_rest_type = avg_rating_by_rest_type.idxmax()
top_rating = avg_rating_by_rest_type.max()

# Output the result
print(f"\n✅ The restaurant type with the highest rating (over 1000 votes) is: {top_rest_type} with an average rating of {top_rating:.2f}")



✅ The restaurant type with the highest rating (over 1000 votes) is: Bakery with an average rating of 4.80


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['rate'] = df_filtered['rate'].str.extract('([0-9.]+)').astype(float)


In [72]:
import pandas as pd

# Load your dataset
df = pd.read_csv('zomato_data.csv')

# Step 1: Normalize 'listed_incity' column
df['listed_incity'] = df['listed_incity'].str.strip().str.lower()

# Step 2: Define known Bangalore neighborhoods (lowercased)
bangalore_neighborhoods = [
    'banashankari', 'bannerghatta road', 'basavanagudi', 'bellandur', 'brigade road', 'brookefield', 'btm',
    'church street', 'electronic city', 'frazer town', 'hsr', 'indiranagar', 'jayanagar', 'jp nagar', 'kalyan nagar',
    'kammanahalli', 'koramangala 4th block', 'koramangala 5th block', 'koramangala 6th block', 'koramangala 7th block',
    'lavelle road', 'malleshwaram', 'marathahalli', 'mg road', 'new bel road', 'old airport road', 'rajajinagar',
    'residency road', 'sarjapur road', 'whitefield'
]

# Step 3: Filter for Bangalore-local restaurants
bangalore_df = df[df['listed_incity'].isin(bangalore_neighborhoods)].copy()

# Step 4: Clean cost column
bangalore_df['approx_costfor_two_people'] = bangalore_df['approx_costfor_two_people'].astype(str).str.replace(',', '')
bangalore_df['approx_costfor_two_people'] = pd.to_numeric(bangalore_df['approx_costfor_two_people'], errors='coerce')

# Step 5: Drop rows with missing costs
bangalore_df.dropna(subset=['approx_costfor_two_people'], inplace=True)

# Step 6: Find the minimum cost
min_cost = bangalore_df['approx_costfor_two_people'].min()

# Step 7: Filter restaurants with the minimum cost
cheapest_restaurants = bangalore_df[bangalore_df['approx_costfor_two_people'] == min_cost]

# Step 8: Display result
print(f"✅ The minimum cost to eat out in Bangalore is ₹{min_cost:.0f}")
print("\n🧾 Restaurants offering this price:")
print(cheapest_restaurants[['listed_incity', 'cuisines', 'approx_costfor_two_people']].reset_index(drop=True))


✅ The minimum cost to eat out in Bangalore is ₹40

🧾 Restaurants offering this price:
           listed_incity                             cuisines  \
0           brigade road  South Indian, North Indian, Chinese   
1          church street  South Indian, North Indian, Chinese   
2            frazer town  South Indian, North Indian, Chinese   
3            indiranagar  South Indian, North Indian, Chinese   
4            indiranagar  South Indian, North Indian, Chinese   
5  koramangala 4th block  South Indian, North Indian, Chinese   
6  koramangala 5th block  South Indian, North Indian, Chinese   
7  koramangala 6th block  South Indian, North Indian, Chinese   

   approx_costfor_two_people  
0                       40.0  
1                       40.0  
2                       40.0  
3                       40.0  
4                       40.0  
5                       40.0  
6                       40.0  
7                       40.0  


In [73]:
import pandas as pd

# Load your dataset
df = pd.read_csv("zomato_data.csv")

# Step 1: Clean 'listed_incity' and 'online_order'
df['listed_incity'] = df['listed_incity'].astype(str).str.strip().str.lower()
df['online_order'] = df['online_order'].astype(str).str.strip().str.lower()

# Step 2: Filter for restaurants accepting online orders
online_orders_df = df[df['online_order'] == 'yes']
total_online_orders = len(online_orders_df)

# Step 3: Filter Banashankari restaurants with online order
banashankari_online = online_orders_df[online_orders_df['listed_incity'] == 'banashankari']
banashankari_online_count = len(banashankari_online)

# Step 4: Calculate percentage
if total_online_orders > 0:
    percentage = (banashankari_online_count / total_online_orders) * 100
    print(f"✅ Banashankari receives {percentage:.2f}% of all online orders.")
else:
    print("⚠️ No online orders found in the dataset.")


✅ Banashankari receives 1.79% of all online orders.


In [74]:
import pandas as pd

# Load dataset
df = pd.read_csv("zomato_data.csv")

# Step 1: Normalize city names and clean rating column
df['listed_incity'] = df['listed_incity'].astype(str).str.strip().str.lower()
df['rate'] = df['rate'].astype(str).str.extract('([0-9.]+)', expand=False).astype(float)
df['votes'] = pd.to_numeric(df['votes'], errors='coerce')

# Step 2: Filter for restaurants with > 500 votes and rating < 3.0
filtered_df = df[(df['votes'] > 500) & (df['rate'] < 3.0)]

# Step 3: Focus only on specified localities
target_localities = ['bellandur', 'whitefield', 'brookefield', 'hsr']
filtered_df = filtered_df[filtered_df['listed_incity'].isin(target_localities)]

# Step 4: Count by locality
locality_counts = filtered_df['listed_incity'].value_counts()

# Step 5: Get the top one
if not locality_counts.empty:
    top_locality = locality_counts.idxmax()
    count = locality_counts.max()
    print(f"✅ The locality with the most restaurants having >500 votes and rating <3.0 is: {top_locality.title()} ({count} restaurants)")
else:
    print("⚠️ No restaurants found matching the criteria.")


✅ The locality with the most restaurants having >500 votes and rating <3.0 is: Brookefield (8 restaurants)


In [75]:
import pandas as pd

# Load the dataset
df = pd.read_csv("zomato_data.csv")

# Clean the 'listed_incity' and 'rest_type' columns
df['listed_incity'] = df['listed_incity'].astype(str).str.strip().str.lower()
df['rest_type'] = df['rest_type'].astype(str).str.strip()

# Group by locality and count unique restaurant types
rest_type_diversity = df.groupby('listed_incity')['rest_type'].nunique().sort_values(ascending=False)

# Get the top locality
top_locality = rest_type_diversity.idxmax()
unique_count = rest_type_diversity.max()

print(f"✅ Zomato should target: {top_locality.title()} (with {unique_count} unique restaurant types)")


✅ Zomato should target: Btm (with 62 unique restaurant types)


In [76]:
import pandas as pd

# Load dataset
df = pd.read_csv("zomato_data.csv")

# Step 1: Clean the columns
df['listed_intype'] = df['listed_intype'].astype(str).str.strip().str.lower()
df['approx_costfor_two_people'] = df['approx_costfor_two_people'].astype(str).str.replace(',', '').str.strip()

# Step 2: Convert cost column to numeric
df['approx_costfor_two_people'] = pd.to_numeric(df['approx_costfor_two_people'], errors='coerce')

# Step 3: Filter Buffet and Delivery restaurants
buffet_df = df[df['listed_intype'] == 'buffet']
delivery_df = df[df['listed_intype'] == 'delivery']

# Step 4: Calculate average costs
buffet_avg = buffet_df['approx_costfor_two_people'].mean()
delivery_avg = delivery_df['approx_costfor_two_people'].mean()

# Step 5: Compute difference
cost_difference = abs(buffet_avg - delivery_avg)

# Display results
print(f"📊 Average Buffet Cost: ₹{buffet_avg:.2f}")
print(f"📦 Average Delivery Cost: ₹{delivery_avg:.2f}")
print(f"✅ Average Cost Difference: ₹{cost_difference:.2f}")


📊 Average Buffet Cost: ₹1306.66
📦 Average Delivery Cost: ₹464.49
✅ Average Cost Difference: ₹842.17


In [77]:
import pandas as pd

# Load dataset
df = pd.read_csv("zomato_data.csv")

# Step 1: Clean the 'online_order' column and ensure 'votes' is numeric
df['online_order'] = df['online_order'].astype(str).str.strip().str.lower()
df['votes'] = pd.to_numeric(df['votes'], errors='coerce')

# Step 2: Filter for restaurants with online ordering enabled
online_order_df = df[df['online_order'] == 'yes']

# Step 3: Find the maximum number of votes for these restaurants
max_votes = online_order_df['votes'].max()

# Display the result
print(f"✅ The maximum number of votes received by any restaurant with online ordering: {max_votes}")


✅ The maximum number of votes received by any restaurant with online ordering: 16832


In [78]:
import pandas as pd

# Load the dataset
zomato_data = pd.read_csv("zomato_data.csv")

# Step 1: Clean and process columns
zomato_data['cuisines'] = zomato_data['cuisines'].astype(str).str.lower()
zomato_data['rate'] = zomato_data['rate'].astype(str).str.extract('([0-9.]+)', expand=False).astype(float)

# Step 2: Filter for restaurants serving both North Indian and Chinese cuisines
north_indian_chinese_df = zomato_data[zomato_data['cuisines'].str.contains('north indian') & zomato_data['cuisines'].str.contains('chinese')]

# Step 3: Calculate the average rating
average_rating = north_indian_chinese_df['rate'].mean()

# Display the result
print(f"✅ The average rating of restaurants serving both North Indian and Chinese cuisines is: {average_rating:.2f}")


✅ The average rating of restaurants serving both North Indian and Chinese cuisines is: 3.57


In [80]:
import pandas as pd

# Load the dataset
zomato_data = pd.read_csv("zomato_data.csv")

# Step 1: Clean and process the columns
zomato_data['listed_incity'] = zomato_data['listed_incity'].astype(str).str.strip().str.lower()
zomato_data['approx_costfor_two_people'] = pd.to_numeric(zomato_data['approx_costfor_two_people'], errors='coerce')
zomato_data['votes'] = pd.to_numeric(zomato_data['votes'], errors='coerce')

# Step 2: Calculate potential revenue for each restaurant
zomato_data['revenue'] = zomato_data['approx_costfor_two_people'] * zomato_data['votes']

# Step 3: Group by locality and calculate total revenue and number of restaurants
locality_stats = zomato_data.groupby('listed_incity').agg(
    total_revenue=('revenue', 'sum'),
    total_restaurants=('revenue', 'size')
)

# Step 4: Calculate strike rate for each locality (revenue per restaurant)
locality_stats['strike_rate'] = (locality_stats['total_revenue'] / locality_stats['total_restaurants']) * 100

# Step 5: Get the locality with the highest strike rate
most_profitable_locality = locality_stats['strike_rate'].idxmax()
highest_strike_rate = locality_stats['strike_rate'].max()

# Display the result
print(f"✅ The most profitable area for Zomato is: {most_profitable_locality.title()} with a strike rate of {highest_strike_rate:.2f}")


✅ The most profitable area for Zomato is: Koramangala 6Th Block with a strike rate of 13270051.47


In [81]:
import pandas as pd

# Load the dataset
zomato_data = pd.read_csv("zomato_data.csv")

# Step 1: Clean the data
zomato_data['rate'] = zomato_data['rate'].astype(str).str.extract('([0-9.]+)', expand=False).astype(float)
zomato_data['online_order'] = zomato_data['online_order'].str.strip().str.lower()

# Step 2: Filter data based on criteria
filtered_data = zomato_data[
    (zomato_data['rate'] > 4.2) & 
    (zomato_data['votes'] > 500) & 
    (zomato_data['online_order'] == 'yes')
]

# Step 3: Group by area (listed_incity or locality) and calculate the number of qualifying restaurants
area_counts = filtered_data.groupby('listed_incity')['rate'].count()

# Step 4: Identify the area with the maximum qualifying restaurants
best_area = area_counts.idxmax()
max_restaurants = area_counts.max()

# Display the result
print(f"✅ The area where Zomato should invest is: {best_area.title()} with {max_restaurants} qualifying restaurants.")



✅ The area where Zomato should invest is: Koramangala 7Th Block with 97 qualifying restaurants.
