## Import necessary Python libraries 

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

## Step1: Cleaning the data

### Load the dataset

In [2]:
# Read the Zomato CSV (path is relative to the notebook's working directory)
# into `df`, the frame the remaining cells operate on.
df = pd.read_csv(filepath_or_buffer="Zomato data.csv")
# Preview the first five rows.
df.head(n=5)

FileNotFoundError: [Errno 2] No such file or directory: 'Zomato data.csv'

### Check the structure of the dataset

In [None]:
# Only the last expression of a cell is echoed, so the shape has to be printed
# explicitly; as a bare expression above df.info() it was silently discarded.
print(df.shape)
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Summary statistics for the frame's columns.
df.describe()

### Removing Missing Values

In [None]:
# Count the missing values per column so the effect of the cleanup is visible.
null_values = df.isnull().sum()
print(null_values)

# Remove rows where any column has missing values. The previous version of
# this cell only counted the nulls and never dropped anything.
# reset_index(drop=True) leaves a fresh frame with a contiguous 0..n-1 index.
df = df.dropna().reset_index(drop=True)

### Check for duplicates

In [None]:
# Boolean mask: True for every row that exactly repeats an earlier row.
duplicate_rows = df.duplicated()
# Summing the mask gives the number of duplicated rows, which answers the
# question directly instead of echoing one True/False per row.
duplicate_rows.sum()

### Converting 'Rate' Column to Numeric Format by Removing Denominator

In [None]:
def handleRate(value):
    """Return the numeric part of a rating written as ``"<score>/<scale>"``.

    Parameters
    ----------
    value : object
        A rating such as ``"4.1/5"``. Anything else is converted with
        ``str()`` first, so already-numeric values pass through unchanged.

    Returns
    -------
    float
        The text before the first ``"/"`` parsed as a float, or NaN when that
        text is not a number (for example ``"NEW"``, ``"-"`` or an empty
        string), so one unparseable row cannot abort the whole conversion.
    """
    numerator = str(value).split("/")[0].strip()
    try:
        return float(numerator)
    except ValueError:
        # Non-numeric placeholder: report it as missing rather than crashing.
        return float("nan")
# Run every entry of the 'rate' column through handleRate and store the
# converted values back in the same column.
df["rate"] = df["rate"].map(handleRate)
print(df.head(n=5))

### Summary of the df

In [None]:
# Re-inspect dtypes and non-null counts after the cleaning cells above.
df.info()

In [None]:
# First five rows of the frame.
df.head(n=5)

#### *Conclusion: there are no NULL values in df.*

## Step2: Visualizing Data After Cleaning

### What is the distribution of restaurants across different types (listed_in(type))?

In [None]:
sns.set_style("darkgrid")
# hue repeats the x variable so each bar takes its own colour from the palette;
# the legend is suppressed because it would only repeat the tick labels.
sns.countplot(x='listed_in(type)', data=df, hue='listed_in(type)', palette='plasma', edgecolor='black', linewidth=0.7, legend=False)

plt.title('Distribution of Restaurants Across Different Types')
plt.xlabel('Restaurant Type', color="black")
plt.ylabel('Count of Restaurants', color="black")

# Unrotated labels keep the default centred alignment. ha='right' anchored
# each label's right edge to the tick and pushed it left of its bar.
plt.xticks(rotation=0)
plt.show()

#### *RESULT: The majority of the restaurants fall into the dining category.*

### What is the count of restaurants offering online orders (online_order) and table bookings (book_table)?

In [None]:
# value_counts() orders its result by frequency, so each answer is looked up
# by label (.get) rather than by position; a missing answer counts as 0.
online_counts = df['online_order'].value_counts()
book_table_counts = df['book_table'].value_counts()

online_table_counts = pd.DataFrame({
    'Offering': ['Yes', 'No'],
    'Online Order Count': [online_counts.get('Yes', 0), online_counts.get('No', 0)],
    'Table Booking Count': [book_table_counts.get('Yes', 0), book_table_counts.get('No', 0)]
})

palette = sns.color_palette("muted", n_colors=2)

# DataFrame.plot opens a figure of its own unless it is given an Axes. A
# preceding plt.figure(figsize=...) therefore left an empty figure behind and
# its size was never applied to the chart. Create the Axes here and pass it in.
fig, ax = plt.subplots(figsize=(8, 6))
online_table_counts.set_index('Offering').plot(
    kind='bar', stacked=False, color=palette, edgecolor='black', linewidth=0.7,
    rot=0,  # keep the Yes/No tick labels horizontal
    ax=ax,
)

ax.set_title('Count of Restaurants Offering Online Orders and Table Bookings', color="black")
ax.set_ylabel('Number of Restaurants', color="black")
ax.set_xlabel('Offering', color="black", fontsize=12)

ax.grid(True, linestyle='--', alpha=0.6)
plt.show()

In [None]:
# Box plot of the 'rate' column, drawn on a fresh 7x5 figure.
plt.figure(figsize=(7, 5))
sns.set_style("whitegrid")
sns.set_context("notebook")

# seaborn returns the Axes it drew on; the labels are set through it directly.
rating_ax = sns.boxplot(data=df, x='rate', color="green", linewidth=1.2, legend=False)

rating_ax.set_title('Rating Distribution of Restaurants', color="black", fontweight='regular', fontsize="14")
rating_ax.set_xlabel('Rating', fontsize="14", color="black")
rating_ax.grid(True, linestyle='--', linewidth=1, alpha=0.6)
plt.show()


### How does the number of votes (votes) relate to the restaurant ratings (rate)?

In [None]:
sns.set_style("whitegrid")
# Rating on the x-axis, vote count on the y-axis, one colour per restaurant type.
sns.scatterplot(x="rate", y="votes", data=df, hue="listed_in(type)", palette="plasma")

plt.title('Relationship between Number of Votes and Ratings', fontsize=16)
# The labels must follow the plotted columns (x='rate', y='votes'); they were
# previously swapped.
plt.xlabel('Restaurant Rating', fontsize=12)
plt.ylabel('Number of Votes', fontsize=12)

plt.show()

### What is the price distribution (approx_cost(for two people)) at different restaurants?

In [None]:
sns.set_style("white")
plt.figure(figsize=(10, 6))
sns.set_context("notebook")

# One colour per restaurant type. The count comes from the data rather than a
# hard-coded 4, so the palette always has exactly as many colours as hue levels.
palette = sns.color_palette("winter", n_colors=df['listed_in(type)'].nunique())
sns.boxplot(x='listed_in(type)', y='approx_cost(for two people)', data=df, hue='listed_in(type)', palette=palette, legend=False)

plt.title('Price Distribution at Different Restaurants', fontsize=16, fontweight='semibold')
plt.xlabel('Restaurant Type', fontsize=14, color="black")
plt.ylabel('Approx. Cost for Two People', fontsize=12, color="black")

# Show the plot
plt.xticks(rotation=0)
plt.show()


### What proportion of restaurants offer online orders and table bookings?

In [None]:
# Share of each answer per column. value_counts() orders the result by
# frequency, so the first entry is whichever answer is most common and is
# not necessarily 'Yes'.
online_order_proportion = df['online_order'].value_counts(normalize=True)
book_table_proportion = df['book_table'].value_counts(normalize=True)

# Wedge labels are built from the index so each label always names the answer
# its wedge represents. A fixed ['...: Yes', '...: No'] list mislabels both
# wedges whenever 'No' is the more common answer.
online_labels = [f'Online Order: {answer}' for answer in online_order_proportion.index]
booking_labels = [f'Table Booking: {answer}' for answer in book_table_proportion.index]

# Pull out the first (largest) wedge only. The list is sized from the data
# because matplotlib requires one explode entry per wedge.
online_explode = [0.1] + [0] * (len(online_order_proportion) - 1)
booking_explode = [0.1] + [0] * (len(book_table_proportion) - 1)

fig, (online_ax, booking_ax) = plt.subplots(1, 2, figsize=(12, 6))

online_order_proportion.plot.pie(ax=online_ax, autopct='%1.1f%%', colors=['#ff9999', '#66b3ff'], startangle=90,
                                 labels=online_labels,
                                 wedgeprops={'edgecolor': 'black'}, shadow=True, explode=online_explode)
online_ax.set_title('Proportion of Restaurants Offering Online Orders', fontsize=16, fontweight='bold', color='black')
online_ax.set_ylabel('')  # drop the axis label pandas adds from the Series name

book_table_proportion.plot.pie(ax=booking_ax, autopct='%1.1f%%', colors=['#e0b3ff', '#99ff99'], startangle=90,
                               labels=booking_labels,
                               wedgeprops={'edgecolor': 'black'}, shadow=True, explode=booking_explode)
booking_ax.set_title('Proportion of Restaurants Offering Table Bookings', fontsize=16, fontweight='bold', color='black')
booking_ax.set_ylabel('')

fig.tight_layout()

plt.show()


### What is the distribution of restaurants offering online orders across different restaurant types?

In [None]:
# Cross-tabulate restaurant type (rows) against the online_order answer
# (columns); each cell holds the number of matching rows, 0 where there are none.
pivot_table = df.pivot_table(
    index='listed_in(type)',
    columns='online_order',
    aggfunc='size',
    fill_value=0,
)

plt.figure(figsize=(10, 6))
# fmt="d" prints the annotated counts as integers.
heatmap_ax = sns.heatmap(pivot_table, annot=True, cmap="cividis", fmt="d", linewidths=0.5, linecolor='white')

heatmap_ax.set_title('Heatmap of Restaurants Offering Online Orders by Type')
heatmap_ax.set_xlabel('Online Order')
heatmap_ax.set_ylabel('Restaurant Type')
plt.show()

In [None]:
# Same type-by-online_order count table as the plain heatmap, rebuilt here so
# this cell does not depend on the previous one having run.
pivot_table = df.pivot_table(index='listed_in(type)', columns='online_order', aggfunc='size', fill_value=0)

# Largest single cell count; the colour scale is capped 50 above it, so even
# the fullest cell stays short of the colormap's extreme colour.
largest_count = pivot_table.max().max()

heatmap_options = {
    'annot': True,
    'cmap': "RdBu_r",
    'fmt': "d",
    'linewidths': 1,
    'linecolor': 'white',
    'cbar_kws': {'label': 'Count of Restaurants'},
    'annot_kws': {'size': 12, 'weight': 'bold', 'color': 'black'},
    'square': True,
    'vmin': 0,
    'vmax': largest_count + 50,
}
# Shared text styling for the title and both axis labels.
label_style = {'fontweight': 'bold', 'color': 'darkblue'}

plt.figure(figsize=(12, 8))
sns.heatmap(pivot_table, **heatmap_options)

plt.title('Heatmap of Restaurants Offering Online Orders by Type', fontsize=16, **label_style)

plt.xlabel('Online Order', fontsize=14, **label_style)
plt.ylabel('Restaurant Type', fontsize=14, **label_style)

plt.xticks(rotation=45, ha='right')
plt.yticks(rotation=0)

plt.tight_layout()
plt.show()
