# Zomato Data Analysis

This notebook contains the analysis and visualization of the Zomato dataset. We will explore the dataset, visualize key insights, and derive meaningful conclusions.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn 'whitegrid' theme to every figure drawn below.
# set_theme is the preferred interface; sns.set is only an alias for it.
sns.set_theme(style='whitegrid')

In [2]:
# Read the Zomato CSV (path is relative to the notebook's working directory)
data_path = '../data/Zomato-data-.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the first five rows to sanity-check the load
df.head(n=5)

In [3]:
# Structural overview: info() prints column names, dtypes and non-null counts
df.info()

# Summary statistics for every column; include='all' covers non-numeric columns too
summary_stats = df.describe(include='all')
summary_stats

In [4]:
# Cleaning the 'rate' column to extract numerical values.
# The raw values are expected to be text with a '/5' suffix (e.g. '4.1/5').
# Casting to str first keeps this cell re-runnable: after one run the column is
# already float, and calling .str on a float column raises AttributeError.
rate_text = (
    df['rate']
    .astype(str)
    .str.replace('/5', '', regex=False)  # literal match, not a regex pattern
    .str.strip()
)
# errors='coerce' maps anything that is not a number (missing values,
# placeholder text) to NaN instead of aborting the cell with a ValueError.
# The final astype(float) keeps the dtype float even if every value is integral.
df['rate'] = pd.to_numeric(rate_text, errors='coerce').astype(float)

# Checking for missing values (includes any ratings coerced to NaN above)
df.isnull().sum()

In [5]:
# Histogram of ratings (20 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['rate'], bins=20, kde=True, ax=ax)
ax.set_title('Distribution of Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Frequency')
plt.show()

In [6]:
# Scatter plot: approximate cost for two people (x) against rating (y)
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='approx_cost(for two people)', y='rate', ax=ax)
ax.set(
    title='Cost vs Rating',
    xlabel='Approx Cost for Two People',
    ylabel='Rating',
)
plt.show()

In [7]:
# Getting insights
# The ten rows with the highest 'rate', keeping only the name and rating columns.
top_restaurants = df.nlargest(10, 'rate')[['name', 'rate']]
# Mean approximate cost for two people within each 'listed_in(type)' group.
average_cost_by_type = df.groupby('listed_in(type)')['approx_cost(for two people)'].mean().reset_index()

# display() renders each frame as its own rich table; a bare tuple of
# DataFrames would fall back to a plain-text repr of the tuple.
display(top_restaurants, average_cost_by_type)