In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Load the raw Zomato export; `data` keeps the untouched frame and all later work happens on the copy `df`.
# NOTE(review): names such as "Village CafÃ©" in later output look like UTF-8 text decoded as latin1 —
# confirm the file's real encoding before relying on restaurant names.
data = pd.read_csv("Zomato data .csv", encoding = 'latin1')
df = data.copy()
# sort_index() returns a new frame that is only displayed here; `df` itself is not modified.
df.sort_index()

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1/5,775,800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,300,Buffet
4,Grand Village,No,No,3.8/5,166,600,Buffet
...,...,...,...,...,...,...,...
143,Melting Melodies,No,No,3.3/5,0,100,Dining
144,New Indraprasta,No,No,3.3/5,0,150,Dining
145,Anna Kuteera,Yes,No,4.0/5,771,450,Dining
146,Darbar,No,No,3.0/5,98,800,Dining


In [3]:
# Column overview: non-null counts and dtypes of the working frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148 entries, 0 to 147
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   name                         148 non-null    object
 1   online_order                 148 non-null    object
 2   book_table                   148 non-null    object
 3   rate                         148 non-null    object
 4   votes                        148 non-null    int64 
 5   approx_cost(for two people)  148 non-null    int64 
 6   listed_in(type)              148 non-null    object
dtypes: int64(2), object(5)
memory usage: 8.2+ KB


<h4>Changing the data types of the online_order, book_table and listed_in(type) columns to category</h4>

In [4]:
# Store the three label columns as pandas categoricals in a single astype call.
df = df.astype({
    'online_order': 'category',
    'book_table': 'category',
    'listed_in(type)': 'category',
})

In [5]:
# Confirm the dtype changes: the three label columns should now be reported as category.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148 entries, 0 to 147
Data columns (total 7 columns):
 #   Column                       Non-Null Count  Dtype   
---  ------                       --------------  -----   
 0   name                         148 non-null    object  
 1   online_order                 148 non-null    category
 2   book_table                   148 non-null    category
 3   rate                         148 non-null    object  
 4   votes                        148 non-null    int64   
 5   approx_cost(for two people)  148 non-null    int64   
 6   listed_in(type)              148 non-null    category
dtypes: category(3), int64(2), object(2)
memory usage: 5.6+ KB


In [6]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Unnamed: 0,votes,approx_cost(for two people)
count,148.0,148.0
mean,264.810811,418.243243
std,653.676951,223.085098
min,0.0,100.0
25%,6.75,200.0
50%,43.5,400.0
75%,221.75,600.0
max,4884.0,950.0


In [7]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

np.int64(0)

<h3> Q.1 - What type of Restaurant do the majority of customers order from ?</h3>

In [8]:
# Count the rows (non-null names) per restaurant type, largest group first.
cusc = (
    df.groupby(['listed_in(type)'], as_index = False, observed = True)['name']
      .count()
      .sort_values(by = 'name', ascending = False)
)
cusc

Unnamed: 0,listed_in(type),name
2,Dining,110
1,Cafes,23
3,other,8
0,Buffet,7


In [9]:
# Bar chart of the per-type counts held in `cusc`.
trace = go.Bar(
    x = cusc['listed_in(type)'],
    y = cusc['name'],
    name = 'Count of Orders',
    showlegend = True,
    # Print each count above its bar.
    text = cusc['name'],
    textposition = 'outside',
    textfont = {'size': 18, 'color': 'white'},
    # Hover shows only the y value; <extra></extra> hides the secondary trace box.
    hovertemplate = "%{y}<extra></extra>",
    marker = {'color': 'cyan', 'line': {'color': 'white', 'width': 2}},
)

layout = go.Layout(
    title = {
        'text': '<b>Count of Orders by Customers count</b><br><sup>BAR CHART</sup>',
        'font': {'family': 'Arial', 'color': 'whitesmoke', 'size': 20},
        'x': 0.5,
    },
    xaxis = {
        'title': {
            'text': 'Type of Restaurant',
            'font': {'size': 26, 'color': 'whitesmoke', 'family': 'Arial'},
        },
        'tickfont': {'size': 18, 'color': 'white'},
    },
    yaxis = {
        'title': {
            'text': 'Count of Orders',
            'font': {'size': 18, 'color': 'whitesmoke', 'family': 'Arial'},
        },
        'tickfont': {'size': 18, 'color': 'white'},
        # Fixed range leaves headroom for the labels drawn outside the bars.
        'range': [0, 120],
    },
    legend = {'x': 0.8, 'y': 0.8, 'font': {'size': 14}},
    template = 'plotly_dark',
    hovermode = 'x unified',
    margin = {'t': 50, 'b': 50, 'l': 30, 'r': 30},
    height = 600,
)

fig = go.Figure(data = [trace], layout = layout)
fig.show()

<h3> Q.2 - How many votes has each type of restaurant received from customers ?</h3>

In [10]:
# Total votes per restaurant type, highest total first.
ccv = (
    df.groupby(['listed_in(type)'], as_index = False, observed = False)['votes']
      .sum()
      .sort_values(by = 'votes', ascending = False)
)
ccv

Unnamed: 0,listed_in(type),votes
2,Dining,20363
3,other,9367
1,Cafes,6434
0,Buffet,3028


In [None]:
# Line chart of the per-type vote totals held in `ccv`.
trace = go.Scatter(
    x = ccv['listed_in(type)'],
    y = ccv['votes'],
    marker = dict(color = 'white', size = 16, line = dict(color = 'cyan', width = 4)),
    line = dict(color = 'cyan', width = 4),
    # Print each total next to its marker.
    text = ccv['votes'],
    textposition = 'top right',
    hovertemplate = '%{y}<extra></extra>',
    mode = 'lines+markers+text',
    # The series is the sum of votes, so the legend entry says so
    # (it was previously labelled 'Orders', which this chart does not plot).
    name = 'Total Votes',
    showlegend = True
)

layout = go.Layout(
    title = dict(text = '<b>Total Votes by Restaurant Type</b><br><sup>LINE CHART</sup>',
                 font = dict(size = 36, color = 'whitesmoke', family = 'Arial'), x = 0.5),
    xaxis = dict(title = dict(text = 'Type of Restaurant', font = dict(size = 22, color = 'white', family = 'Arial')),
                 tickfont = dict(size = 18, color = 'whitesmoke')),
    yaxis = dict(title = dict(text = 'Total Votes', font = dict(size = 22, color = 'white', family = 'Arial')),
                 tickfont = dict(size = 18, color = 'whitesmoke'), range = [0,25000]),
    legend = dict(x = 0.8, y = 0.8, font = dict(size = 14)),
    template = 'plotly_dark', hovermode = 'x unified',
    margin = dict(t = 50, b = 50, l = 30, r = 30),
    height = 700, width = 600
)
fig = go.Figure(trace, layout)
fig.show()

<h3> Q.3 - What are the ratings that the majority of restaurants have received ?</h3>

In [None]:
# Disabled clean-up, kept for reference: it would rewrite the label "3.8 /5"
# (stray space before the slash) as "3.8/5" in the `rate` column.
# mask = df['rate'] == "3.8 /5"
# df.loc[mask, 'rate'] = '3.8/5'

In [13]:
# Frequency of each distinct rating label, most common first.
df['rate'].value_counts()

rate
3.8/5    20
3.7/5    15
3.3/5    14
3.4/5    12
4.1/5    11
3.6/5    11
4.0/5    10
3.9/5    10
4.2/5     8
3.2/5     7
3.1/5     7
2.9/5     7
3.5/5     6
3.0/5     2
4.6/5     2
2.8/5     2
4.4/5     2
4.3/5     1
2.6/5     1
Name: count, dtype: int64

In [14]:
# Number of distinct rating labels.
df['rate'].nunique()

19

In [15]:
# Histogram of the rating labels.
# While `rate` holds strings such as "3.8/5" the x axis is categorical, and by default the
# categories are drawn in the order they first appear in the data rather than by rating.
trace = go.Histogram(
    x = df['rate'],
    marker = dict(color = 'cyan', line = dict(color = 'white', width = 2)),
    nbinsx = 19,
    showlegend = True,
    name = 'Rating Distribution',
    hovertemplate = '%{y}<extra></extra>'
)
layout = go.Layout(
    title = dict(text = '<b>Frequency Distribution of Ratings</b><br><sup>HISTOGRAM CHART</sup>', 
                 font = dict(size = 36, color = 'white', family = 'Arial'), x = 0.5),
    # Sort the rating labels so the bars run from the lowest to the highest rating;
    # all labels share the "d.d/5" shape, so alphabetical order equals numeric order.
    xaxis = dict(title = dict(text = 'Rating'), categoryorder = 'category ascending'),
    yaxis = dict(title = dict(text = 'Number of Restaurants')),
    template = 'plotly_dark',
    hovermode = 'x unified',
    legend = dict(x = 0.8, y = 0.9),
    bargap = 0
)
fig= go.Figure(trace, layout)
fig.show()

<h3>Q.4 - Zomato has observed that most couples order most of their food online. What is their average spending on each order ?</h3>

In [16]:
# Display the working frame (pandas shows only the first and last rows of a long frame).
df

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1/5,775,800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,300,Buffet
4,Grand Village,No,No,3.8/5,166,600,Buffet
...,...,...,...,...,...,...,...
143,Melting Melodies,No,No,3.3/5,0,100,Dining
144,New Indraprasta,No,No,3.3/5,0,150,Dining
145,Anna Kuteera,Yes,No,4.0/5,771,450,Dining
146,Darbar,No,No,3.0/5,98,800,Dining


In [17]:
# Keep only the rows whose online_order flag is "Yes"; this subset feeds the cost histogram.
couple_data = df.loc[df['online_order'].eq("Yes")]
couple_data

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1/5,775,800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,800,Buffet
5,Timepass Dinner,Yes,No,3.8/5,286,600,Buffet
7,Onesta,Yes,Yes,4.6/5,2556,600,Cafes
8,Penthouse Cafe,Yes,No,4.0/5,324,700,other
9,Smacznego,Yes,No,4.2/5,504,550,Cafes
10,Village CafÃ©,Yes,No,4.1/5,402,500,Cafes
11,Cafe Shuffle,Yes,Yes,4.2/5,150,600,Cafes
12,The Coffee Shack,Yes,Yes,4.2/5,164,500,Cafes


In [18]:
# Histogram of the approximate cost for two people among the rows in `couple_data`.
trace = go.Histogram(
    x = couple_data['approx_cost(for two people)'],
    nbinsx = 14,
    name = 'Average Cost Distribution',
    showlegend = True,
    # Hover shows only the bin count; <extra></extra> hides the secondary trace box.
    hovertemplate = '%{y}<extra></extra>',
    marker = {'color': 'cyan', 'line': {'color': 'white', 'width': 2}},
)
layout = go.Layout(
    title = {
        'text': '<b>Frequency Distribution of Cost of Couples</b><br><sup>HISTOGRAM CHART</sup>',
        'font': {'size': 36, 'color': 'white', 'family': 'Arial'},
        'x': 0.5,
    },
    template = 'plotly_dark',
    hovermode = 'x unified',
    legend = {'x': 0.8, 'y': 0.9},
    # No gap between bars, so adjacent bins touch.
    bargap = 0,
)
fig = go.Figure(data = [trace], layout = layout)
fig.show()

<h3>Q.5 - Which mode (online or offline) has received the maximum rating ?</h3>

In [19]:
# Convert rating labels such as "4.1/5" into floats (4.1).
# Going through astype(str) keeps this cell re-runnable: once `rate` is already float,
# calling the `.str` accessor directly on it raises AttributeError on a second run.
# strip() also tolerates labels with stray whitespace such as "3.8 /5".
df['rate'] = (
    df['rate']
    .astype(str)
    .str.replace('/5', '', regex = False)
    .str.strip()
    .astype(float)
)

In [28]:
# Box plots of the numeric `rate` column, one box per online_order value.
trace1 = go.Box(
    y = df.loc[df['online_order'] == "Yes", 'rate'],
    name = 'Online Order: Yes',
    showlegend = True,
    # boxmean = True also draws the mean as a dashed line inside the box.
    boxmean = True,
    marker = {'color': 'cyan'},
    fillcolor = 'rgba(0,255,255,0.3)',
    line = {'width': 2, 'color': 'white'},
    hovertemplate = '%{y}<extra></extra>',
)
trace2 = go.Box(
    y = df.loc[df['online_order'] == "No", 'rate'],
    name = 'Online Order: No',
    showlegend = True,
    boxmean = True,
    marker = {'color': 'magenta'},
    fillcolor = 'rgba(255,0,255,0.3)',
    line = {'width': 2, 'color': 'white'},
    hovertemplate = '%{y}<extra></extra>',
)
layout = go.Layout(
    title = {
        'text': '<b>Average Rating of Online Orders VS Offline Orders</b><br><sup>BOX PLOT</sup>',
        'font': {'size': 20, 'color': 'white'},
        'x': 0.5,
    },
    template = 'plotly_dark',
    hovermode = 'x unified',
    legend = {'x': 0.8, 'y': 1},
    width = 600,
)
fig = go.Figure(data = [trace1, trace2], layout = layout)
fig.show()

<h3>Q.6 - Which type of Restaurant received more offline orders, so that Zomato can provide its customers with better offers ?</h3>

In [23]:
# Display the working frame again; pandas shows only the first and last rows of a long frame.
df

Unnamed: 0,name,online_order,book_table,rate,votes,approx_cost(for two people),listed_in(type)
0,Jalsa,Yes,Yes,4.1,775,800,Buffet
1,Spice Elephant,Yes,No,4.1,787,800,Buffet
2,San Churro Cafe,Yes,No,3.8,918,800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7,88,300,Buffet
4,Grand Village,No,No,3.8,166,600,Buffet
...,...,...,...,...,...,...,...
143,Melting Melodies,No,No,3.3,0,100,Dining
144,New Indraprasta,No,No,3.3,0,150,Dining
145,Anna Kuteera,Yes,No,4.0,771,450,Dining
146,Darbar,No,No,3.0,98,800,Dining


In [None]:
# Cross-tabulate restaurant type (rows) against the online_order flag (columns);
# each cell is the number of rows with that combination, 0 where there are none.
# Both keys are categorical, so `observed` is passed explicitly: recent pandas versions
# deprecate relying on its default, and False keeps every category in the table.
piv = df.pivot_table(
    index = 'listed_in(type)',
    columns = 'online_order',
    aggfunc = 'size',
    fill_value = 0,
    observed = False
)
piv





online_order,No,Yes
listed_in(type),Unnamed: 1_level_1,Unnamed: 2_level_1
Buffet,3,4
Cafes,8,15
Dining,77,33
other,2,6


In [40]:
# Heat map of the `piv` table: columns are the online_order values, rows are the
# restaurant types, and each cell is annotated with its count.
trace = go.Heatmap(
    z = piv.values,
    x = piv.columns,
    y = piv.index,
    colorscale = 'Viridis',
    # The colour encodes the row count in each cell, so the colour bar is labelled accordingly.
    colorbar = dict(title = 'Restaurants', ticks = 'outside'),
    texttemplate = '%{z}',
    showscale = True
)
layout = go.Layout(
    title = dict(text = '<b>Online / Offline Orders by Type of Restaurant</b><br><sup>HEAT MAP</sup>', font = dict(size = 18, color = 'white'), x = 0.5),
    # Axis titles name what is actually plotted instead of the generic 'Categories' / 'Values'.
    xaxis = dict(title = 'Online Order'),
    yaxis = dict(title = 'Type of Restaurant'),
    width = 500,
    template = 'plotly_dark'
)
fig = go.Figure(trace, layout)
fig.show()

<h4>CONCLUSION: Dining restaurants primarily receive offline orders, whereas cafes primarily receive online orders. This suggests that customers prefer to order in person at dining restaurants, but prefer to order online from cafes.</h4>