In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Load the dataset from 'swiggy.csv' (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='swiggy.csv')

In [3]:
# Preview the first five rows to eyeball the column layout.
data.head(n=5)

Unnamed: 0,ID,Area,City,Restaurant,Price,Avg ratings,Total ratings,Food type,Address,Delivery time
0,211,Koramangala,Bangalore,Tandoor Hut,300.0,4.4,100,"Biryani,Chinese,North Indian,South Indian",5Th Block,59
1,221,Koramangala,Bangalore,Tunday Kababi,300.0,4.1,100,"Mughlai,Lucknowi",5Th Block,56
2,246,Jogupalya,Bangalore,Kim Lee,650.0,4.4,100,Chinese,Double Road,50
3,248,Indiranagar,Bangalore,New Punjabi Hotel,250.0,3.9,500,"North Indian,Punjabi,Tandoor,Chinese",80 Feet Road,57
4,249,Indiranagar,Bangalore,Nh8,350.0,4.0,50,"Rajasthani,Gujarati,North Indian,Snacks,Desser...",80 Feet Road,63


In [4]:
# (number of rows, number of columns) of the loaded frame.
data.shape

(8680, 10)

In [5]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

ID               0
Area             0
City             0
Restaurant       0
Price            0
Avg ratings      0
Total ratings    0
Food type        0
Address          0
Delivery time    0
dtype: int64

In [6]:
# Column dtypes as inferred by read_csv; shows which columns are numeric.
data.dtypes

ID                 int64
Area              object
City              object
Restaurant        object
Price            float64
Avg ratings      float64
Total ratings      int64
Food type         object
Address           object
Delivery time      int64
dtype: object

In [8]:
# Summary statistics for the four numeric measures
# (ID is numeric as well but is deliberately left out of the selection).
data.loc[
    :,
    ['Price', 'Avg ratings', 'Total ratings', 'Delivery time'],
].describe()

Unnamed: 0,Price,Avg ratings,Total ratings,Delivery time
count,8680.0,8680.0,8680.0,8680.0
mean,348.44447,3.655104,156.634793,53.967051
std,230.940074,0.647629,391.448014,14.292335
min,0.0,2.0,20.0,20.0
25%,200.0,2.9,50.0,44.0
50%,300.0,3.9,80.0,53.0
75%,400.0,4.2,100.0,64.0
max,2500.0,5.0,10000.0,109.0


In [10]:
# 1. Average price of food items by city and area
# as_index=False keeps the group keys as ordinary columns, so the result is a
# flat DataFrame with columns City, Area and Price (one row per pair).
avg_price_city_area = data.groupby(
    ['City', 'Area'],
    as_index=False,
)['Price'].mean()
avg_price_city_area

Unnamed: 0,City,Area,Price
0,Ahmedabad,Akhbar Nagar Circle,200.000000
1,Ahmedabad,Acher,200.000000
2,Ahmedabad,Ahmedabad,344.444444
3,Ahmedabad,Ambavadi,200.000000
4,Ahmedabad,Ambawadi,302.000000
...,...,...,...
838,Surat,Vesu,333.333333
839,Surat,Vip Road,220.000000
840,Surat,Vishal Nagar,228.333333
841,Surat,Yamuna Nagar,250.000000


In [12]:
# 2. Distribution of average ratings
# Count how many rows carry each distinct rating, then order by rating value
# (ascending) rather than by frequency.
# NOTE(review): in the recorded output the 2.9 bucket holds 3279 of 8680 rows,
# far more than its neighbours -- presumably a placeholder/default rating;
# confirm against the data source before interpreting the distribution.
rating_distribution = (
    data['Avg ratings']
    .value_counts()
    .sort_index()
)
rating_distribution

Avg ratings
2.0       1
2.2       3
2.3       1
2.4       1
2.5       1
2.6       2
2.7      11
2.8      15
2.9    3279
3.0      25
3.1      26
3.2      45
3.3      51
3.4      60
3.5      95
3.6     118
3.7     207
3.8     313
3.9     421
4.0     524
4.1     700
4.2     754
4.3     792
4.4     573
4.5     338
4.6     166
4.7      96
4.8      29
4.9      15
5.0      18
Name: count, dtype: int64

In [17]:
# 3. Relationship between price and average ratings
# One marker per row of `data`: Price on the x-axis, Avg ratings on the y-axis.
fig = px.scatter(
    data,
    x='Price',
    y='Avg ratings',
    title='Relationship between Price and Average Ratings',
)
fig.show()


In [23]:
# 4. Popular food types
# 'Food type' holds a comma-separated list per row, so split it and explode to
# one row per individual food type before counting.
# Each token is stripped of surrounding whitespace so that values written as
# "A, B" and "A,B" are counted under the same label instead of producing
# separate " B" / "B" buckets.
food_type_counts = (
    data['Food type']
    .str.split(',')
    .explode()
    .str.strip()
    .value_counts()
)
# Show the ten most frequent food types.
food_type_counts.head(10)

Food type
Chinese         2588
North Indian    1921
Indian          1772
Fast Food       1550
Beverages       1524
Desserts        1436
Biryani         1079
Snacks          1013
South Indian     857
Continental      686
Name: count, dtype: int64

In [24]:
# 5. Average delivery time by city
# as_index=False returns City as a regular column next to the mean
# 'Delivery time', giving one row per city.
avg_delivery_time_city = data.groupby(
    'City',
    as_index=False,
)['Delivery time'].mean()
avg_delivery_time_city

Unnamed: 0,City,Delivery time
0,Ahmedabad,44.709902
1,Bangalore,50.529598
2,Chennai,58.968354
3,Delhi,50.734861
4,Hyderabad,49.933023
5,Kolkata,67.809807
6,Mumbai,48.318716
7,Pune,55.854128
8,Surat,48.484375


In [28]:
# Pairwise Pearson correlation (the pandas default method) between the four
# numeric measures; the result is a symmetric 4x4 DataFrame.
correlation_matrix = data.loc[
    :,
    ['Price', 'Avg ratings', 'Total ratings', 'Delivery time'],
].corr(method='pearson')
correlation_matrix

Unnamed: 0,Price,Avg ratings,Total ratings,Delivery time
Price,1.0,0.11363,-0.014672,0.076009
Avg ratings,0.11363,1.0,0.1579,-0.146987
Total ratings,-0.014672,0.1579,1.0,-0.08409
Delivery time,0.076009,-0.146987,-0.08409,1.0


In [30]:
# Heatmap of the correlation matrix; text_auto=True prints each coefficient
# inside its cell, and the reversed Cividis scale is used for colouring.
fig = px.imshow(
    correlation_matrix,
    color_continuous_scale=px.colors.sequential.Cividis_r,
    aspect="auto",
    text_auto=True,
)
fig.show()

In [37]:
# Histogram of 'Delivery time' across all rows, using at most 30 bins.
fig = px.histogram(
    data,
    x="Delivery time",
    nbins=30,
    title='Distribution of Delivery Time',
    color_discrete_sequence=['orange'],
)
fig.show()