In [1]:
%%writefile zomato.py
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# Page-level setup: this is the first Streamlit call made by the script.
st.set_page_config(
    page_title='Zomato',
    page_icon=':bar_chart:',
    layout='wide',
)
st.title(" :bar_chart: Zomato")

# Inject a CSS rule that sets the top padding of the main block container to 1rem.
st.markdown(
    '<style>div.block-container{padding-top:1rem;}</style>',
    unsafe_allow_html=True,
)

# Load the Zomato listings and normalise the columns the charts rely on.
df = pd.read_csv('zomato.csv', encoding='ISO-8859-1')

# Remove exact duplicate rows and the two columns no chart reads. The
# de-duplicated frame must be assigned back to `df`; keeping it in a separate,
# unused variable lets the duplicates flow into every chart.
df = df.drop_duplicates().drop(columns=['url', 'phone'])

# Keep the part of the rating before the '/' (presumably values look like
# "4.1/5" -- confirm against the CSV); anything non-numeric becomes NaN.
df['rate'] = pd.to_numeric(df['rate'].str.split('/').str[0], errors='coerce')

# Strip thousands separators before converting: with errors='coerce' a value
# such as "1,200" would otherwise silently turn into NaN and drop out of every
# cost average. astype(str) keeps this safe if the column was parsed as numeric.
df['approx_cost(for two people)'] = pd.to_numeric(
    df['approx_cost(for two people)'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# Banner image stretched to the width of its container. No explicit `width` is
# passed: use_column_width=True takes precedence over it, so a fixed width here
# would be ignored and only obscure how the image is actually sized.
# NOTE(review): 'Image Caption' looks like a placeholder -- confirm the intended caption.
st.image('open-kitchen-restaurant.jpg', caption='Image Caption', use_column_width=True)

# Sidebar toggle: when ticked, show the first ten rows of the dataset.
if st.sidebar.checkbox('show data', value=False, key=1):
    st.header('sample data')
    st.dataframe(df.head(10))

# Sidebar filters, each offering the distinct values of one column.
# NOTE(review): only `online_order` is consumed by a chart further down;
# the `name` and `location` selections are currently not applied anywhere.
st.sidebar.header("Choose your filter: ")
name = st.sidebar.selectbox(
    'Select Restorant Name',
    options=df['name'].unique(),
)
online_order = st.sidebar.selectbox(
    'select online order status',
    options=df['online_order'].unique(),
)
location = st.sidebar.selectbox(
    'Select the location',
    options=df['location'].unique(),
)

# Headline counts for the three KPI tiles.
# nunique() skips missing values; len(unique()) would count NaN as one extra
# "category"/"restaurant"/"city" whenever a column has blanks.
num_of_category = df['rest_type'].nunique()
num_of_rest = df['name'].nunique()
num_of_city = df['listed_in(city)'].nunique()

a1, a2, a3 = st.columns(3)

# Each tile gets a centred, coloured HTML heading followed by the metric itself.
for tile, label, value in (
    (a1, "Category", num_of_category),
    (a2, "Restaurant", num_of_rest),
    (a3, "City", num_of_city),
):
    tile.markdown(
        f"<h3 style='text-align:center; color:#0F6466;'>{label}</h3>",
        unsafe_allow_html=True,
    )
    tile.metric(label, value)
# Two-column dashboard body; this block creates both columns and fills the left one.
col1, col2 = st.columns(2)
with col1:

    # Mean rating per location, split by online-order status. The frame is
    # aggregated first because px.bar draws one mark per input row: feeding it
    # the raw listings piles every listing's bar onto the same location slot
    # instead of showing a single rating per location.
    rate_by_location = (
        df.groupby(['location', 'online_order'])['rate']
        .mean()
        .reset_index()
    )
    fig = px.bar(
        rate_by_location,
        x='location',
        y='rate',
        color='online_order',
        barmode='group',
        title='rate of location',
        color_discrete_sequence=['#0F6466', '#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten locations with the most listings (head(10), consistent with the
    # other rankings; a [:11] slice would show eleven bars).
    listings_by_location = (
        df.groupby('location')['name']
        .count()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        listings_by_location,
        x='location',
        y='name',
        title='num of rest per location',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten addresses with the most rows that have a non-null online_order value.
    # NOTE(review): count() counts every non-null entry regardless of its value,
    # so this is listings per address rather than "online orders" -- confirm intent.
    rows_by_address = (
        df.groupby('address')['online_order']
        .count()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        rows_by_address,
        x='address',
        y='online_order',
        title='num of online order per address',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten locations with the highest mean cost for two.
    cost_by_location = (
        df.groupby('location')['approx_cost(for two people)']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        cost_by_location,
        x='location',
        y='approx_cost(for two people)',
        title='Approx cost for each rest',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten best-rated restaurant types. Sorted descending like every other
    # ranking here; an ascending sort would show the ten worst instead.
    rate_by_rest_type = (
        df.groupby(['rest_type'])['rate']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        rate_by_rest_type,
        x='rest_type',
        y='rate',
        title='rate per rest_type',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten cuisine combinations with the most listings.
    listings_by_cuisine = (
        df.groupby(['cuisines'])['name']
        .count()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        listings_by_cuisine,
        x='cuisines',
        y='name',
        title='Top 10 popular cuisines',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Histogram of the numeric rating column.
    fig = px.histogram(
        df,
        x='rate',
        title='Distribution of Restaurant Ratings',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Share of the seven most common restaurant types, one custom colour per slice.
    top_restaurant_types = df['rest_type'].value_counts().head(7)
    data = pd.DataFrame({
        'Restaurant Type': top_restaurant_types.index,
        'Count': top_restaurant_types.values,
    })
    custom_colors = ['#0F6466', '#004F4D', '#1F2024', '#4B4952', '#79717A', '#A68F97', '#183B59']
    fig = px.pie(
        data,
        names='Restaurant Type',
        values='Count',
        title='Distribution of Restaurant Types',
    )
    fig.update_traces(marker=dict(colors=custom_colors))
    st.plotly_chart(fig, use_container_width=True)

    # Ten cuisine combinations with the highest mean rating.
    rate_by_cuisine = (
        df.groupby(['cuisines'])['rate']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        rate_by_cuisine,
        x='cuisines',
        y='rate',
        title='rate of cuisines',
        color_discrete_sequence=['#0F6466'],
    )
    st.plotly_chart(fig, use_container_width=True)


# Right-hand dashboard column.
with col2:

    # Mean rating per listing type (top ten).
    rate_by_type = (
        df.groupby(['listed_in(type)'])['rate']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        rate_by_type,
        x='listed_in(type)',
        y='rate',
        title='rate per each type',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Number of listings for each book_table value.
    listings_by_booking = (
        df.groupby(['book_table'])['name']
        .count()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig = px.bar(
        listings_by_booking,
        x='book_table',
        y='name',
        title='book table per rest',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Mean rating for each book_table value. It has its own title so it is not
    # confused with the listing-count chart directly above.
    rate_by_booking = (
        df.groupby(['book_table'])['rate']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig = px.bar(
        rate_by_booking,
        x='book_table',
        y='rate',
        title='rate of each book table status',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Mean rating for each online_order value.
    rate_by_online_order = (
        df.groupby(['online_order'])['rate']
        .mean()
        .sort_values(ascending=False)
        .reset_index()
    )
    fig = px.bar(
        rate_by_online_order,
        x='online_order',
        y='rate',
        title='rate of each online order status',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten most frequent dish_liked values (head(10), consistent with the other
    # rankings; a [:11] slice would show eleven bars).
    listings_by_dish = (
        df.groupby('dish_liked')['name']
        .count()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        listings_by_dish,
        x='dish_liked',
        y='name',
        title='num of dish_liked',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten dish_liked values with the highest mean rating.
    rate_by_dish = (
        df.groupby('dish_liked')['rate']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        rate_by_dish,
        x='dish_liked',
        y='rate',
        title='rate of dish_liked',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Mean cost for two per one-point rating band.
    rating_bins = [1, 2, 3, 4, 5]
    rating_labels = ['1-2', '2-3', '3-4', '4-5']
    # include_lowest=True puts a rating of exactly 1.0 into the '1-2' band
    # instead of leaving it unbinned.
    df['rating_range'] = pd.cut(
        df['rate'].astype(float),
        bins=rating_bins,
        labels=rating_labels,
        include_lowest=True,
    )
    # observed=False is spelled out so every band is kept (even an empty one)
    # and the result does not depend on the pandas default for categoricals.
    avg_cost_by_rating = (
        df.groupby('rating_range', observed=False)['approx_cost(for two people)']
        .mean()
        .reset_index()
    )
    fig = px.line(
        avg_cost_by_rating,
        x='rating_range',
        y='approx_cost(for two people)',
        title='Average Cost for Two People by Rating Range',
        color_discrete_sequence=['#0F6466'],
    )
    fig.update_xaxes(categoryorder='array', categoryarray=rating_labels)
    st.plotly_chart(fig, use_container_width=True)

    # Rating spread for the ten most common cuisine combinations.
    # value_counts() already returns counts in descending order.
    top_cuisines = df['cuisines'].value_counts().head(10).index
    filtered_df = df[df['cuisines'].isin(top_cuisines)]
    fig = px.box(
        filtered_df,
        x='cuisines',
        y='rate',
        title='Box Plot of Ratings by Cuisine Type',
        color_discrete_sequence=['#0F6466'],
    )
    fig.update_layout(
        xaxis_title='Cuisine Type',
        yaxis_title='Rating',
        # 'total descending' orders categories by the sum of their y values.
        xaxis_categoryorder='total descending',
    )
    st.plotly_chart(fig, use_container_width=True)

    # Bubble chart of the 30 most frequent whitespace-separated tokens in dish_liked.
    # NOTE(review): if dish_liked holds comma-separated dish names, splitting on
    # whitespace breaks multi-word dishes and leaves trailing commas -- confirm.
    word_frequencies = (
        df['dish_liked'].str.split().explode().value_counts().head(30).reset_index()
    )
    word_frequencies.columns = ['Word', 'Frequency']
    # Bubble size is left to Plotly Express (size= with size_max=). color='Word'
    # yields one single-point trace per word, so pushing the whole Frequency
    # array into every trace's marker.size made each bubble read element 0 and
    # all bubbles ended up the same size.
    fig = px.scatter(
        word_frequencies,
        x='Word',
        y='Frequency',
        size='Frequency',
        size_max=40,
        color='Word',
        text='Word',
        # The data comes from dish_liked, not from restaurant names.
        title='Most Frequent Words in Liked Dishes',
    )
    # Axis titles and legend add nothing here: each point is labelled with its word.
    fig.update_layout(
        xaxis_title=None,
        yaxis_title=None,
        showlegend=False,
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten restaurants with the highest mean votes, restricted to the
    # online-order status chosen in the sidebar.
    new_df = df[df['online_order'] == online_order]
    votes_by_restaurant = (
        new_df.groupby('name')['votes']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        votes_by_restaurant,
        x='name',
        y='votes',
        title='vote per each rest',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

    # Ten cuisine combinations with the highest mean votes (unfiltered).
    votes_by_cuisine = (
        df.groupby(['cuisines'])['votes']
        .mean()
        .sort_values(ascending=False)
        .head(10)
        .reset_index()
    )
    fig = px.bar(
        votes_by_cuisine,
        x='cuisines',
        y='votes',
        title='vote per each cuisine',
        color_discrete_sequence=['#D8B08C'],
    )
    st.plotly_chart(fig, use_container_width=True)

Overwriting zomato.py


In [None]:
! streamlit run zomato.py