# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('dark_background')

# Reading CSV File

In [None]:
df = pd.read_csv('/kaggle/input/zomato-bangalore-restaurants/zomato.csv')
df.head()

In [None]:
df.shape

In [None]:
df.columns

In [None]:
# Columns removed here are not referenced by any later cell in this notebook.
unused_columns = ['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list']
df = df.drop(columns=unused_columns)
df.head()

In [None]:
df.info()

# Dropping Duplicates

In [None]:
# Remove rows that are exact duplicates across all remaining columns.
# inplace=True mutates df directly, so re-running this cell is harmless.
df.drop_duplicates(inplace = True)
# Display the (rows, columns) shape after de-duplication.
df.shape

# Cleaning Rate Column

In [None]:
df['rate'].unique()

* ### Removing "NEW","-" and "/5" from Rate Column

In [None]:
def handlerate(value):
    """Convert a raw rating cell such as '4.1/5' into a float.

    Args:
        value: Raw cell from the rate column (a string like '3.8 /5',
            a placeholder like 'NEW' or '-', a float NaN, or None).

    Returns:
        The number in front of the '/' separator as a float, or np.nan when
        the cell is missing, blank, or one of the placeholders 'NEW' / '-'.

    Raises:
        ValueError: If the text before '/' is neither a number nor a known
            placeholder, so unexpected labels are not silently hidden.
    """
    if value is None:
        return np.nan
    # str() also lets float NaN cells through: 'nan' parses back to NaN below.
    # Strip before comparing so padded placeholders such as ' NEW ' match.
    text = str(value).split('/')[0].strip()
    if text in ('NEW', '-', ''):
        return np.nan
    return float(text)
    
df['rate'] = df['rate'].apply(handlerate)
df['rate'].head()

In [None]:
df.rate.isnull().sum()

# Filling Null Values in Rate Column with Mean

In [None]:
# Impute missing ratings with the column mean.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the df['rate'] selection: that chained in-place form
# emits a warning on recent pandas and leaves df unchanged under Copy-on-Write.
df['rate'] = df['rate'].fillna(df['rate'].mean())
# Should now display 0 remaining nulls in the rate column.
df['rate'].isnull().sum()

In [None]:
df.info()

# Dropping null values

In [None]:
# Drop every row that still has a missing value in any column
# (the rate column was already imputed with its mean in an earlier cell).
df.dropna(inplace = True)
df.head()

In [None]:
# Give the two awkwardly named columns short identifiers; later cells refer to
# them as 'Cost2plates' and 'Type'.
df.rename(columns = {'approx_cost(for two people)': 'Cost2plates','listed_in(type)':'Type'}, inplace = True)
df.head()

In [None]:
df['location'].unique()

In [None]:
df['listed_in(city)'].unique()

In [None]:
df = df.drop(['listed_in(city)'], axis = 1)

In [None]:
df['Cost2plates'].unique()

# Removing commas from Cost2plates

In [None]:
def handlecomma(value):
    """Return *value* as a float after removing any ',' separators.

    The input is stringified first, so both text such as '1,200' and plain
    numbers are accepted.
    """
    # replace() is a no-op when there is no comma, so one path covers both cases.
    digits = str(value).replace(',', '')
    return float(digits)

df['Cost2plates'] = df['Cost2plates'].apply(handlecomma)
df['Cost2plates'].unique()

In [None]:
df.head()

# Cleaning Rest types of the restaurants

In [None]:
df['rest_type'].value_counts()

In [None]:
rest_types = df['rest_type'].value_counts(ascending = False)
rest_types

In [None]:
# Keep only the restaurant types whose count is below 1000; the result is a
# Series indexed by type label, used below to decide what becomes 'others'.
rest_types_lst1000 = rest_types[rest_types<1000]
rest_types_lst1000

# Grouping restaurant types with fewer than 1000 listings as 'others'

In [None]:
def handle_rest_type(value):
    """Return 'others' for a rare restaurant type, else the type unchanged."""
    # Membership is tested against the Series index (the type labels),
    # not against the counts stored as its values.
    return 'others' if value in rest_types_lst1000.index else value


df['rest_type'] = df['rest_type'].apply(handle_rest_type)
df['rest_type'].value_counts()

In [None]:
df.head()

# Cleaning Location Column

In [None]:
df['location'].value_counts()

In [None]:
# Listings per location, most frequent first.
location = df['location'].value_counts(ascending=False)
# Locations with fewer than 300 listings get folded into a single bucket.
location_lst300 = location[location < 300]


def handle_location(value):
    """Return 'others' for a rare location, else the location unchanged."""
    # Membership is tested against the Series index (the location labels).
    return 'others' if value in location_lst300.index else value


df['location'] = df['location'].apply(handle_location)
df['location'].value_counts()

In [None]:
df.head()

# Cleaning Cuisines Data

In [None]:
# Listings per cuisine combination, most frequent first.
cuisines = df['cuisines'].value_counts(ascending=False)
# Combinations seen fewer than 100 times get folded into a single bucket.
cuisines_lst100 = cuisines[cuisines < 100]


def cuisines_handle(value):
    """Return 'others' for a rare cuisine label, else the label unchanged."""
    # Membership is tested against the Series index (the cuisine labels).
    return 'others' if value in cuisines_lst100.index else value


df['cuisines'] = df['cuisines'].apply(cuisines_handle)
df['cuisines'].value_counts()

In [None]:
df.head()

In [None]:
df['Type'].value_counts()

#### Now the data is clean, let's jump to visualization

# LET'S VISUALIZE THE DATA

* ## **Count plots for various locations**

In [None]:
plt.figure(figsize=(16, 10))
location_counts = df['location'].value_counts()
ax = sns.barplot(x=location_counts.index, y=location_counts.values)
plt.xticks(rotation=90)

## Visualizing online order providing restaurants 

In [None]:
plt.figure(figsize=(6,6))
online = df['online_order'].value_counts()
ax = sns.barplot(x=online.index, y=online.values, palette = 'inferno')

## Visualizing Book Table 

In [None]:
plt.figure(figsize = (6,6))
book = df['book_table'].value_counts()
sns.barplot(x=book.index,y=book.values, palette = 'rainbow')

## Visualizing Online Order vs Rate 

In [None]:
plt.figure(figsize = (6,6))
sns.boxplot(x = 'online_order', y = 'rate', data = df)

## Visualizing book table vs rate 

In [None]:
plt.figure(figsize =(6,6))
sns.boxplot(x='book_table',y='rate',data=df)

## Visualizing Online order VS Location wise

In [None]:
# Count restaurants for every (location, online_order) pair.
df1 = df.groupby(['location','online_order'])['name'].count()
# The CSV export is kept so the notebook still produces this file.
df1.to_csv('location_online.csv')
# Turn the group keys into ordinary columns directly. Re-reading the CSV only
# to flatten the index was redundant and would let read_csv reinterpret
# labels that look like NA tokens.
df1 = df1.reset_index()
# One row per location, one column per online_order value; absent pairs -> 0.
# aggfunc is given as the string 'sum' because passing np.sum is deprecated
# in recent pandas.
df1 = pd.pivot_table(df1, values=None, index=['location'], columns=['online_order'], fill_value=0, aggfunc='sum')
df1

In [None]:
df1.plot(kind = 'bar', figsize = (15,8))

## Visualizing Book Table Facility, Location Wise 

In [None]:
# Count restaurants for every (location, book_table) pair.
df2 = df.groupby(['location','book_table'])['name'].count()
# The CSV export is kept so the notebook still produces this file.
df2.to_csv('location_booktable.csv')
# Turn the group keys into ordinary columns directly. Re-reading the CSV only
# to flatten the index was redundant and would let read_csv reinterpret
# labels that look like NA tokens.
df2 = df2.reset_index()
# One row per location, one column per book_table value; absent pairs -> 0.
# aggfunc is given as the string 'sum' because passing np.sum is deprecated
# in recent pandas.
df2 = pd.pivot_table(df2, values=None, index=['location'], columns=['book_table'], fill_value=0, aggfunc='sum')
df2

In [None]:
df2.plot(kind = 'bar', figsize = (15,8))

## Visualizing Types of Restaurants vs Rate 

In [None]:
plt.figure(figsize = (14, 8))
sns.boxplot(x = 'Type', y = 'rate', data = df, palette = 'inferno')

##  Grouping Types of Restaurants, location wise

In [None]:
# Count restaurants for every (location, Type) pair.
df3 = df.groupby(['location','Type'])['name'].count()
# The CSV export is kept so the notebook still produces this file.
df3.to_csv('location_Type.csv')
# Turn the group keys into ordinary columns directly. Re-reading the CSV only
# to flatten the index was redundant and would let read_csv reinterpret
# labels that look like NA tokens.
df3 = df3.reset_index()
# One row per location, one column per Type value; absent pairs -> 0.
# aggfunc is given as the string 'sum' because passing np.sum is deprecated
# in recent pandas.
df3 = pd.pivot_table(df3, values=None, index=['location'], columns=['Type'], fill_value=0, aggfunc='sum')
df3

In [None]:
df3.plot(kind = 'bar', figsize = (36,8))

##  No. of Votes, Location Wise

In [None]:
# Total votes per location, highest first.
df4 = df[['location', 'votes']]
# The former bare `df4.drop_duplicates()` call was removed: its result was
# discarded, so it never changed df4. Assigning it would be wrong anyway,
# because distinct restaurants in one location can share a vote count and
# would be dropped from the sum.
df5 = df4.groupby(['location'])['votes'].sum().to_frame()
df5 = df5.sort_values('votes', ascending=False)
df5.head()

In [None]:
plt.figure(figsize = (15,8))
sns.barplot(x=df5.index ,y= df5['votes'])
plt.xticks(rotation = 90)

In [None]:
df.head()

##  Visualizing Top Cuisines

In [None]:
# Total votes per cuisine label, highest first.
df6 = df[['cuisines', 'votes']]
# The former bare `df6.drop_duplicates()` call was removed: its result was
# discarded, so it never changed df6. Assigning it would be wrong anyway,
# because distinct restaurants with the same cuisine label can share a vote
# count and would be dropped from the sum.
df7 = df6.groupby(['cuisines'])['votes'].sum().to_frame()
df7 = df7.sort_values('votes', ascending=False)
df7.head()

In [None]:
# Remove the catch-all 'others' bucket so the chart shows real cuisines only.
# NOTE(review): the original dropped the first row by position
# (df7.iloc[1:, :]), presumably because 'others' sorts to the top; confirm.
# Dropping by label keeps the cell idempotent (re-running it no longer
# removes one more cuisine each time) and correct even if 'others' is not
# the top row. errors='ignore' makes a second run a no-op.
df7 = df7.drop(index='others', errors='ignore')
df7.head()

In [None]:
plt.figure(figsize = (15,8))
sns.barplot(x=df7.index , y= df7['votes'])
plt.xticks(rotation = 90)