# **Zomato Data Set Analysis and Visualization**


## Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('dark_background')


## Reading CSV

In [None]:
# Load the Zomato Bangalore restaurant listings from the Kaggle input
# directory into a DataFrame.
csv_path = '../input/zomato-bangalore-restaurants/zomato.csv'
df = pd.read_csv(csv_path)
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Show the (rows, columns) dimensions of the freshly loaded data.
df.shape

In [None]:
# List the column labels so the ones to drop or rename can be identified.
df.columns

In [None]:
# Columns that no later cell in this notebook reads; remove them up front.
unused_columns = ['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list']
df = df.drop(columns=unused_columns)
df.head()

In [None]:
# Print dtypes and non-null counts for the remaining columns.
df.info()

In [None]:
# Remove rows that are exact duplicates across all remaining columns,
# then report the new dimensions.
df = df.drop_duplicates()
df.shape

## Dropping Duplicates

## Cleaning Rate Column

In [None]:
# Inspect the distinct raw values of the rate column before cleaning it.
df['rate'].unique()

## Removing "NEW", "-" and "/5" from the Rate Column

In [None]:
def ratehandle(value):
    """Convert one raw ``rate`` entry into a float rating.

    The placeholders ``'NEW'`` and ``'-'`` become ``np.nan``. Any other
    value is stringified, cut at the first ``'/'`` (which discards a
    trailing ``/5``), and the leading part is parsed with ``float``.
    """
    if value in ('NEW', '-'):
        return np.nan
    # Keep only the text before the first slash, e.g. '4.1/5' -> '4.1'.
    leading_part, _, _ = str(value).partition('/')
    return float(leading_part)
# Run every raw rate entry through ratehandle and store the numeric result
# back in the same column, then preview it.
parsed_rate = df['rate'].apply(ratehandle)
df['rate'] = parsed_rate
df['rate'].head()

## Filling Null Values in Rate Column with Mean

In [None]:
# Replace missing ratings with the mean of the known ratings.
# The result is assigned back to the column rather than using
# df['rate'].fillna(..., inplace=True): that form is a chained in-place
# update on an intermediate Series, which pandas deprecates and which no
# longer modifies df when Copy-on-Write is enabled.
mean_rate = df['rate'].mean()
df['rate'] = df['rate'].fillna(mean_rate)
# Confirm that no nulls remain in the column.
df['rate'].isnull().sum()

In [None]:
# Re-check non-null counts now that the rate column has been filled.
df.info()

## Dropping Null Values

In [None]:
# Discard every row that still has a missing value in any column.
df = df.dropna()
df.head()

In [None]:
# Give the two awkwardly named columns shorter labels.
column_renames = {
    'approx_cost(for two people)': 'Cost_2_plates',
    'listed_in(type)': 'Type',
}
df.rename(columns=column_renames, inplace=True)
df.head()

In [None]:
# Inspect the distinct values of the location column.
df['location'].unique()

## Both `listed_in(city)` and `location` are present; let's keep only one.

In [None]:
# The heading for this step says only one of 'listed_in(city)' and
# 'location' should be kept, but the cell previously only displayed the
# frame. Drop 'listed_in(city)'; 'location' is the column the later
# cleaning and plotting cells use.
df = df.drop(['listed_in(city)'], axis = 1)
df.head()

## Removing Commas from the Cost_2_plates Column

In [None]:
# Inspect the distinct raw cost values before converting them to numbers.
df['Cost_2_plates'].unique()
        

In [None]:
def commahandle(value):
    """Return *value* as a float after removing every comma from its text.

    The value is stringified first, so both strings such as ``'1,200'``
    and plain numbers are accepted.
    """
    without_commas = str(value).replace(',', '')
    return float(without_commas)
# Convert every cost entry to a float via commahandle, write it back, and
# list the distinct numeric values that result.
numeric_cost = df['Cost_2_plates'].apply(commahandle)
df['Cost_2_plates'] = numeric_cost
df['Cost_2_plates'].unique()

In [None]:
# Preview the frame after the cost column conversion.
df.head()

## Cleaning Rest Type Column

In [None]:
# Show how many rows fall under each rest_type value.
df['rest_type'].value_counts()

In [None]:
# Row count per rest_type, most frequent first (value_counts sorts in
# descending order by default).
rest_types = df['rest_type'].value_counts()
rest_types

In [None]:
# Keep only the rest_type entries whose row count is below 1000.
is_rare_rest_type = rest_types < 1000
rest_types_lessthan1000 = rest_types[is_rare_rest_type]
rest_types_lessthan1000

## Grouping Rest Types with Fewer than 1000 Occurrences as "others"

In [None]:
def resttypehandle(value):
    """Map a rare rest_type label to ``'others'``; return others unchanged.

    A label counts as rare when it appears in ``rest_types_lessthan1000``.
    ``in`` on a pandas Series tests its index, which here holds the
    rest_type labels produced by ``value_counts``.
    """
    return 'others' if value in rest_types_lessthan1000 else value
    
# Collapse the rare rest_type labels into 'others' and show the new counts.
grouped_rest_type = df['rest_type'].apply(resttypehandle)
df['rest_type'] = grouped_rest_type
df['rest_type'].value_counts()
        

In [None]:
# Preview the frame after grouping rare rest_type values.
df.head()

## Cleaning Location Column

In [None]:
# Show how many rows each location has.
df['location'].value_counts()

In [None]:
# Row count per location, most frequent first (the value_counts default).
location = df['location'].value_counts()

# Locations with fewer than 500 rows are treated as rare.
is_rare_location = location < 500
location_lessthan500 = location[is_rare_location]

def locationhandle(value):
    """Return ``'others'`` for a rare location, otherwise the location itself.

    Rare means the label is in the index of ``location_lessthan500``
    (``in`` on a pandas Series checks its index labels).
    """
    is_rare = value in location_lessthan500
    if not is_rare:
        return value
    return 'others'
    
# Collapse the rare locations into 'others' and show the new counts.
grouped_location = df['location'].apply(locationhandle)
df['location'] = grouped_location
df['location'].value_counts()


## Cleaning Cuisines Column

In [None]:
# Show how many rows each distinct cuisines string has.
df['cuisines'].value_counts()


In [None]:
# Row count per cuisines value, most frequent first (the value_counts default).
cuisines = df['cuisines'].value_counts()

# Cuisines values with fewer than 100 rows are treated as rare.
is_rare_cuisine = cuisines < 100
cuisine_lessthan100 = cuisines[is_rare_cuisine]

def cuisinehandle(value):
    """Replace a rare cuisines label with ``'others'``; keep the rest as-is.

    Membership is tested against ``cuisine_lessthan100``; for a pandas
    Series the ``in`` operator looks at the index, i.e. the cuisines labels.
    """
    if value not in cuisine_lessthan100:
        return value
    return 'others'
    
# Collapse the rare cuisines labels into 'others' and show the new counts.
grouped_cuisines = df['cuisines'].apply(cuisinehandle)
df['cuisines'] = grouped_cuisines
df['cuisines'].value_counts()

In [None]:
# Preview the frame after grouping rare cuisines values.
df.head()

In [None]:
# Show how many rows each Type value has.
df['Type'].value_counts()

## **Data is Clean, Let's Jump to Visualization**

## Count Plot of Various Locations

In [None]:
# Count of rows per location.
plt.figure(figsize=(16, 15))
# Name the column via x= and pass the frame via data=, matching the boxplot
# calls in this notebook. A Series passed positionally is taken as `data`
# (not `x`) by seaborn >= 0.12, and triggers a warning on 0.11.
ax = sns.countplot(x='location', data=df)
# Rotate the tick labels so the location names do not overlap.
plt.xticks(rotation=90)

In [None]:
# Count of rows per Type value.
plt.figure(figsize=(7, 10))
# Use explicit x=/data= keywords; seaborn >= 0.12 treats a lone positional
# argument as `data`, not as the x variable.
ax = sns.countplot(x='Type', data=df)
plt.xticks(rotation=90)

## Visualizing Online Order

In [None]:
# Count of rows per online_order value.
plt.figure(figsize=(6, 9))
# Use explicit x=/data= keywords; seaborn >= 0.12 treats a lone positional
# argument as `data`, not as the x variable.
ax = sns.countplot(x='online_order', data=df, palette='inferno')


## Visualizing Book Table

In [None]:
# Count of rows per book_table value.
plt.figure(figsize=(6, 9))
# Use explicit x=/data= keywords; seaborn >= 0.12 treats a lone positional
# argument as `data`, not as the x variable.
ax = sns.countplot(x='book_table', data=df, palette='inferno')

## Visualizing Online Order vs Rate

In [None]:
# Distribution of rate for each online_order value, drawn as box plots.
plt.figure(figsize=(6, 9))
sns.boxplot(data=df, x='online_order', y='rate')

## Visualizing Book Table vs Rate

In [None]:
# Distribution of rate for each book_table value, drawn as box plots.
plt.figure(figsize=(6, 9))
sns.boxplot(data=df, x='book_table', y='rate')

## Visualizing Online Order Facility, Location Wise

In [None]:
# Number of restaurants per (location, online_order) pair.
online_counts = df.groupby(['location', 'online_order'])['name'].count()
# Keep exporting the long-form counts to CSV as before.
online_counts.to_csv('location_online.csv')
# Flatten the grouped index with reset_index() instead of reading the CSV
# back from disk, then pivot so each online_order value becomes a column.
# The string 'sum' replaces np.sum, which pandas 2.x deprecates as an
# aggfunc; with values=None the only value column pivoted is 'name'.
df1 = pd.pivot_table(
    online_counts.reset_index(),
    values=None,
    index=['location'],
    columns=['online_order'],
    fill_value=0,
    aggfunc='sum',
)
df1

In [None]:
# Grouped bar chart of the per-location online_order counts.
df1.plot.bar(figsize=(15, 9))

## Visualizing Book Table Facility, Location Wise

In [None]:
# Number of restaurants per (location, book_table) pair.
booking_counts = df.groupby(['location', 'book_table'])['name'].count()
# Keep exporting the long-form counts to CSV as before.
booking_counts.to_csv('location_booktable.csv')
# Flatten the grouped index with reset_index() instead of reading the CSV
# back from disk, then pivot so each book_table value becomes a column.
# The string 'sum' replaces np.sum, which pandas 2.x deprecates as an
# aggfunc; with values=None the only value column pivoted is 'name'.
df2 = pd.pivot_table(
    booking_counts.reset_index(),
    values=None,
    index=['location'],
    columns=['book_table'],
    fill_value=0,
    aggfunc='sum',
)
df2

In [None]:
# Grouped bar chart of the per-location book_table counts.
df2.plot.bar(figsize=(15, 9))

## Visualizing Types of Restaurants vs Rate

In [None]:
# Preview the frame before plotting by Type.
df.head()

In [None]:
# Count of rows per Type value, using the 'inferno' palette.
plt.figure(figsize=(6, 9))
# Use explicit x=/data= keywords; seaborn >= 0.12 treats a lone positional
# argument as `data`, not as the x variable.
ax = sns.countplot(x='Type', data=df, palette='inferno')
plt.xticks(rotation=90)

In [None]:
# Distribution of rate for each Type value, drawn as box plots.
plt.figure(figsize=(14, 8))
sns.boxplot(data=df, x='Type', y='rate', palette='inferno')
# Rotate the tick labels so the Type names stay readable.
plt.xticks(rotation=90)

## Grouping Types of Restaurants, Location Wise

In [None]:
# Number of restaurants per (location, Type) pair.
type_counts = df.groupby(['location', 'Type'])['name'].count()
# Keep exporting the long-form counts to CSV as before.
type_counts.to_csv('location_restype.csv')
# Flatten the grouped index with reset_index() instead of reading the CSV
# back from disk, then pivot so each Type value becomes a column.
# The string 'sum' replaces np.sum, which pandas 2.x deprecates as an
# aggfunc; with values=None the only value column pivoted is 'name'.
df3 = pd.pivot_table(
    type_counts.reset_index(),
    values=None,
    index=['location'],
    columns=['Type'],
    fill_value=0,
    aggfunc='sum',
)
df3

In [None]:
# Grouped bar chart of the per-location Type counts.
df3.plot.bar(figsize=(36, 8))

## No. of Votes, Location Wise

In [None]:
# Total votes per location, sorted from most to fewest votes.
# NOTE(review): the original cell called df4.drop_duplicates() without
# assigning the result, so no rows were ever removed. That dead statement
# is dropped here; the totals still sum every row, exactly as before.
df4 = df[['location', 'votes']]
df5 = (
    df4.groupby(['location'])['votes']
    .sum()
    .to_frame()
    .sort_values('votes', ascending=False)
)
df5

In [None]:
# Bar chart of total votes per location.
plt.figure(figsize=(16, 9))
# Pass x and y by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so barplot(x_values, y_values) raises a TypeError there.
sns.barplot(x=df5.index, y=df5['votes'])
plt.xticks(rotation=90)


## Visualizing Top Cuisines

In [None]:
# Preview the frame before plotting cuisines.
df.head()

In [None]:
# Count of rows per cuisines value (rare values were grouped as 'others').
plt.figure(figsize=(16, 10))
# Use explicit x=/data= keywords; seaborn >= 0.12 treats a lone positional
# argument as `data`, not as the x variable.
ax = sns.countplot(x='cuisines', data=df, palette='rainbow')
plt.xticks(rotation=90)