# Import Libraries

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

# Reading CSV

In [None]:
# Load the Zomato restaurants CSV from the relative input directory.
df = pd.read_csv('../input/zomato-bangalore-restaurants/zomato.csv')
# Preview the first rows of the raw frame.
df.head()

In [None]:
# (rows, columns) of the frame as loaded.
df.shape

In [None]:
# List the column labels to decide which ones to keep.
df.columns

In [None]:
# Remove the URL, address, phone, menu, liked-dish and review columns.
df = df.drop(
    columns=['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list'],
)
df.head()

In [None]:
# Print dtypes and non-null counts for the remaining columns.
df.info()

# Dropping Duplicates

In [None]:
# Remove fully duplicated rows, mutating df in place.
df.drop_duplicates(inplace = True)
# Shape after de-duplication.
df.shape

# Cleaning Rate Column

In [None]:
# Inspect the distinct raw values of the rate column before cleaning.
df['rate'].unique()

# Remove "NEW", "-" and "/5" from the Rate Column

In [None]:
def handlerate(value):
    """Convert a raw rating such as ``'4.1/5'`` into a float.

    Parameters
    ----------
    value : object
        Raw cell from the ``rate`` column; it is stringified before parsing,
        so non-string inputs (e.g. a float NaN) are accepted.

    Returns
    -------
    float
        The number in front of the first ``/``. ``np.nan`` is returned for
        the placeholders ``'NEW'`` and ``'-'`` (with or without surrounding
        whitespace or a ``/5`` suffix) and for any other text that cannot be
        parsed as a number.
    """
    # Keep only the part before '/', and strip so ' NEW' or '- ' still match.
    text = str(value).split('/')[0].strip()
    if text in ('NEW', '-'):
        return np.nan
    try:
        return float(text)
    except ValueError:
        # Unparseable cell: treat it as missing instead of aborting the apply.
        return np.nan
    
# Run the cleaner over every rating and store the result back in the column.
df['rate'] = df['rate'].apply(handlerate)
# Spot-check the first converted values.
df['rate'].head()

# Filling Null Values in Rate Column with the Mean

In [None]:
# Replace missing ratings with the column mean. The result is assigned back
# to the column: calling fillna(..., inplace=True) on the df['rate'] selection
# is a chained in-place update, which pandas 2.x deprecates and which does not
# modify df when Copy-on-Write is enabled.
df['rate'] = df['rate'].fillna(df['rate'].mean())
# Count of nulls left in the column after filling.
df['rate'].isnull().sum()

In [None]:
# Re-check non-null counts per column after the rate column was filled.
df.info()

# dropping null values

In [None]:
# Drop every row that still has a missing value in any column, in place.
df.dropna(inplace = True)
df.head()

In [None]:
# Give the two awkwardly named columns shorter labels, mutating df in place.
df.rename(
    columns={
        'approx_cost(for two people)': 'Cost2plates',
        'listed_in(type)': 'Type',
    },
    inplace=True,
)
df.head()

# Both 'listed_in(city)' and 'location' are present, so drop 'listed_in(city)'

In [None]:
# Remove the 'listed_in(city)' column.
df = df.drop(columns=['listed_in(city)'])

In [None]:
# Inspect the distinct raw cost values before converting them to numbers.
df['Cost2plates'].unique()

In [None]:
def handlecomma(value):
    """Return *value* as a float, ignoring thousands separators.

    The input is stringified, every ``','`` is removed, and the remaining
    text is passed to ``float`` (so ``'1,200'`` becomes ``1200.0``).
    """
    # Removing commas is a no-op when there are none, so one path suffices.
    return float(str(value).replace(',', ''))
    
# Convert every cost entry to a float and store it back in the column.
df['Cost2plates'] = df['Cost2plates'].apply(handlecomma)
# Distinct values after conversion.
df['Cost2plates'].unique()

In [None]:
# Preview the frame after the cost column was converted.
df.head()

# clean rest type column

In [None]:
# Frequency of each restaurant type, most common first (the default order).
rest_types = df['rest_type'].value_counts()
rest_types

In [None]:
# Keep only the restaurant types that occur fewer than 1000 times.
rest_types_lessthan1000 = rest_types.loc[rest_types < 1000]
rest_types_lessthan1000

# Rest types with a frequency below 1000 are grouped as 'others'

In [None]:
def handle_rest_type(value):
    """Map a rare restaurant type to ``'others'``; return any other value as is.

    A type counts as rare when it is one of the labels of the module-level
    ``rest_types_lessthan1000`` series.
    """
    # `in` on a pandas Series tests the index labels, i.e. the type names.
    return 'others' if value in rest_types_lessthan1000 else value
        
# Collapse the rare restaurant types and store the result back in the column.
df['rest_type'] = df['rest_type'].apply(handle_rest_type)
# Frequencies after grouping.
df['rest_type'].value_counts()

In [None]:
# Frequency of each location, most common first (the default order).
location = df['location'].value_counts()

# Keep only the locations that occur fewer than 300 times.
location_lessthan300 = location.loc[location < 300]



def handle_location(value):
    """Map a rare location to ``'others'``; return any other value as is.

    A location counts as rare when it is one of the labels of the
    module-level ``location_lessthan300`` series.
    """
    # `in` on a pandas Series tests the index labels, i.e. the location names.
    return 'others' if value in location_lessthan300 else value
        
# Collapse the rare locations and store the result back in the column.
df['location'] = df['location'].apply(handle_location)
# Frequencies after grouping.
df['location'].value_counts()

In [None]:
# Frequency of each cuisines entry, most common first (the default order).
cuisines = df['cuisines'].value_counts()


# Keep only the cuisines entries that occur fewer than 100 times.
cuisines_lessthan100 = cuisines.loc[cuisines < 100]



def handle_cuisines(value):
    """Map a rare cuisines entry to ``'others'``; return any other value as is.

    An entry counts as rare when it is one of the labels of the module-level
    ``cuisines_lessthan100`` series.
    """
    # `in` on a pandas Series tests the index labels, i.e. the cuisine strings.
    return 'others' if value in cuisines_lessthan100 else value
        
# Collapse the rare cuisines entries and store the result back in the column.
df['cuisines'] = df['cuisines'].apply(handle_cuisines)
# Frequencies after grouping.
df['cuisines'].value_counts()

In [None]:
# Preview the cleaned frame before plotting.
df.head()

# Visualization

**Count plot of restaurants per location**

In [None]:
# Bar chart of how many restaurants fall in each location.
plt.figure(figsize = (16,10))
# Pass the column as x= explicitly: in current seaborn the first positional
# parameter of countplot is `data`, so a positional Series is not read as x.
ax = sns.countplot(x = df['location'])
# Rotate the location labels so they do not overlap.
plt.xticks(rotation=90)

# visualizing online order and book table

In [None]:
# Count of restaurants that do / do not accept online orders.
plt.figure(figsize = (6,6))
# Pass the column as x= explicitly: in current seaborn the first positional
# parameter of countplot is `data`, so a positional Series is not read as x.
sns.countplot(x = df['online_order'], palette = 'inferno')

In [None]:
# Count of restaurants that do / do not offer table booking.
plt.figure(figsize = (6,6))
# Pass the column as x= explicitly: in current seaborn the first positional
# parameter of countplot is `data`, so a positional Series is not read as x.
sns.countplot(x = df['book_table'], palette = 'rainbow')

# online order vs rate
# book table vs rate

In [None]:
# Rating distribution split by whether online ordering is available.
plt.figure(figsize=(6, 6))
sns.boxplot(data=df, x='online_order', y='rate')

In [None]:
# Rating distribution split by whether table booking is available.
plt.figure(figsize=(6, 6))
sns.boxplot(data=df, x='book_table', y='rate')

# Online Order Facility, Location Wise

# Book Table Facility, Location Wise

In [None]:
# Count restaurants for every (location, online_order) pair.
df1 = df.groupby(['location','online_order'])['name'].count()
# The counts are still exported, as the original cell did.
df1.to_csv('location_online.csv')
# Flatten the grouped index into columns directly instead of re-reading the
# CSV just written: no disk round trip, and labels are not re-parsed by read_csv.
df1 = df1.reset_index()
# One row per location, one column per online_order value; missing pairs are 0.
# The string 'sum' replaces np.sum, which recent pandas warns about as aggfunc.
df1 = pd.pivot_table(df1, values=None, index=['location'], columns=['online_order'], fill_value=0, aggfunc='sum')
df1

In [None]:
# Grouped bars: one group per location, one bar per online_order value.
df1.plot(kind = 'bar', figsize = (12,6))

In [None]:
# Count restaurants for every (location, book_table) pair.
df2 = df.groupby(['location','book_table'])['name'].count()
# The counts are still exported, as the original cell did.
df2.to_csv('location_booktable.csv')
# Flatten the grouped index into columns directly instead of re-reading the
# CSV just written: no disk round trip, and labels are not re-parsed by read_csv.
df2 = df2.reset_index()
# One row per location, one column per book_table value; missing pairs are 0.
# The string 'sum' replaces np.sum, which recent pandas warns about as aggfunc.
df2 = pd.pivot_table(df2, values=None, index=['location'], columns=['book_table'], fill_value=0, aggfunc='sum')
df2

In [None]:
# Grouped bars: one group per location, one bar per book_table value.
df2.plot(kind = 'bar', figsize = (12,6))

# Types of Restaurants, Location Wise

In [None]:
# Count restaurants for every (location, Type) pair.
df3 = df.groupby(['location','Type'])['name'].count()
# The counts are still exported, as the original cell did.
df3.to_csv('location_Type.csv')
# Flatten the grouped index into columns directly instead of re-reading the
# CSV just written: no disk round trip, and labels are not re-parsed by read_csv.
df3 = df3.reset_index()
# One row per location, one column per Type value; missing pairs are 0.
# The string 'sum' replaces np.sum, which recent pandas warns about as aggfunc.
df3 = pd.pivot_table(df3, values=None, index=['location'], columns=['Type'], fill_value=0, aggfunc='sum')
df3

In [None]:
# Grouped bars: one group per location, one bar per Type value (wide figure).
df3.plot(kind = 'bar', figsize = (36,8))

In [None]:
# Final preview of the cleaned frame.
df.head()