In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_json('file1.json')

In [3]:
df['restaurants'].iloc[0][0]['restaurant'].keys()

dict_keys(['has_online_delivery', 'photos_url', 'url', 'price_range', 'apikey', 'user_rating', 'R', 'name', 'cuisines', 'is_delivering_now', 'deeplink', 'menu_url', 'average_cost_for_two', 'book_url', 'switch_to_order_menu', 'offers', 'has_table_booking', 'location', 'featured_image', 'zomato_events', 'currency', 'id', 'thumb', 'establishment_types', 'events_url'])

In [4]:
zomato = pd.read_csv('zomato.csv', encoding = 'ISO-8859-1')

In [5]:
zomato.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [6]:
zomato.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [26]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

class CSVNotFoundException(Exception):
    pass

class InvalidRangeException(Exception):
    pass

class ContentFiltering(object):
    
    def __init__(self, csvPath, countryPath):
        self.dataset = self.get_data_csv(csvPath)
        self.countryCode = self.get_country_csv(countryPath)
        self.create_cuisine_bag()   
    
    def filter_online_delivery(self, data, param):
        data = data[data['Has Online delivery'] == param]
        return data
    
    def filter_table_booking(self, data, param):
        data = data[data['Has Table booking'] == param]
        return data
    
    def filter_price_range(self, data, lower_range = 1, upper_range = 5):
        
        if (type(data) != pd.DataFrame):
            raise(TypeError('Not a dataframe'))
            
        if not (lower_range >= 1 and upper_range <= 5):
            raise(InvalidRangeException('only ranging from 1 to 5'))
        mask1 = data['Price range'] >= lower_range
        mask2 = data['Price range'] <= upper_range
        data =  data[mask1 & mask2]
        return data
    
    def filter_avg_cost(self, dataset, lower_range = 0, upper_range= 800000):
        if (type(data) != pd.DataFrame):
            raise(TypeError('Not a dataframe'))
            
        mask1 = data['Average Cost for two'] >= lower_range
        mask2 = data['Average Cost for two'] <= upper_range
        data =  data[mask1 & mask2]
        return data
    
    
    def country_filter(self, country):
        if type(country) != str:
            raise(TypeError('Country Name is a string'))
        code = self.countryCode[self.countryCode['country'] == country.lower()]['code'][0]
        print (code)
        data = self.dataset[self.dataset['Country Code'] == code]
        return data
    
    def get_country_csv(self, csvPath):

        if not (os.path.exists(csvPath)):
            raise(FileNotFoundError('{} is not found'.format(csvPath)))

        if not (os.path.isfile(csvPath)):
            raise(IsADirectoryError('{} is not a file'.format(csvPath)))
        
        if (not csvPath.endswith('.csv')):
            raise(CSVNotFoundException('{} is not a csv'.format(csvPath)))
        
        country_code = pd.read_csv(csvPath, encoding ='ISO-8859-1')
        country_code['country'] = country_code['country'].apply(lambda x: x.lower())
        return country_code
        
    def get_data_csv(self, csvPath):
        if not (os.path.exists(csvPath)):
            raise(FileNotFoundError('{} is not found'.format(csvPath)))

        if not (os.path.isfile(csvPath)):
            raise(IsADirectoryError('{} is not a file'.format(csvPath)))
        
        if (not csvPath.endswith('.csv')):
            raise(CSVNotFoundException('{} is not a csv'.format(csvPath)))
        
        
        print ('Reading data from {}'.format(csvPath))
        dataset = pd.read_csv(csvPath, encoding = 'ISO-8859-1')
        return dataset
    

    def create_cuisine_bag(self):
        slice_strip = lambda x: [y.lower().strip() for y in x] if type(x) != float else x
        bag = set([y for x in self.dataset['Cuisines'].str.split(',').apply(slice_strip) if type(x) != float for y in x ])
        self.cuisine_bag = list(bag)
        
    def filter_cuisine(self, cuisine_list, dataset = None):
        bag = []
        if type(cuisine_list) != list:
            cuisine_list = [cuisine_list]
        print (cuisine_list)
        for l in cuisine_list:
            print (l)
            bag.extend([x for x in filter(lambda x: l in x, self.cuisine_bag)])
        bag_exp = '|'.join([x for x in bag])
        bag_exp = '(' + bag_exp + ')'
        print (bag_exp)
        if (type(dataset) == None):
            dataset = self.dataset
        mask1 = dataset['Cuisines'].apply(lambda x: x.find(bag_exp) != None)
        data = dataset[mask1]
        #print (data)
        return data
    
    def filter_city(self, city):
        if city == None:
            raise(TypeError('City is not present'))
        mask = self.dataset['City'].apply(lambda x: city.lower() in x.lower())
        data = self.dataset[mask]
        #print (data)
        return data
    
    def filter_rating(self, dataset, min_rating):
        
        if type(min_rating) is not float:
            raise(TypeError('Only float values allowed'))
        elif not (min_rating >= 1) and (min_rating <= 5):
            raise(InvalidRangeException('Only from 1 to 5'))
        
        mask = dataset['Aggregate rating'] >= min_rating
        data = dataset[mask]
        return data

In [43]:
def filter_data(country = None, city = None, cuisine = None, rating = 1, price_min_rate = 1, price_max_rate = 5, table_book = 'No', online_order = 'No'):
    
    clb = CollaborativeFilter('zomato.csv', 'Country-code.csv')
    if type(country) == str:
        print(country)
        data = clb.country_filter(country)
    elif type(city) == str:
        data = clb.filter_city(city)
    else:
        data = clb.dataset
    data = clb.filter_cuisine(cuisine, data)
    data = clb.filter_online_delivery(data, online_order)
    data = clb.filter_table_booking(data, table_book)
    data = clb.filter_rating(data, rating)
    if (price_min_rate > price_max_rate):
        raise(InvalidRangeException('Lower range should be less than upper range'))
        
    if price_min_rate >= 1 and price_max_rate <= 5:
        data = clb.filter_price_range(data, price_min_rate, price_max_rate)
    elif price_min_rate < 5:
        data = clb.filter_price_range(data, price_min_rate, 5)
    elif price_max_rate <= 5:
        data = clb.filter_price_range(data, 1, price_max_rate)
    else:
        data = clb.filter_avg_cost(data, price_min_rate, price_max_rate)
    
    return data

In [8]:
clb = ContentFiltering('zomato.csv', 'Country-code.csv')

Reading data from zomato.csv


In [9]:
data = clb.country_filter('india')
b = clb.filter_price_range(data, 1, 4)

1


In [10]:
clb.filter_cuisine(['cafe', 'gujarati'], data)

['cafe', 'gujarati']
cafe
gujarati
(cafe|restaurant cafe|gujarati)
      Restaurant ID                           Restaurant Name  Country Code  \
624         3400025                                Jahanpanah             1   
625         3400341                       Rangrezz Restaurant             1   
626         3400005                   Time2Eat - Mama Chicken             1   
627         3400021     Chokho Jeeman Marwari Jain Bhojanalya             1   
628         3400017                            Pinch Of Spice             1   
629         3400325                                 MoMo Cafe             1   
630         3400059                     Peshawri - ITC Mughal             1   
631         3400060                     Taj Bano - ITC Mughal             1   
632         3400348                                    G Thal             1   
633         3400072              Dawat-e-Nawab - Radisson Blu             1   
634         3400073               The Latitude - Radisson Blu   

In [11]:
clb.filter_city('delhi')

      Restaurant ID             Restaurant Name  Country Code       City  \
2560       18287358                  Food Cloud             1  New Delhi   
2561       18216944                   Burger.in             1  New Delhi   
2562         313333             Days of the Raj             1  New Delhi   
2563       18384127              Dilli Ka Dhaba             1  New Delhi   
2564            582                   Govardhan             1  New Delhi   
2565       18414465              Mezbaan Grills             1  New Delhi   
2566         304243                  Say Cheese             1  New Delhi   
2567           3554                      Southy             1  New Delhi   
2568       18369872                     Monosoz             1  New Delhi   
2569            948                       Waves             1  New Delhi   
2570           2853                Delhi Darbar             1  New Delhi   
2571       18433900                     Chateau             1  New Delhi   
2572       1

In [12]:
clb.filter_rating(data, 4.2)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
628,3400017,Pinch Of Spice,1,Agra,"23/453, Opposite Sanjay Cinema, Wazipura Road,...",Civil Lines,"Civil Lines, Agra",78.007553,27.201725,"North Indian, Chinese, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,3,4.2,Green,Very Good,177
630,3400059,Peshawri - ITC Mughal,1,Agra,"ITC Mughal, Fatehabad Road, Tajganj, Agra","ITC Mughal, Tajganj","ITC Mughal, Tajganj, Agra",78.044095,27.160934,"North Indian, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,4,4.3,Green,Very Good,133
637,3400346,Sheroes Hangout,1,Agra,"Opposite The Gateway Hotel, Fatehabad Road, Ta...",Tajganj,"Tajganj, Agra",78.040165,27.161850,"Cafe, North Indian, Chinese",...,Indian Rupees(Rs.),No,No,No,No,1,4.9,Dark Green,Excellent,77
641,3400105,Pizza Hut,1,Agra,"8, Handicraft Nagar, Fatehabad Road, Tajganj, ...",Tajganj,"Tajganj, Agra",78.034714,27.161694,"Italian, Pizza",...,Indian Rupees(Rs.),No,No,No,No,2,4.4,Green,Very Good,134
642,3400326,Tea'se Me - Rooftop Tea Boutique,1,Agra,"Near Purani Mandi Crossing,Fatehabad Road, Taj...",Tajganj,"Tajganj, Agra",0.000000,0.000000,"Chinese, Italian, Continental, North Indian",...,Indian Rupees(Rs.),No,No,No,No,3,4.2,Green,Very Good,166
644,111895,650 - The Global Kitchen,1,Ahmedabad,"Shreekunj Mandapam, Beside Golden Tulip Bunglo...",Ambavadi,"Ambavadi, Ahmedabad",72.537574,23.010451,"Chinese, Italian, North Indian, Mexican, Medit...",...,Indian Rupees(Rs.),No,No,No,No,3,4.2,Green,Very Good,1582
646,18396250,Huber & Holly,1,Ahmedabad,"7 B, Circle B, Opposite Rajpath Club, Sarkhej...",Bodakdev,"Bodakdev, Ahmedabad",72.512395,23.038311,"Ice Cream, Desserts, Continental",...,Indian Rupees(Rs.),No,Yes,No,No,1,4.5,Dark Green,Excellent,217
648,113433,Fozzie's Pizzaiolo,1,Ahmedabad,"Ground Floor, Maruti Crystal, Opposite Rajpath...",Bodakdev,"Bodakdev, Ahmedabad",72.509806,23.033069,"Pizza, Italian, Beverages, Desserts",...,Indian Rupees(Rs.),No,Yes,No,No,3,4.3,Green,Very Good,731
649,18438909,La Pino'z Pizza,1,Ahmedabad,"Shop 10, Circle B, Nyay Marg, Bodakdev, Ahmed...",Bodakdev,"Bodakdev, Ahmedabad",72.512487,23.038231,"Pizza, Italian",...,Indian Rupees(Rs.),No,Yes,No,No,2,4.4,Green,Very Good,113
650,18143128,Mocha,1,Ahmedabad,"6-9, Ground Floor, Devashish Business Park, Op...",Bodakdev,"Bodakdev, Ahmedabad",72.511307,23.031851,"Cafe, Continental, Desserts",...,Indian Rupees(Rs.),No,Yes,No,No,3,4.4,Green,Very Good,944


In [13]:
zomato.iloc[0]

Restaurant ID                                                     6317637
Restaurant Name                                          Le Petit Souffle
Country Code                                                          162
City                                                          Makati City
Address                 Third Floor, Century City Mall, Kalayaan Avenu...
Locality                        Century City Mall, Poblacion, Makati City
Locality Verbose        Century City Mall, Poblacion, Makati City, Mak...
Longitude                                                         121.028
Latitude                                                          14.5654
Cuisines                                       French, Japanese, Desserts
Average Cost for two                                                 1100
Currency                                                 Botswana Pula(P)
Has Table booking                                                     Yes
Has Online delivery                   

In [14]:
zomato.shape

(9551, 21)

In [15]:
## Type of cities in dataset
cities = zomato['City'].unique()
cities

array(['Makati City', 'Mandaluyong City', 'Pasay City', 'Pasig City',
       'Quezon City', 'San Juan City', 'Santa Rosa', 'Tagaytay City',
       'Taguig City', 'Brasí_lia', 'Rio de Janeiro', 'Sí£o Paulo',
       'Albany', 'Armidale', 'Athens', 'Augusta', 'Balingup',
       'Beechworth', 'Boise', 'Cedar Rapids/Iowa City', 'Chatham-Kent',
       'Clatskanie', 'Cochrane', 'Columbus', 'Consort', 'Dalton',
       'Davenport', 'Des Moines', 'Dicky Beach', 'Dubuque',
       'East Ballina', 'Fernley', 'Flaxton', 'Forrest', 'Gainesville',
       'Hepburn Springs', 'Huskisson', 'Inverloch', 'Lakes Entrance',
       'Lakeview', 'Lincoln', 'Lorn', 'Macedon', 'Macon', 'Mayfield',
       'Mc Millan', 'Middleton Beach', 'Miller', 'Monroe', 'Montville',
       'Ojo Caliente', 'Orlando', 'Palm Cove', 'Paynesville', 'Penola',
       'Pensacola', 'Phillip Island', 'Pocatello', 'Potrero', 'Princeton',
       'Rest of Hawaii', 'Savannah', 'Singapore', 'Sioux City',
       'Tampa Bay', 'Tanunda', 'Trentha

In [17]:
## Type of cuisines in the dataset
clb.cuisine_bag

['finger food',
 'street food',
 'indian',
 'nepalese',
 'teriyaki',
 'burmese',
 'belgian',
 'afghani',
 'gourmet fast food',
 'moroccan',
 'filipino',
 'cuban',
 'indonesian',
 'goan',
 'healthy food',
 'mithai',
 'maharashtrian',
 'peranakan',
 'gujarati',
 'cafe',
 'peruvian',
 'seafood',
 'bar food',
 'beverages',
 'brazilian',
 'biryani',
 'drinks only',
 'european',
 'north indian',
 'german',
 'british',
 'kashmiri',
 'italian',
 'burger',
 'asian fusion',
 'charcoal grill',
 'caribbean',
 'dim sum',
 'cuisine varies',
 'breakfast',
 'naga',
 'mediterranean',
 'coffee and tea',
 'hawaiian',
 'sushi',
 'salad',
 'fish and chips',
 'raw meats',
 'chinese',
 'malwani',
 'tapas',
 'restaurant cafe',
 'latin american',
 'mangalorean',
 'sandwich',
 'fast food',
 'japanese',
 'bubble tea',
 'patisserie',
 'greek',
 'fusion',
 'ice cream',
 'western',
 'singaporean',
 'spanish',
 'american',
 'kebab',
 'soul food',
 'izgara',
 'parsi',
 'sunda',
 'pizza',
 'contemporary',
 'tea',
 'so

In [20]:
# Has Online Booking type 
clb.dataset['Has Online delivery'].unique()

array(['No', 'Yes'], dtype=object)

In [21]:
# Has Table Booking type 
clb.dataset['Has Table booking'].unique()

array(['Yes', 'No'], dtype=object)

In [22]:
clb.dataset['Rating text'].unique()

array(['Excellent', 'Very Good', 'Good', 'Average', 'Not rated', 'Poor'],
      dtype=object)

In [28]:
clb.dataset['Rating color'].unique()

array(['Dark Green', 'Green', 'Yellow', 'Orange', 'White', 'Red'],
      dtype=object)

In [42]:
filter_data(country = 'india', cuisine = 'gujarati', rating = 4, price_min_rate= 3, price_max_rate= 890)

Reading data from zomato.csv
india
1
['gujarati']
gujarati
Reading data from zomato.csv


TypeError: Country Name is a string

In [34]:
clb.country_filter('india')

1


Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
624,3400025,Jahanpanah,1,Agra,"E 23, Shopping Arcade, Sadar Bazaar, Agra Cant...",Agra Cantt,"Agra Cantt, Agra",78.011544,27.161661,"North Indian, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,3,3.9,Yellow,Good,140
625,3400341,Rangrezz Restaurant,1,Agra,"E-20, Shopping Arcade, Sadar Bazaar, Agra Cant...",Agra Cantt,"Agra Cantt, Agra",0.000000,0.000000,"North Indian, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,2,3.5,Yellow,Good,71
626,3400005,Time2Eat - Mama Chicken,1,Agra,"Main Market, Sadar Bazaar, Agra Cantt, Agra",Agra Cantt,"Agra Cantt, Agra",78.011608,27.160832,North Indian,...,Indian Rupees(Rs.),No,No,No,No,2,3.6,Yellow,Good,94
627,3400021,Chokho Jeeman Marwari Jain Bhojanalya,1,Agra,"1/48, Delhi Gate, Station Road, Raja Mandi, Ci...",Civil Lines,"Civil Lines, Agra",77.998092,27.195928,Rajasthani,...,Indian Rupees(Rs.),No,No,No,No,2,4.0,Green,Very Good,87
628,3400017,Pinch Of Spice,1,Agra,"23/453, Opposite Sanjay Cinema, Wazipura Road,...",Civil Lines,"Civil Lines, Agra",78.007553,27.201725,"North Indian, Chinese, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,3,4.2,Green,Very Good,177
629,3400325,MoMo Cafe,1,Agra,"Courtyard by Marriott Agra, Phase 2, Fatehabad...","Courtyard by Marriott Agra, Tajganj","Courtyard by Marriott Agra, Tajganj, Agra",0.000000,0.000000,"North Indian, European",...,Indian Rupees(Rs.),No,No,No,No,4,4.0,Green,Very Good,45
630,3400059,Peshawri - ITC Mughal,1,Agra,"ITC Mughal, Fatehabad Road, Tajganj, Agra","ITC Mughal, Tajganj","ITC Mughal, Tajganj, Agra",78.044095,27.160934,"North Indian, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,4,4.3,Green,Very Good,133
631,3400060,Taj Bano - ITC Mughal,1,Agra,"ITC Mughal, Fatehabad Road, Tajganj, Agra","ITC Mughal, Tajganj","ITC Mughal, Tajganj, Agra",78.044095,27.160934,Mughlai,...,Indian Rupees(Rs.),No,No,No,No,4,4.0,Green,Very Good,41
632,3400348,G Thal,1,Agra,"3/20, KPS Tower, Near Tulsi Talkies, Bypass Ro...",Khandari,"Khandari, Agra",0.000000,0.000000,"Rajasthani, Gujarati, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,3,3.6,Yellow,Good,59
633,3400072,Dawat-e-Nawab - Radisson Blu,1,Agra,"Radisson Blu, Taj East Gate Road, Tajganj, Agra","Radisson Blu, Tajganj","Radisson Blu, Tajganj, Agra",78.057044,27.163303,"North Indian, Mughlai",...,Indian Rupees(Rs.),No,No,No,No,4,3.8,Yellow,Good,46


In [15]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re


class CSVNotFoundException(Exception):
    pass


class InvalidRangeException(Exception):
    pass


class CollaborativeFilter(object):

    def __init__(self, csvPath, countryPath):
        self.dataset = self.get_data_csv(csvPath)
        self.countryCode = self.get_country_csv(countryPath)
        self.create_cuisine_bag()

    def filter_online_delivery(self, data, param):
        data = data[data['Has Online delivery'] == param]
        return data

    def filter_table_booking(self, data, param):
        data = data[data['Has Table booking'] == param]
        return data

    def filter_price_range(self, data, lower_range=1, upper_range=5):

        if (type(data) != pd.DataFrame):
            raise (TypeError('Not a dataframe'))

        if not (lower_range >= 1 and upper_range <= 5):
            raise (InvalidRangeException('only ranging from 1 to 5'))
        mask1 = data['Price range'] >= lower_range
        mask2 = data['Price range'] <= upper_range
        data = data[mask1 & mask2]
        return data

    def filter_avg_cost(self, dataset, lower_range=0, upper_range=800000):
        mask1 = data['Average Cost for two'] >= lower_range
        mask2 = data['Average Cost for two'] <= upper_range
        data = data[mask1 & mask2]
        return data

    def country_filter(self, country):
        if type(country) != str:
            raise (TypeError('Country Name is a string'))
        code = self.countryCode[self.countryCode['country'] == country.lower()]['code'][0]
        print(code)
        data = self.dataset[self.dataset['Country Code'] == code]
        return data

    def get_country_csv(self, csvPath):

        if not (os.path.exists(csvPath)):
            raise (FileNotFoundError('{} is not found'.format(csvPath)))

        if not (os.path.isfile(csvPath)):
            raise (IsADirectoryError('{} is not a file'.format(csvPath)))

        if (not csvPath.endswith('.csv')):
            raise (CSVNotFoundException('{} is not a csv'.format(csvPath)))

        country_code = pd.read_csv(csvPath, encoding='ISO-8859-1')
        country_code['country'] = country_code['country'].apply(lambda x: x.lower())
        return country_code

    def get_data_csv(self, csvPath):
        if not (os.path.exists(csvPath)):
            raise (FileNotFoundError('{} is not found'.format(csvPath)))

        if not (os.path.isfile(csvPath)):
            raise (IsADirectoryError('{} is not a file'.format(csvPath)))

        if (not csvPath.endswith('.csv')):
            raise (CSVNotFoundException('{} is not a csv'.format(csvPath)))

        print('Reading data from {}'.format(csvPath))
        dataset = pd.read_csv(csvPath, encoding='ISO-8859-1')
        return dataset

    def create_cuisine_bag(self):
        slice_strip = lambda x: [y.lower().strip() for y in x] if type(x) != float else x
        bag = set(
            [y for x in self.dataset['Cuisines'].str.split(',').apply(slice_strip) if type(x) != float for y in x])
        self.cuisine_bag = list(bag)

    def filter_cuisine(self, cuisine_list, dataset=None):
        bag = []
        if type(cuisine_list) != list:
            cuisine_list = [cuisine_list]
        #print(cuisine_list)
        for l in cuisine_list:
            #print(l)
            bag.extend([x for x in filter(lambda x: l in x, self.cuisine_bag)])
        bag_exp = '|'.join([x for x in bag])
        bag_exp = '(' + bag_exp + ')'
        #print(bag_exp)
        if (type(dataset) == None):
            dataset = self.dataset
        mask1 = dataset['Cuisines'].apply(lambda x: x.find(bag_exp) != None)
        data = dataset[mask1]
        # print (data)
        return data

    def filter_city(self, city):
        if city == None:
            raise (TypeError('City is not present'))
        mask = self.dataset['City'].apply(lambda x: city.lower() in x.lower())
        data = self.dataset[mask]
        # print (data)
        return data

    def filter_rating(self, dataset, min_rating):

        if type(min_rating) is not float:
            raise (TypeError('Only float values allowed'))
        elif not (min_rating >= 1) and (min_rating <= 5):
            raise (InvalidRangeException('Only from 1 to 5'))

        mask = dataset['Aggregate rating'] >= min_rating
        data = dataset[mask]
        return data


def filter_data(country=None, city=None, cuisine=None, rating=1, price_min_rate=1, price_max_rate=5, table_book='No',
                online_order='No'):
    clb = CollaborativeFilter('zomato.csv', 'Country-code.csv')
    if type(country) == str:
        print(country)
        data = clb.country_filter(country)
    elif type(city) == str:
        data = clb.filter_city(city)
    else:
        data = clb.dataset
    data = clb.filter_cuisine(cuisine, data)
    data = clb.filter_online_delivery(data, online_order)
    data = clb.filter_table_booking(data, table_book)
    data = clb.filter_rating(data, float(rating))
    if (price_min_rate > price_max_rate):
        raise (InvalidRangeException('Lower range should be less than upper range'))

    if price_min_rate >= 1 and price_max_rate <= 5:
        data = clb.filter_price_range(data, price_min_rate, price_max_rate)
    elif price_min_rate < 5:
        data = clb.filter_price_range(data, price_min_rate, 5)
    elif price_max_rate <= 5:
        data = clb.filter_price_range(data, 1, price_max_rate)
    else:
        data = clb.filter_avg_cost(data, price_min_rate, price_max_rate)

    return data

In [19]:

data = filter_data(city = 'agra', cuisine = ['gujarati'], rating = 4.5, price_min_rate= 1, price_max_rate= 5000)

Reading data from zomato.csv


In [20]:
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
637,3400346,Sheroes Hangout,1,Agra,"Opposite The Gateway Hotel, Fatehabad Road, Ta...",Tajganj,"Tajganj, Agra",78.040165,27.16185,"Cafe, North Indian, Chinese",...,Indian Rupees(Rs.),No,No,No,No,1,4.9,Dark Green,Excellent,77
