In [58]:
import numpy as np 
import pandas as pd
import Levenshtein
from utils import *

In [3]:
# Load the restaurant table; the path is relative to the notebook's working directory.
# NOTE(review): the rendered output shows an "Unnamed: 0" column, which looks like a
# saved index column — consider read_csv(..., index_col=0) if nothing relies on it.
df = pd.read_csv("../data/restaurant_info.csv")
# Bare last expression so the notebook displays the frame.
df

Unnamed: 0,restaurantname,pricerange,area,food,phone,addr,postcode
0,saint johns chop house,moderate,west,british,01223 353110,21 - 24 northampton street,c.b 3
1,restaurant alimentum,moderate,north,modern european,01223 413000,152 - 154 hills road,c.b 2
2,pizza hut cherry hinton,moderate,south,italian,01223 323737,g4 cambridge leisure park clifton way cherry h...,c.b 1
3,the gardenia,moderate,west,romanian,01223 356354,2 rose crescent city centre,c.b 2
4,loch fyne,moderate,centre,seafood,01223 362433,the little rose 37 trumpington street,c.b 2
...,...,...,...,...,...,...,...
104,panahar,expensive,centre,mediterranean,,,
105,ugly duckling,expensive,centre,catalan,,12 st. johns street city centre,
106,pizza express fen ditton,moderate,centre,european,,jesus lane fen ditton,
107,sesame restaurant and bar,moderate,,turkish,,,


In [169]:
# Vocabularies used for keyword matching: the sorted unique values of each column.
# Missing values are dropped up front instead of being replaced by a sentinel string
# and removed afterwards; that way the cell does not fail when a column has no
# missing values, and a NaN can never end up mixed with the strings being sorted.
food_types = list(np.unique(df.food.dropna()))
# 'greek' is added by hand on top of the values found in the data; guard against
# a duplicate entry in case the data already contains it.
if 'greek' not in food_types:
    food_types.append('greek')

pricerange = np.unique(df.pricerange.dropna())
areas = list(np.unique(df.area.dropna()))

print(food_types)
print(pricerange)
print(areas)

['african', 'asian oriental', 'australasian', 'bistro', 'british', 'catalan', 'chinese', 'cuban', 'european', 'french', 'fusion', 'gastropub', 'indian', 'international', 'italian', 'jamaican', 'japanese', 'korean', 'lebanese', 'mediterranean', 'modern european', 'moroccan', 'north american', 'persian', 'polynesian', 'portuguese', 'romanian', 'seafood', 'spanish', 'steakhouse', 'swiss', 'thai', 'traditional', 'turkish', 'tuscan', 'vietnamese', 'greek']
['cheap' 'expensive' 'moderate']
['centre', 'east', 'north', 'south', 'west']


In [209]:
_MIN_WORD_LENGTH = 4  # tokens shorter than this are never matched
_FUZZY_CUTOFF = 3     # two strings "match" when their edit distance is below this


def _is_close(word, target):
    """Return True when `word` is within the fuzzy cutoff of `target`."""
    return Levenshtein.distance(word, target) < _FUZZY_CUTOFF


def _closest_match(word, candidates):
    """Return the candidate nearest to `word` as a str, or None if none is close enough.

    Ties are resolved in favour of the earliest candidate.
    """
    distances = [Levenshtein.distance(word, candidate) for candidate in candidates]
    if not distances:
        return None
    best = min(range(len(distances)), key=distances.__getitem__)
    if distances[best] < _FUZZY_CUTOFF:
        return str(candidates[best])
    return None


def _match_neighbour(neighbour, candidates):
    """Fuzzy-match the token that precedes a keyword; missing or very short tokens give None."""
    if neighbour is None or len(neighbour) < _MIN_WORD_LENGTH:
        return None
    return _closest_match(neighbour, candidates)


def find_preferences(sentence):
    """Extract food, price and area preferences from a free-text sentence.

    The sentence is lower-cased and split on whitespace. Each token of at least
    four characters is checked, in order, against:

    1. the two-word food types "north american" and "modern european"
       (fuzzy match on the token and the one after it);
    2. an exact food type, or a food type fuzzily matched from the token that
       precedes a word close to "food" / "restaurant";
    3. an exact area, or an area fuzzily matched from the token that precedes
       a word close to "area" / "part";
    4. an exact price range, or a price range fuzzily matched from the token
       that precedes a word close to "price" / "restaurant";
    5. otherwise, a fuzzy match of the token itself against the price ranges
       and areas that are still unset.

    Relies on the module-level ``food_types``, ``pricerange`` and ``areas``
    collections.

    Parameters
    ----------
    sentence : str
        The user utterance.

    Returns
    -------
    dict
        ``{"food": ..., "price": ..., "area": ...}``; each value is a string
        or ``None`` when nothing was recognised.
    """
    # split() without an argument collapses repeated whitespace, so stray double
    # spaces cannot insert empty tokens between a keyword and its neighbour.
    tokens = sentence.lower().split()
    f, p, a = None, None, None

    for i, word in enumerate(tokens):
        # Discard very short words
        if len(word) < _MIN_WORD_LENGTH:
            continue

        # Neighbours are None at the sentence boundaries. Indexing blindly would
        # raise IndexError for a trailing "north"/"modern" and would wrap around
        # to the last token when a keyword is the first word.
        prev_word = tokens[i - 1] if i > 0 else None
        next_word = tokens[i + 1] if i + 1 < len(tokens) else None

        # Two-word food types come first so that "north american" is read as a
        # cuisine and its first word is not consumed as an area.
        if next_word is not None and _is_close(word, "north") and _is_close(next_word, "american"):
            f = "north american"
        elif next_word is not None and _is_close(word, "modern") and _is_close(next_word, "european"):
            f = "modern european"

        # Food: exact word, or the word in front of "food" / "restaurant".
        elif word in food_types and not f:
            f = word
        elif (_is_close(word, "food") or _is_close(word, "restaurant")) and not f:
            # f is unset in this branch, so a failed lookup leaves it as None.
            f = _match_neighbour(prev_word, food_types)

        # Area: exact word, or the word in front of "area" / "part".
        elif word in areas:
            a = word
        elif (_is_close(word, "area") or _is_close(word, "part")) and not a:
            a = _match_neighbour(prev_word, areas)

        # Price: exact word, or the word in front of "price" / "restaurant".
        elif word in pricerange:
            p = word
        elif (_is_close(word, "price") or _is_close(word, "restaurant")) and not p:
            p = _match_neighbour(prev_word, pricerange)

        # There are only a few areas and price ranges, so as a last resort try
        # the token itself as a misspelling of one of them.
        else:
            if not p:
                p = _closest_match(word, pricerange)
            if not a:
                a = _closest_match(word, areas)

    return {"food": f, "price": p, "area": a}


In [210]:
# Fuzzy price match: "moderately" is not an exact price range but resolves to "moderate".
# The input deliberately starts with a space.
find_preferences(" looking for a moderately priced restaurant in the east part of town")

{'food': None, 'price': 'moderate', 'area': 'east'}

In [211]:
# Exact matches: "cheap" and "east" appear verbatim in the vocabularies.
find_preferences("im looking for a cheap restaurant in the east part of town")

{'food': None, 'price': 'cheap', 'area': 'east'}

In [212]:
# Spelling variant: "center" is fuzzily matched to the area "centre".
find_preferences("im looking for a restaurant in the center serving greek food")

{'food': 'greek', 'price': None, 'area': 'centre'}

In [213]:
# All three slots filled from one sentence, with the area as the final word.
find_preferences("im looking for gastropub food that is cheap in the east")

{'food': 'gastropub', 'price': 'cheap', 'area': 'east'}

In [214]:
# No area mentioned: the "area" slot stays None.
find_preferences("im looking for a moderately priced restaurant that serves turkish food")

{'food': 'turkish', 'price': 'moderate', 'area': None}

In [215]:
# Two-word food type: "north" must be read as part of the cuisine, not as an area.
find_preferences("how about north american food")

{'food': 'north american', 'price': None, 'area': None}

In [216]:
# Two-word food type: "modern european" is matched as a whole.
find_preferences("i want a modern european restaurant")

{'food': 'modern european', 'price': None, 'area': None}