In [1]:
# Setting up the workplace
import pandas as pd

# Importing functions with updated names
from functions import (
    description_cleaner, 
    vocabulary_creator, 
    reverse_index_creator, 
    compute_TF, 
    compute_IDF, 
    compute_TF_IDF,
    drop_down_menu,
    extract_facilities
)
# Importing engines
from engine import non_ranked_engine, ranked_engine, upgraded_ranked_engine
from jupyter_ui_poll import ui_events
import time
# Load the restaurant dataset: a tab-separated file whose first row holds the column names
restaurants_df = pd.read_csv(
    "restaurants_i.tsv",
    sep="\t",
    header=0,
)

### ***Main preprocessing***

In [2]:
# Pull every restaurant description out of the DataFrame as a plain Python list
restournats_descriptions = restaurants_df["description"].tolist()

# Run the raw texts through the project's description_cleaner helper
parsed_descriptions = description_cleaner(restournats_descriptions)

# vocabulary_creator hands back two things: the descriptions re-encoded as numerical IDs
# and the vocabulary dictionary used for that encoding
ID_descritpion, vocabulary = vocabulary_creator(parsed_descriptions)

# Build the reverse index from the ID-encoded descriptions
reverse_index = reverse_index_creator(ID_descritpion)

# Persist the vocabulary to disk for later reuse; the Series index is written
# alongside the values and no header row is emitted
pd.Series(vocabulary).to_csv(
    "vocabulary.csv",
    header=False,
    index=True,
    encoding="utf-8",
)


### ***Non-ranked search engine***

In [3]:
# Flag controlling the search loop; it is overwritten with the engine's return value
done = False

# Example query for testing the non-ranked search engine
sample_input = "modern seasonal cuisine"
top_k_to_print = 5

# Set to True to type custom queries instead of using the fixed example above
ask_user_for_query = False

# Keep querying until the engine reports completion. Retrying only makes sense when
# the query can change between iterations, i.e. when the user types it in.
while not done:
    if ask_user_for_query:
        sample_input = input("what do you want to eat?\n")

    # Query the non-ranked engine with the input text, the restaurant DataFrame,
    # the vocabulary and the reverse index; its return value becomes the loop flag
    done = non_ranked_engine(sample_input, restaurants_df, vocabulary, reverse_index, top_k_to_print)

    if not ask_user_for_query:
        # A fixed query gives the same answer every time: stop after one pass
        # instead of looping forever if the engine does not report completion
        break

We found 41 matches!

╭─────────────────────────┬─────────────────────────┬───────────────────────────┬───────────────────────────╮
│ Restaurant Name         │ Address                 │ Description               │ Website                   │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┤
│ Mima                    │ via Madonnelle 9        │ You’ll be won over by the │ http://www.domo20.com/res │
│                         │                         │ seasonal Mediterranea...  │ taurant                   │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┤
│ Materia | Spazio Cucina │ via Teatro Massimo 29   │ The entrance to this      │ https://www.materiaspazio │
│                         │                         │ restaurant is typical of  │ cucina.it/                │
│                         │                         │ a...                      │                 

### ***Computing TF-IDF***

In [4]:
# Term-frequency step, computed from the ID-encoded descriptions
TF_by_restournats = compute_TF(ID_descritpion)

# Inverse-document-frequency step: needs the reverse index and the number of documents
total_documents = len(ID_descritpion)
IDF_by_words = compute_IDF(reverse_index, total_documents)

# Combine both into the TF-IDF reverse index used by the ranked engines
reverse_index_tf_idf = compute_TF_IDF(
    TF_by_restournats,
    IDF_by_words,
)


### ***Ranked search engine***

In [5]:
# Flag controlling the search loop below
done = False

# Example query for testing the ranked search engine
sample_input = "modern seasonal cuisine"
top_k_to_print = 3

# The loop only matters for custom searches (user input); as written it runs exactly once
while not done:
    # Uncomment the next line to enable user input
    # sample_input = input("what do you want to eat?\n")

    # Query the ranked engine with the TF-IDF reverse index and the IDF values,
    # keeping whatever it returns for later inspection
    top_rest = ranked_engine(sample_input, restaurants_df, vocabulary, reverse_index_tf_idf, IDF_by_words, top_k_to_print)
    done = True



╭─────────────────────────┬─────────────────────────┬───────────────────────────┬───────────────────────────┬──────────╮
│ Restaurant Name         │ Address                 │ Description               │ Website                   │   Cosine │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Saporium                │ località Palazzetto 110 │ Saporium is the new fine- │ http://www.saporium.com/i │        1 │
│                         │                         │ dining restaurant at t... │ t/borgo-santo.pietro/     │          │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Le Vie del Borgo        │ via alla Piazza 6       │ Le Vie del Borgo is       │ https://www.leviedelborgo │        1 │
│                         │                         │ situated in a restored    │ guesthouse.it/            │          │
│                         │     

### ***Verifying cosine similarity computation***

To verify that the cosine similarity is computed correctly, we can use the description of a restaurant as the query: that restaurant should come back with a cosine similarity of 1, with a large gap to the second place.

##### Description from restaurant L'Acciuga:
Anchovies, king prawns, cuttlefish and freshly caught fish are just some of the options available in this restaurant specialising in fish and seafood, where guests will be delighted by the fresh flavours of the ingredients. The small wine list, which also includes a few non-Italian labels, offers good value for money. The maritime-style decor evokes the interior of an old ship, while the warm welcome is typical of the region. All in all, an excellent choice!

In [6]:
# Sanity-check query for the ranked search engine: use a restaurant's own description
# as the query. Two restaurants are named "L'Acciuga"; .iloc[0] picks the first match.
acciuga_descriptions = restaurants_df.loc[
    restaurants_df["restaurantName"] == "L'Acciuga", "description"
]
sample_input = acciuga_descriptions.iloc[0]
top_k_to_print = 3

# Single call, no retry loop: the query is fixed. The result gets its own name rather
# than being stored in the `done` loop flag, which it would otherwise overwrite.
top_rest_self_query = ranked_engine(sample_input, restaurants_df, vocabulary, reverse_index_tf_idf, IDF_by_words, top_k_to_print)

╭───────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬──────────╮
│ Restaurant Name   │ Address                   │ Description               │ Website                   │   Cosine │
├───────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ L'Acciuga         │ viale Francesco Baracca   │ Anchovies, king prawns,   │ https://lacciugaosteria.e │ 1        │
│                   │ 74                        │ cuttlefish and freshly    │ atbu.com/?lang=it         │          │
│                   │                           │ ...                       │                           │          │
├───────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Cetaria           │ piazza della Repubblica 9 │ This beautiful restaurant │ https://www.cetariaristor │ 0.400065 │
│                   │                           │ inland from Sa

### ***Upgraded ranked engine***

In [7]:

# Collect the values found in the facilities and cuisine-type columns
# (the same extract_facilities helper is applied to both)
facilities = extract_facilities(restaurants_df["facilitiesServices"])
cusine_types = extract_facilities(restaurants_df["cuisineType"])

# Run the upgraded ranked engine; it returns a pair of values
top_result, restoraunt_cost = upgraded_ranked_engine(
    facilities,
    cusine_types,
    vocabulary,
    IDF_by_words,
    reverse_index_tf_idf,
    restaurants_df,
)

# Leave the second returned value as the cell's last expression so the notebook displays it
restoraunt_cost

Text(value='modern seasonal cuisine', description='what do you want to eat?', placeholder='type something', st…

BoundedIntText(value=5, description='How many restourants to display?', layout=Layout(width='45%'), style=Desc…

SelectionRangeSlider(description='Choose a price range:', index=(0, 3), layout=Layout(width='45%'), options=((…

Dropdown(description='Choose the cusine specialty: ', options=("don't care", 'calabrian', 'country cooking', '…

VBox(children=(HBox(children=(Checkbox(value=False, description="don't care", layout=Layout(max_width='auto', …

Button(description='Serch for some restournats', layout=Layout(width='250px'), style=ButtonStyle())

╭───────────────────┬──────────────────────────┬───────────────────────────┬───────────────────────────┬──────────╮
│ Restaurant Name   │ Address                  │ Description               │ Website                   │   Cosine │
├───────────────────┼──────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Ca' Del Moro      │ località Erbin 31        │ Situated within the La    │ https://www.cadelmoro.win │  1.4     │
│                   │                          │ Collina dei Ciliegi       │ e/it                      │          │
│                   │                          │ wine...                   │                           │          │
├───────────────────┼──────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ La Bandiera       │ contrada Pastini 4       │ Although it takes a while │ https://www.labandiera.it │  1.38629 │
│                   │                          │ to reach this restaur..

['€€€', '€€€', '€€', '€€€', '€€€', '€€€€', '€€€', '€€€€']