In [1]:
# Setting up the workspace
import pandas as pd

from functions import description_cleaner, vocabulary_creator, reverse_index_creator, compute_TF, compute_IDF, compute_TF_IDF, reverse_TF_IDF

from engine import non_ranked_engine, ranked_engine

# Load the restaurant dataset: a tab-separated file whose first row holds the column names
restaurants_df = pd.read_csv(
    "restaurants_i.tsv",
    sep="\t",
    header=0,
)

### ***Main preprocessing***

In [2]:
# Pull out the free-text restaurant descriptions (the 'description' column).
# The variable names in this cell keep their original spelling because later cells refer to them.
restournats_descriptions = restaurants_df["description"]

# Run the raw descriptions through the project's description_cleaner helper
parsed_descriptions = description_cleaner(restournats_descriptions)

# Build the vocabulary and encode every cleaned description as word IDs;
# vocabulary_creator hands back the per-description ID lists together with the vocabulary dictionary
ID_descritpion, vocabulary = vocabulary_creator(parsed_descriptions)

# Derive the reverse index (word ID -> document IDs) from the encoded descriptions
reverse_index = reverse_index_creator(ID_descritpion)

# Persist the word-to-ID vocabulary as a header-less CSV; the Series index is written as the first column
pd.Series(vocabulary).to_csv(
    "vocabulary.csv",
    header=False,
    index=True,
    encoding="utf-8",
)


### ***Non-ranked search engine***

In [3]:
# Fixed query used to demonstrate the non-ranked search engine
sample_input = "modern seasonal cuisine"

# Loop flag: stays False until the engine call returns a truthy value
done = False

# Keep querying until non_ranked_engine signals that the search is complete.
# NOTE(review): with the input() line commented out, the same query is re-sent on every pass,
# so the loop only terminates if the engine returns a truthy value for it — confirm.
while not done:
    # Uncomment the next line to enable user input
    # sample_input = input("what do you want to eat?\n")

    # Query the engine with the search text, the restaurant DataFrame,
    # the vocabulary and the reverse index
    done = non_ranked_engine(
        sample_input,
        restaurants_df,
        vocabulary,
        reverse_index,
    )

We found 41 matches!

╭─────────────────────────┬─────────────────────────┬───────────────────────────┬───────────────────────────╮
│ Restaurant Name         │ Address                 │ Description               │ Website                   │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┤
│ Mima                    │ via Madonnelle 9        │ You’ll be won over by the │ http://www.domo20.com/res │
│                         │                         │ seasonal Mediterranea...  │ taurant                   │
├─────────────────────────┼─────────────────────────┼───────────────────────────┼───────────────────────────┤
│ Materia | Spazio Cucina │ via Teatro Massimo 29   │ The entrance to this      │ https://www.materiaspazio │
│                         │                         │ restaurant is typical of  │ cucina.it/                │
│                         │                         │ a...                      │                 

### ***Computing TF-IDF***

In [4]:
# Term frequencies derived from the ID-encoded descriptions
# (presumably one entry per restaurant — confirm in functions.py)
TF_by_restournats = compute_TF(ID_descritpion)
# Number of encoded descriptions, used as the document count for the IDF computation
total_documents = len(ID_descritpion)
# IDF values computed from the reverse index and the document count
# (presumably keyed by word ID — confirm in functions.py)
IDF_by_words = compute_IDF(reverse_index, total_documents)

# Combine the TF and IDF results; this is the structure the ranked engine call receives later on
reverse_index_tf_idf = compute_TF_IDF(TF_by_restournats, IDF_by_words)

# Apply reverse_TF_IDF to the structure above
# (presumably re-keys it per document — TODO confirm in functions.py)
index_tf_idf = reverse_TF_IDF(reverse_index_tf_idf)

### ***Ranked search engine***

In [None]:
# Fixed query used to demonstrate the ranked search engine
sample_input = "modern seasonal cuisine"

# Loop flag: stays False until the engine call returns a truthy value
done = False

# Keep querying until ranked_engine signals that the search is complete.
# NOTE(review): with the input() line commented out, the same query is re-sent on every pass,
# so the loop only terminates if the engine returns a truthy value for it — confirm.
while not done:
    # Uncomment the next line to enable user input
    # sample_input = input("what do you want to eat?\n")

    # Query the engine with the search text, the restaurant DataFrame, the vocabulary,
    # the TF-IDF reverse index and the IDF values
    done = ranked_engine(
        sample_input,
        restaurants_df,
        vocabulary,
        reverse_index_tf_idf,
        IDF_by_words,
    )

# 658, 0.8248382438257549)
## (6, 0.8248382438257549)

We found 15 matches!

╭──────────────────────────┬───────────────────────────┬───────────────────────────┬───────────────────────────┬──────────╮
│ Restaurant Name          │ Address                   │ Description               │ Website                   │   Cosine │
├──────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Stube Hermitage          │ via Castelletto Inferiore │ Situated in a tranquil    │ https://stubehermitage.it │  0.33686 │
│                          │ 69                        │ and secluded location,    │ /                         │          │
│                          │                           │ t...                      │                           │          │
├──────────────────────────┼───────────────────────────┼───────────────────────────┼───────────────────────────┼──────────┤
│ Angiò-Macelleria di Mare │ viale Africa 28/h         │ The phrase “macellaria di │ https://albertoangiolucci