In [1]:
import pandas as pd
from pathlib import Path
import importlib

import src.charles.utils as utils

# Reload the helper module so that changes made to it since the first import are picked up.
importlib.reload(utils)

# Data directory, expressed relative to the current working directory, and the
# base URL that the data-file download URLs are built from.
DATA_DIR = Path.cwd().joinpath('..', 'data')
DATA_URL = "https://github.com/prayer-position/Project_NLP/releases/latest/download"


In [10]:
# Data files the notebook relies on; utils.ensure_data is called once per file
# with the data directory, the expected local path and the matching download URL.
file_names = [
    "AttractionSubCategorie.csv",
    "Tripadvisor.csv",
    "AttractionSubType.csv",
    "cuisine.csv",
    "dietary_restrictions.csv",
    "restaurantType.csv",
]

for file_name in file_names:
    url = f"{DATA_URL}/{file_name}"
    utils.ensure_data(DATA_DIR, DATA_DIR / file_name, url)

Data found locally, skipping download
Data not found at e:\Documents\A4 - DIA\DIA\Project_NLP\notebooks\..\data\Tripadvisor.csv. Dowloading from GitHub
Download successful!
Data found locally, skipping download
Data found locally, skipping download
Data found locally, skipping download
Data found locally, skipping download


In [11]:
# Places table, read through the project helper.
df_path = DATA_DIR.joinpath('Tripadvisor.csv')
places = utils.read_places(df_path)

# Similarity results (one row per query place), read from JSON.
results_path = DATA_DIR.joinpath('similarity_results.json')
similarity_results = pd.read_json(results_path)

In [12]:
# Preview the first rows of the places table.
places.head()

Unnamed: 0,id,nom,rating,nbAvis,latitude,longitude,typeR,adresse,priceRange,closed,...,ap_exclusion,ap_inclusions,ap_introduction,ap_primary_supplier_attraction_id,ap_primary_supplier_subtype,ap_primary_ta_geo_id,ap_product_code,ap_product_highlights,ap_product_text,ap_raw
0,188467,Place des Vosges,4.108407,5663,48.855614,2.365553,A,"Place des Vosges 4th Arrondissement, 75004 Par...",,0.0,...,,,,,,,,,,
1,188468,Rue des Francs Bourgeois,3.316532,73,48.85814,2.35988,A,"Rue des Francs-Bourgeois, 75003 Paris France",,0.0,...,,,,,,,,,,
2,188470,Village Saint-Paul,3.017118,98,48.853733,2.361295,A,"Rue Saint-paul, 75004 Paris France",,0.0,...,,,,,,,,,,
3,188471,Au Passe-partout,2.743157,2,48.85347,2.3616,A,"21 rue Saint Paul, 75004 Paris France",,0.0,...,,,,,,,,,,
4,188472,Cloître des Billettes,2.942987,23,48.858,2.35498,A,"24 rue des Archives, 75004 Paris France",,0.0,...,,,,,,,,,,


In [13]:
# Preview the first rows of the similarity results (query id and its neighbours).
similarity_results.head()

Unnamed: 0,idplace,neighbours
0,188467,"[{'id': 1526800, 'confidence': 0.9516}, {'id':..."
1,188470,"[{'id': 292257, 'confidence': 0.9019}, {'id': ..."
2,188679,"[{'id': 2397509, 'confidence': 0.986}, {'id': ..."
3,188738,"[{'id': 188745, 'confidence': 0.9889}, {'id': ..."
4,188745,"[{'id': 189228, 'confidence': 0.9928}, {'id': ..."


## Level 1 evaluation

In [14]:
# Level 1 score: average of utils.lvl_1_eval over every query in similarity_results.
# The accumulator is deliberately not named `sum`: a notebook-global `sum` would
# shadow the built-in and break any later cell that calls sum(...).
lvl_1_total = 0
n_queries = len(similarity_results)
for i in range(n_queries):
    line = similarity_results.iloc[i]
    # Positional access: column 0 is the query place id, column 1 its neighbours.
    lvl_1_total += utils.lvl_1_eval(line.iloc[0], utils.dict_to_tuple(line.iloc[1]), places)

# An empty result set has no meaningful average: report NaN instead of raising
# ZeroDivisionError.
mean = lvl_1_total / n_queries if n_queries else float("nan")

print(mean)

0.8996924861471195


## Level 2 evaluation

In [15]:
def get_lvl_2_score(results):
    """Return the mean level-2 score over the rows of ``results``.

    Rows are read positionally: column 0 is the query place id and column 1 the
    neighbours entry, which ``utils.dict_to_tuple`` splits into the
    recommendations and a second value that is ignored here. When
    ``utils.lvl_2_eval`` returns ``None`` for a row, that row contributes
    ``len(recommendations)`` to the total instead.

    The function reads the notebook-level ``places`` table.

    Args:
        results: similarity results, laid out like ``similarity_results``.

    Returns:
        The mean score, or NaN when ``results`` is empty (there is nothing to
        average, and filtered subsets can legitimately be empty).
    """
    n_queries = len(results)
    if n_queries == 0:
        # Avoid ZeroDivisionError on empty subsets and skip the needless setup.
        return float("nan")

    # Named `total` rather than `sum` so the built-in is not shadowed.
    total = 0
    translation_df = utils.get_translation_dicts()
    for i in range(n_queries):
        line = results.iloc[i]
        recommendations, _ = utils.dict_to_tuple(line.iloc[1])
        score = utils.lvl_2_eval(line.iloc[0], recommendations, places, translation_df)
        if score is not None:
            total += score
        else:
            # No score could be computed: count the number of recommendations instead.
            total += len(recommendations)
    return total / n_queries

In [16]:
# Level 2 score over all similarity results.
print(get_lvl_2_score(similarity_results))

2.3633540372670807


The overall score is pretty bad. Let's break it down by place type (`typeR`) to see where the model has the most trouble.

In [17]:
# Level 2 score restricted to queries whose place has typeR == "R" (restaurants).
is_restaurant = places["typeR"] == "R"
restaurant_ids = places.loc[is_restaurant, 'id'].values
from_restaurant = similarity_results['idplace'].isin(restaurant_ids)
restaurant_results = similarity_results[from_restaurant]
print(get_lvl_2_score(restaurant_results))

0.3719298245614035


In [18]:
# Level 2 score restricted to queries whose place has typeR == "A" (attractions).
is_attraction = places["typeR"] == "A"
attraction_ids = places.loc[is_attraction, 'id'].values
from_attraction = similarity_results['idplace'].isin(attraction_ids)
attraction_results = similarity_results[from_attraction]
print(get_lvl_2_score(attraction_results))

2.45


In [19]:
# Level 2 score restricted to queries whose place has typeR == "H" (hotels).
is_hotel = places['typeR'] == "H"
hotel_ids = places.loc[is_hotel, 'id'].values
from_hotel = similarity_results['idplace'].isin(hotel_ids)
hotel_results = similarity_results[from_hotel]
print(get_lvl_2_score(hotel_results))

1.5


In [20]:
# Level 2 score restricted to queries whose place has typeR == "AP".
is_attrac_prod = places['typeR'] == "AP"
attrac_prod_ids = places.loc[is_attrac_prod, 'id'].values
from_attrac_prod = similarity_results['idplace'].isin(attrac_prod_ids)
attrac_prod_results = similarity_results[from_attrac_prod]
print(get_lvl_2_score(attrac_prod_results))

5.0


The metric does not work for the AP type, because we have no instructions on how to evaluate that kind of recommendation.

In [21]:
def see_metadata_reco(results, places):
    """Print the metadata of each query in ``results`` and of its recommendations.

    Rows are read positionally: column 0 is the query place id and column 1 the
    neighbours entry, from which ``utils.dict_to_tuple`` extracts the
    recommendations. Metadata is looked up with ``utils.get_metadata``.

    Args:
        results: similarity results to inspect.
        places: places table handed to ``utils.get_metadata``.
    """
    translation_df = utils.get_translation_dicts()
    for row_pos in range(len(results)):
        row = results.iloc[row_pos]
        query_metadata = utils.get_metadata(row.iloc[0], places, translation_df)
        print(f"Query metadata : {query_metadata}")
        recommendations, _ = utils.dict_to_tuple(row.iloc[1])
        for rank, reco_id in enumerate(recommendations):
            reco_metadata = utils.get_metadata(reco_id, places, translation_df)
            print(f"    Recommendation {rank} metadata : {reco_metadata}")

In [22]:
# Inspect the metadata behind the AP queries and their recommendations.
see_metadata_reco(attrac_prod_results, places)

Query metadata : set()
    Recommendation 0 metadata : set()
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : set()
    Recommendation 3 metadata : set()
    Recommendation 4 metadata : set()
Query metadata : set()
    Recommendation 0 metadata : set()
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : set()
    Recommendation 3 metadata : set()
    Recommendation 4 metadata : set()
Query metadata : set()
    Recommendation 0 metadata : set()
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : set()
    Recommendation 3 metadata : set()
    Recommendation 4 metadata : set()
Query metadata : set()
    Recommendation 0 metadata : set()
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : set()
    Recommendation 3 metadata : set()
    Recommendation 4 metadata : set()
Query metadata : set()
    Recommendation 0 metadata : set()
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : set()
    Recomme

In [23]:
# Inspect the metadata behind the attraction queries and their recommendations.
see_metadata_reco(attraction_results, places)

Query metadata : {'sites et monuments', 'sites touristiques'}
    Recommendation 0 metadata : {'fontaines', 'sites touristiques'}
    Recommendation 1 metadata : {'quartiers'}
    Recommendation 2 metadata : {'quartiers'}
    Recommendation 3 metadata : set()
    Recommendation 4 metadata : {'shopping', 'centres commerciaux'}
Query metadata : set()
    Recommendation 0 metadata : {'quartiers'}
    Recommendation 1 metadata : set()
    Recommendation 2 metadata : {'bakeries'}
    Recommendation 3 metadata : {'french'}
    Recommendation 4 metadata : {'shopping', 'centres commerciaux'}
Query metadata : {'sites touristiques'}
    Recommendation 0 metadata : {'sites touristiques'}
    Recommendation 1 metadata : {'sites touristiques'}
    Recommendation 2 metadata : {'ruines anciennes', 'sites touristiques'}
    Recommendation 3 metadata : {'sites touristiques', 'ponts'}
    Recommendation 4 metadata : {'sites touristiques'}
Query metadata : {'musées', 'musées spécialisés'}
    Recommendat