In [None]:
# set correct path for imports for this notebook
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# external imports 
import logging
logging.basicConfig(level=logging.INFO)

# local imports
from src.fetch.google_api import fetch_google_places_data
from src.fetch.google_api import fetch_place_id
from src.nlp.extractor_openai import extract_dishes_openai, _cached_extract_single
from src.ranking.scoring import assign_dish_scores
from src.recommendation.recs import form_recommendations

# Clear dish extractor cache to ensure fresh model load
_cached_extract_single.cache_clear()

# Restaurants to test with
r1_id = fetch_place_id("funkyfish berlin")
r2_id = fetch_place_id("standard pizza west berlin")
r3_id = fetch_place_id("borschert berlin")
r4_id = fetch_place_id("Gio's Berlin") 
r5_id = fetch_place_id("hasir kreuzberg")
r6_id = fetch_place_id("paolo pinkel berlin")

# Getting restaurant info and reviews
r1, funky = fetch_google_places_data(r1_id)
r2, pizza = fetch_google_places_data(r2_id)
r3, borch = fetch_google_places_data(r3_id)
r4, gio  = fetch_google_places_data(r4_id)
r5, hasir = fetch_google_places_data(r5_id)
r6, paolo = fetch_google_places_data(r6_id)

INFO:src.fetch.google_api:
Found restaurant: FunkyFisch
Kantstra√üe 135-136, 10625 Berlin, Deutschland
ID = ChIJU7-KvONQqEcRCmPCCvo-Fg4

INFO:src.fetch.google_api:
Found restaurant: Standard Serious Pizza Charlottenburg
Schl√ºterstra√üe 63, 10625 Berlin, Deutschland
ID = ChIJc2uwDI1RqEcRfAsy3YxyfIY

INFO:src.fetch.google_api:
Found restaurant: Borchardt
Franz√∂sische Str. 47, 10117 Berlin, Deutschland
ID = ChIJVfEmINtRqEcRsch0pslaanQ

INFO:src.fetch.google_api:
Found restaurant: Gio's
Dresdener Str. 16, 10999 Berlin, Deutschland
ID = ChIJ65Liv5JPqEcRodr8aFFNAvE

INFO:src.fetch.google_api:
Found restaurant: Hasir Kreuzberg
Adalbertstra√üe 10, 10999 Berlin, Deutschland
ID = ChIJZ-8TQ8hPqEcRf0R-vl8UwwY

INFO:src.fetch.google_api:
Found restaurant: Paolo Pinkel
Karl-Marx-Stra√üe 55, 12043 Berlin, Deutschland
ID = ChIJpRECXmtPqEcRJMfokCTNvD8

INFO:src.fetch.google_api:
Found restaurant: FunkyFisch
Kantstra√üe 135-136, 10625 Berlin, Germany

INFO:src.fetch.google_api:Retrieved 5 reviews for 

In [127]:
# Extracting dishes
restaurant = paolo #change restaurant here

%autoawait
review_with_dishes = await extract_dishes_openai(restaurant, True)

INFO:src.nlp.extractor_openai:üöÄ Starting async extraction for 5 chunks...
INFO:src.nlp.extractor_openai:üçΩÔ∏è Extracted from Review #1: none
INFO:src.nlp.extractor_openai:üçΩÔ∏è Extracted from Review #2: roastbeef, schnitzel, ceviche
INFO:src.nlp.extractor_openai:üçΩÔ∏è Extracted from Review #3: fries, fried chicken
INFO:src.nlp.extractor_openai:üçΩÔ∏è Extracted from Review #4: none
INFO:src.nlp.extractor_openai:üçΩÔ∏è Extracted from Review #5: kimcheese fries, korean fried chicken
INFO:src.nlp.extractor_openai:‚úÖ Completed dish extraction for 5 reviews in 0.03s


IPython autoawait is `on`, and set to use `asyncio`


In [None]:
# only relevant in notebook to do reruns
import copy
reviews = copy.deepcopy(review_with_dishes)

# import
import logging
logger = logging.getLogger(__name__)
from typing import Dict, List, Any
logging.basicConfig(level=logging.INFO)


# helper function
def count_words(string:str) -> int:
    """
    Counts words in a given string.
    """
    word_count = len(string.split())
    return word_count



# main function
def assign_dish_scores(reviews: List[Dict[str, Any]]) -> None:
    """
    Assigns scores to dishes based on review source, author name, and dish name length.

    Google reviews get a small author bonus; blog reviews start with higher base points.
    Longer dish names earn extra points up to a cap. Logs all scoring steps.

    Args:
        reviews: List of review dicts with 'source_type', 'author', and 'dishes'.
    """
    
    # constants
    google_default_points = 0
    blog_default_points = 1000
    
    logger.info("=" * 16 + " SCORING " +"=" * 15 )

    if any(
        dish.get("ranking") is not None
        for review in reviews
        if review.get("dishes")
        for dish in review["dishes"]
    ):
        logger.info("This review set already contains ranked dishes. Skipping scoring.")
    else:
        for i, review in enumerate(reviews):
                # variables
                final_score: int = 0
                author_p: int = 0
                source_p: int = 0
                dish_name_p: int = 0

                # scoring source type
                if review.get("source_type") == "google" and review.get("dishes"):
                    source_p = google_default_points
                    if count_words(review.get("author","")) > 1:
                        author_p = 10 # rewards real min. two word names for google reviewrs

                elif review.get("source_type") == "blog" and review.get("dishes"): 
                    source_p = blog_default_points

                # scoring dish based on number of words         
                for dish in review["dishes"]:
                    dish_name_p = 0
                    if dish.get("ranking") is None:
                        dish_name = dish.get("name")
                        word_count = len(dish_name.split())
                        dish_name_p = min(word_count**3, 50) # reward more words exponentially with a cap on 4
                    
                        final_score = author_p + source_p + dish_name_p
                        dish["ranking"] = final_score
                        logger.info(f"SCORE: Review #{i+1} -- {final_score}p -- {dish.get('name')} -- ({source_p} +{author_p} +{dish_name_p})")  
                logger.info("=" * 40)

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 75)

In [126]:
assign_dish_scores(paolo)

INFO:__main__:SCORE: Review #2 -- 11p -- roastbeef -- (0 +10 +1)
INFO:__main__:SCORE: Review #2 -- 11p -- schnitzel -- (0 +10 +1)
INFO:__main__:SCORE: Review #2 -- 11p -- ceviche -- (0 +10 +1)
INFO:__main__:SCORE: Review #3 -- 11p -- fries -- (0 +10 +1)
INFO:__main__:SCORE: Review #3 -- 18p -- fried chicken -- (0 +10 +8)
INFO:__main__:SCORE: Review #5 -- 18p -- kimcheese fries -- (0 +10 +8)
INFO:__main__:SCORE: Review #5 -- 37p -- korean fried chicken -- (0 +10 +27)


In [106]:
form_recommendations(paolo)

INFO:src.recommendation.recs:üçΩÔ∏è Generated 7 recommendations from 5 reviews.


[{'dish_name': 'roastbeef',
  'ranking': 10,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'schnitzel',
  'ranking': 10,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'ceviche',
  'ranking': 10,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'fries',
  'ranking': 10,
  'author': 'M K',
  'source': 'Google Reviews',
  'timestamp': 