In [None]:
# set correct path for imports for this notebook
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

# external imports 
import logging
logging.basicConfig(level=logging.INFO)

# local imports
from src.fetch.google_api import fetch_google_places_data
from src.fetch.google_api import fetch_place_id
from src.nlp.extractor_openai import extract_dishes_openai, _cached_extract_single
from src.ranking.functions import assign_rankings
from src.recommendation.recs import form_recommendations

# Empty the dish-extractor cache before this run.
_cached_extract_single.cache_clear()

# Free-text search queries for the test restaurants. The order matters: it decides
# which r<N>_id / r<N> name each result is bound to below.
_restaurant_queries = (
    "funkyfish berlin",
    "standard pizza west berlin",
    "borschert berlin",
    "Gio's Berlin",
    "hasir kreuzberg",
    "paolo pinkel berlin",
)

# Resolve every query to a place ID first ...
r1_id, r2_id, r3_id, r4_id, r5_id, r6_id = [
    fetch_place_id(query) for query in _restaurant_queries
]

# ... then fetch (restaurant info, reviews) for each place ID, in the same order.
(r1, funky), (r2, pizza), (r3, borch), (r4, gio), (r5, hasir), (r6, paolo) = [
    fetch_google_places_data(pid)
    for pid in (r1_id, r2_id, r3_id, r4_id, r5_id, r6_id)
]

In [None]:
# Extract dishes from the reviews of one restaurant.
# `paolo` is the second value returned by fetch_google_places_data for "paolo pinkel berlin".
restaurant = paolo # change restaurant here (funky, pizza, borch, gio, hasir or paolo)

# Without an argument, %autoawait only reports whether top-level `await` is enabled.
%autoawait
# Top-level await: relies on IPython autoawait being on.
# The second positional argument (True) is presumably a verbose flag — TODO confirm
# against extract_dishes_openai.
review_with_dishes = await extract_dishes_openai(restaurant, True)

INFO:src.nlp.extractor_openai:🚀 Starting async extraction for 5 chunks...


IPython autoawait is `on`, and set to use `asyncio`


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/responses "HTTP/1.1 200 OK"
INFO:src.nlp.extractor_openai:🍽️ Extracted: none from review #1
INFO:src.nlp.extractor_openai:🍽️ Extracted: schnitzel, roastbeef, ceviche from review #2
INFO:src.nlp.extractor_openai:🍽️ Extracted: fried chicken, fries from review #3
INFO:src.nlp.extractor_openai:🍽️ Extracted: none from review #4
INFO:src.nlp.extractor_openai:🍽️ Extracted: korean fried chicken, kimcheese fries from review #5
INFO:src.nlp.extractor_openai:✅ Completed dish extraction for 5 reviews in 6.40s


In [5]:
def assign_rankings(reviews, verbose=False):
    """
    Give every unranked dish of a Google review the placeholder ranking -1.

    The reviews are modified in place and nothing is returned. A review is
    touched only when its "source" is exactly "google" and its "dishes" value
    is non-empty; within such a review only dishes whose "ranking" is None
    (or missing) are updated.

    Args:
        reviews: Iterable of dict-like reviews, each optionally carrying
            "source" and "dishes" (a list of dict-like dishes).
        verbose: When true, print one line per dish that receives the
            placeholder, using 1-based review numbering.
    """
    placeholder = -1
    for position, entry in enumerate(reviews, start=1):
        # Reviews from any other source (e.g. "bing") have no ranking logic yet.
        if entry.get("source") != "google":
            continue
        dish_list = entry.get("dishes")
        if not dish_list:
            continue
        for item in dish_list:
            if item.get("ranking") is not None:
                continue  # keep rankings that were already set
            item["ranking"] = placeholder
            if verbose:
                print(f"Assigned ranking {placeholder} to dish '{item.get('name')}' in review #{position}")



In [7]:
# Run the ranking step on the `paolo` reviews; True is passed as the second argument
# (named `verbose` in the notebook-local definition above).
# Which `assign_rankings` runs (the one imported from src.ranking.functions or the
# notebook-local one) depends on which cell was executed last.
assign_rankings(paolo, True)

In [8]:
# Build dish recommendations from the reviews; the result is shown as the cell output.
# NOTE(review): the recorded output shows 'ranking': None for every dish even though
# assign_rankings was run on `paolo` beforehand — confirm the rankings are propagated.
form_recommendations(paolo)

INFO:src.recommendation.recs:🍽️ Generated 7 recommendations from 5 reviews.


[{'dish_name': 'schnitzel',
  'ranking': None,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'roastbeef',
  'ranking': None,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'ceviche',
  'ranking': None,
  'author': 'Deer Ozzie',
  'source': 'Google Reviews',
  'timestamp': 1749415038,
  'review_link': HttpUrl('https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VNNjQ4T2ltZzlxTUl3EAE!2m1!1s0x47a84f6b5e0211a5:0x3fbccd2490e8c724')},
 {'dish_name': 'fried chicken',
  'ranking': None,
  'author': 'M K',
  'source': 'Google Reviews',

In [None]:
# NOTE(review): `get_recommendations` is neither imported nor defined in any cell visible
# in this notebook, so this presumably raises NameError on a fresh kernel — confirm
# which module it should be imported from.
get_recommendations(r2_id)

: 

In [None]:
import os
from dotenv import load_dotenv

import subprocess

# Read GOOGLE_API_KEY from the environment (load_dotenv() also picks up a local .env file).
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    # Fail early instead of sending the literal string "None" as the API key.
    raise RuntimeError("GOOGLE_API_KEY is not set (check your environment or .env file)")

place_id = "ChIJVfEmINtRqEcRsch0pslaanQ"

# Argument list instead of a shell string: no shell is spawned, so the key and the
# place ID reach curl verbatim, with no quoting or expansion applied to them.
# The request itself is the same as before.
cmd = [
    "curl", "-X", "GET",
    "-H", "Content-Type: application/json",
    "-H", f"X-Goog-Api-Key: {GOOGLE_API_KEY}",
    "-H", "X-Goog-FieldMask: displayName,formattedAddress,websiteUri",
    f"https://places.googleapis.com/v1/places/{place_id}",
]

result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
    # curl itself failed (network, TLS, ...): show why instead of only an empty line.
    print(f"curl exited with code {result.returncode}: {result.stderr.strip()}")
print(result.stdout)

: 

In [None]:
#get more data from Google API

# set correct path for imports for this notebook
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

from __future__ import annotations
import os
import requests
from typing import Any, Dict, List, Optional
from dotenv import load_dotenv
from src.normalisation.normaliser import normalise_review

# ---- Logging ----
import logging
logger = logging.getLogger(__name__)

# ---- Config ----
# Load variables from a .env file, then read the API key from the environment.
# os.getenv returns None when GOOGLE_API_KEY is not set.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Endpoints on maps.googleapis.com: BASE_URL_FINDPLACE is used by fetch_place_id,
# BASE_URL_DETAILS by fetch_restaurant_info.
BASE_URL_FINDPLACE = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"
BASE_URL_DETAILS = "https://maps.googleapis.com/maps/api/place/details/json"


def fetch_restaurant_info(place_id: str, timeout: float = 10.0) -> Dict[str, Optional[str]]:
    """
    Retrieve the Google Maps URL of a place from the Place Details endpoint.

    Only the ``url`` field is requested, so the returned dict is the response's
    ``result`` object limited to that field (expected shape: ``{"url": ...}``).
    It does not contain name, address or id.

    Args:
        place_id: Google Place ID to look up.
        timeout: Seconds to wait for the HTTP response; without a timeout the
            request could block indefinitely.

    Returns:
        The ``result`` object of the response, or an empty dict if it is absent.

    Raises:
        ValueError: If the response ``status`` is anything other than "OK".
    """
    params = {
        "place_id": place_id,
        "fields": "url",
        "key": GOOGLE_API_KEY,
    }

    resp = requests.get(BASE_URL_DETAILS, params=params, timeout=timeout)
    data = resp.json()

    status = data.get("status")
    if status != "OK":
        raise ValueError(f"Google API error: {status} - {data.get('error_message')}")

    return data.get("result", {})


# ---- Helper Functions (place ID is normally provided in the API call) ----
def fetch_place_id(restaurant_name: str, timeout: float = 10.0) -> Optional[str]:
    """
    Look up the Google Place ID for a free-text restaurant query.

    Args:
        restaurant_name: Text query, e.g. "hasir kreuzberg".
        timeout: Seconds to wait for the HTTP response; without a timeout the
            request could block indefinitely.

    Returns:
        The ``place_id`` of the first candidate, or None when the response
        contains no candidates.
    """
    params = {
        "input": restaurant_name,
        "inputtype": "textquery",
        "fields": "place_id,name,formatted_address",
        "key": GOOGLE_API_KEY,
    }
    response = requests.get(BASE_URL_FINDPLACE, params=params, timeout=timeout)
    data = response.json()

    candidates = data.get("candidates", [])
    if not candidates:
        status = data.get("status")
        if status not in (None, "OK", "ZERO_RESULTS"):
            # An API-level failure (bad key, quota, ...) also yields no candidates;
            # log it so it is not mistaken for "this restaurant does not exist".
            logger.error(f"Google API error: {status} - {data.get('error_message')}")
        logger.warning(f"No candidates found for '{restaurant_name}'")
        return None

    candidate = candidates[0]
    # Log only the fields requested above. 'websiteUri' is not among them, so
    # logging it would always print None.
    logger.info(
        f"\n Found restaurant: {candidate.get('name')}\n"
        f"{candidate.get('formatted_address')}\n"
        f"ID = {candidate.get('place_id')}\n"
    )
    return candidate.get("place_id")


##
##
##

# ---- Importing packages ----

import requests

# ---- Config ----
# Repeats the load_dotenv() / GOOGLE_API_KEY setup already done at the top of this cell.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Base URL on places.googleapis.com; fetch_google_places_data appends the place ID to it.
BASE_URL_GOOGLEPLACES = "https://places.googleapis.com/v1/places/"

def fetch_google_places_data(place_id: str, timeout: float = 10.0):
    """
    Fetch a place from the Places API and split the payload into info and reviews.

    Args:
        place_id: Google Place ID, appended to BASE_URL_GOOGLEPLACES.
        timeout: Seconds to wait for the HTTP response; without a timeout the
            request could block indefinitely.

    Returns:
        A ``(restaurant_info, reviews)`` tuple. ``restaurant_info`` is the
        response dict without its "reviews" key; ``reviews`` is the list stored
        under "reviews" (an empty list when absent).

    Raises:
        ValueError: If the response body carries an "error" object.
    """
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        # Wildcard field mask: asks for every available field.
        # NOTE(review): fine for exploration; narrow it before using this outside the notebook.
        "X-Goog-FieldMask": "*"
    }
    resp = requests.get(BASE_URL_GOOGLEPLACES+place_id, headers=headers, timeout=timeout)
    data = resp.json()

    # Same error style as fetch_restaurant_info: raise instead of returning a useless payload.
    error = data.get("error")
    if error:
        raise ValueError(f"Google API error: {error.get('status')} - {error.get('message')}")

    # Return the fetched data rather than discarding it.
    reviews = data.get("reviews") or []
    restaurant_info = {key: value for key, value in data.items() if key != "reviews"}

    return restaurant_info, reviews


# Smoke test with a hard-coded place ID; the returned tuple is shown as the cell output.
fetch_google_places_data("ChIJVfEmINtRqEcRsch0pslaanQ")

: 

In [None]:
# ---- Importing packages ----
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import requests
import logging
from dotenv import load_dotenv
from src.normalisation.schema import Restaurant, Review

# Notebook-only logging setup so INFO messages are visible in the cell output.
logging.basicConfig(level=logging.INFO) # remove when this code moves to a .py module
logger = logging.getLogger(__name__)

# ---- Config ----
# Load variables from a .env file, then read the API key from the environment.
# os.getenv returns None when GOOGLE_API_KEY is not set.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Base URL on places.googleapis.com; fetch_google_places_data appends the place ID to it.
BASE_URL_GOOGLEPLACES = "https://places.googleapis.com/v1/places/"

def fetch_google_places_data(place_id: str, timeout: float = 10.0):
    """
    Fetch a restaurant and its reviews from the Places API and normalise them.

    Args:
        place_id: Google Place ID, appended to BASE_URL_GOOGLEPLACES.
        timeout: Seconds to wait for the HTTP response; without a timeout the
            request could block indefinitely.

    Returns:
        A ``(restaurant, reviews)`` tuple: a ``Restaurant`` built from the whole
        response payload and a list with one ``Review`` per entry under
        "reviews" (empty when the response has none).

    Raises:
        ValueError: If the response body carries an "error" object.
    """
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        # Restrict the response to the listed fields.
        "X-Goog-FieldMask": (
            "id,"
            "displayName.text,"
            "formattedAddress,"
            "websiteUri,"
            "googleMapsUri,"
            "rating,"
            "reviews.rating,"
            "reviews.text.text,"
            "reviews.authorAttribution.displayName,"
            "reviews.publishTime,"
            "reviews.googleMapsUri,"
            "reviews.originalText.languageCode"
        ),
    }
    resp = requests.get(BASE_URL_GOOGLEPLACES+place_id, headers=headers, timeout=timeout)
    data = resp.json()

    # Report API failures explicitly; otherwise the missing displayName below
    # would surface as an unrelated AttributeError.
    error = data.get("error")
    if error:
        raise ValueError(f"Google API error: {error.get('status')} - {error.get('message')}")

    # displayName may be absent; fall back to an empty dict so the log line cannot fail.
    display_name = (data.get("displayName") or {}).get("text")
    logger.info(
        f"\nFound restaurant: {display_name}\n"
        f"{data.get('formattedAddress')}\n")

    # Pass the data into the BaseModel classes to normalise it.
    reviews_data = data.get("reviews") or []
    reviews = [Review(**r) for r in reviews_data]

    restaurant = Restaurant(**data)

    return restaurant, reviews


# Smoke test with a hard-coded place ID; the returned (restaurant, reviews) tuple is the cell output.
fetch_google_places_data("ChIJVfEmINtRqEcRsch0pslaanQ")

: 

In [None]:
# ---- Importing packages ----
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import logging
logging.basicConfig(level=logging.INFO) #remove later in .py
from src.fetch.google_api import fetch_google_places_data


# Smoke test with a hard-coded place ID, using the implementation imported from
# src.fetch.google_api in this cell.
fetch_google_places_data("ChIJVfEmINtRqEcRsch0pslaanQ")


: 