In [None]:
# Installation
%pip install fastapi uvicorn    # API
%pip install numpy              # Efficient data handling
%pip install python-dotenv      # .env => extracting hidden info
%pip install requests           # Performing API calls

# Imports
import asyncio
import os
from enum import Enum

import numpy as np
import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException

In [None]:
# Configuration (python-dotenv: picks up settings from a .env file, if one is present)
load_dotenv()

# Global Variables
# Base URL that the DB request helpers prefix to their paths; None when DATABASE_URL is unset
DB_URL = os.environ.get("DATABASE_URL")
# Selects which part of a consumer's data a lookup should return.
# Values are spelled out explicitly so they stay stable if members are reordered.
ConsumerField = Enum(
    "ConsumerField",
    {
        "VISITS": 1,
        "ORDERS": 2,
        "FAVORITED": 3,
        "FRIENDS": 4,
        "ALL": 5,
    },
)

# Restaurant categories, mirroring the categories enum in the prisma schema.
# Kept in alphabetical order, one row per leading letter.
categories = [
    "asian",
    "bakery", "barfood", "bbq", "breakfast", "burgers",
    "cafe", "chinese",
    "desserts",
    "fastfood", "french",
    "greek",
    "healthy",
    "indian", "italian",
    "japanese",
    "korean",
    "latinamerican",
    "mediterranean", "mexican", "middleeastern",
    "pizza",
    "salads", "sandwiches", "seafood", "sushi",
    "thai",
    "vegan", "vegetarian", "vietnamese",
]

In [None]:
# FastAPI application instance; the route decorator on the recommend endpoint registers on it
app = FastAPI()

In [None]:
# Pull consumer information from the DB
async get_consumer_info(field):
    response = await requests.get(f"{DB_URL}/consumer")
    if not response.ok:
        raise HTTPException(status_code=424, detail="Failed to pull consumer information from DB")
    consumer_data = await response.json()

    # Extract desired information
    match field:
        case ConsumerField.VISITS:
            return consumer_data['restaurant_visits']
        case ConsumerField.ORDERS:
            return consumer_data['orders']
        case ConsumerField.FAVORITED:
            return consumer_data['favorite_restaurants']
        case ConsumerField.FRIENDS:
            all_friends = consumer_data['friendships_a'] | consumer_data['friendships_b']
            return all_friends
        case ConsumerField.ALL:
            return consumer_data
        case _: # default case
            return consumer_data

# Pull restaurant information from the DB
async def get_restaurant_info():
    """Fetch restaurant data from the DB service.

    Returns:
        The decoded JSON payload of ``GET {DB_URL}/restaurant``.

    Raises:
        HTTPException: 424 if the request cannot be completed or the DB
            service answers with a non-OK status.
    """
    try:
        # requests is synchronous; run it in a worker thread so the event loop
        # keeps serving other requests while we wait on the network.
        response = await asyncio.to_thread(requests.get, f"{DB_URL}/restaurant")
    except requests.RequestException as exc:
        raise HTTPException(status_code=424, detail="Failed to pull restaurant information from DB") from exc
    if not response.ok:
        raise HTTPException(status_code=424, detail="Failed to pull restaurant information from DB")
    return response.json()  # regular method on a requests Response, not a coroutine

In [None]:
## Features & Default Weights

# Feature Weights
# The three scalar features get fixed weights; what is left of the total budget
# of 1 is split evenly across the category features.
rating_weight = 0.3
avg_cost_weight = 0.2
distance_weight = 0.125
category_budget = 1 - rating_weight - avg_cost_weight - distance_weight
num_categories = len(categories)  # derived from the list so the two cannot drift apart
category_weight = category_budget / num_categories

feature_weights = {
    'rating': rating_weight,
    'avg_cost': avg_cost_weight,
    'distance': distance_weight,
}
for category in categories:  # Add in category feature weights
    feature_weights[category] = category_weight

# Existing Restaurant Interaction Weights
# NOTE: Defines how much each kind of restaurant interaction counts when generating the consumer vector
# TODO: Edit later?
restaurant_type_weights = dict(
    survey=0.6,
    liked=0.8,
    order=0.4,
    visit=0.2,
)

# Consumer Vector Feedback/Interaction Weights
# NOTE: Defines how strongly each interaction with a recommendation feeds back into the consumer vector
# TODO: Edit later?
recommendation_feedback_weights = dict(
    liked=0.2,
    order=0.1,
    visit=0.04,
)

In [None]:
## Utility Functions
# Winsorization is used to solve min-max normalization issues caused by outliers by taking, say, the 1st percentile & 99th percentile rather than min & max
# Resource: https://medium.com/@whyamit404/implementing-pandas-winsorize-ad1e51ec548b
def winsorization(arr, lower_percentile=1, upper_percentile=99):
    """Return the winsorized lower and upper bounds of ``arr``.

    Args:
        arr: Non-empty sequence of numbers.
        lower_percentile: Percentile (0-100) used in place of the minimum.
        upper_percentile: Percentile (0-100) used in place of the maximum.

    Returns:
        list: ``[winsorized_min, winsorized_max]``.

    Raises:
        ValueError: If ``arr`` is empty.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        raise ValueError("winsorization() requires at least one value")

    # One call computes both bounds over the same array.
    winsorized_min, winsorized_max = np.percentile(values, [lower_percentile, upper_percentile])

    return [winsorized_min, winsorized_max]

In [None]:
# Consumer Vector Operations
# TODO: 
def generate_init_consumer_vector():
    """Placeholder for generating a consumer's initial vector.

    Raises:
        NotImplementedError: Always, until the implementation is written.
    """
    # TODO: implement. A body is required here: a def followed only by a
    # comment is an IndentationError and prevents the cell from running.
    raise NotImplementedError("generate_init_consumer_vector is not implemented yet")
def update_consumer_vector():
    """Placeholder for updating a consumer's vector.

    Raises:
        NotImplementedError: Always, until the implementation is written.
    """
    # TODO: implement. A body is required here: a def followed only by a
    # comment is an IndentationError and prevents the cell from running.
    raise NotImplementedError("update_consumer_vector is not implemented yet")
    

In [None]:
# Restaurant Vectors Generation
async def generate_restaurant_vectors():
    """Build one feature vector per restaurant from the DB's restaurant data.

    Each vector is laid out as ``[rating, cost, distance, *category_flags]``:

    * rating   - ``avg_rating / 5``; restaurants whose rating is ``-1`` (no
      rating yet) use the average rating of the rated restaurants instead.
    * cost     - ``avg_cost`` min-max normalized between the winsorized bounds,
      clipped to [0, 1] and inverted (lower cost scores higher).
    * distance - ``distance_value`` treated the same way as cost; restaurants
      without a distance get a neutral 0.5.
    * category flags - 1.0/0.0 per entry of the module-level ``categories``
      list, in that list's order.

    Returns:
        dict: ``restaurant_id`` -> 1-D ``numpy.ndarray`` of length
        ``3 + len(categories)``. Empty when the DB returns no restaurants.
    """
    neutral_score = 0.5  # stand-in when a feature has no usable data
    max_rating = 5.0     # NOTE(review): assumes ratings are on a 0-5 scale, as the original /5.0 implies

    restaurant_data = await get_restaurant_info()
    if not restaurant_data:
        return {}

    # Average rating is the default for unrated restaurants (-1 marks "no rating yet").
    known_ratings = [restaurant['avg_rating'] for restaurant in restaurant_data if restaurant['avg_rating'] != -1]
    if known_ratings:
        avg_rating = sum(known_ratings) / len(known_ratings)
    else:
        avg_rating = neutral_score * max_rating  # nothing rated yet: fall back to the scale midpoint

    cost_arr = [restaurant['avg_cost'] for restaurant in restaurant_data]
    min_winsorized_cost, max_winsorized_cost = winsorization(cost_arr)

    # NOTE: Distance may not yet be populated if user hasn't entered the address they're ordering from
    # (this approach is taken to reduce Google Maps API exhaustion)
    known_distances = [
        restaurant['distance_value']
        for restaurant in restaurant_data
        if restaurant.get('distance_value') is not None
    ]
    if known_distances:
        min_winsorized_distance, max_winsorized_distance = winsorization(known_distances)

    # Generate restaurant vectors
    restaurant_vectors_dict = {}  # Restaurant ID to vector mappings
    for restaurant in restaurant_data:
        rating = restaurant['avg_rating']
        rating_normalized = (rating if rating != -1 else avg_rating) / max_rating

        cost_normalized_inverted = _normalize_inverted(
            restaurant['avg_cost'], min_winsorized_cost, max_winsorized_cost, neutral_score
        )

        distance = restaurant.get('distance_value')
        if distance is None:
            distance_normalized_inverted = neutral_score
        else:
            # A non-None distance implies known_distances is non-empty, so the bounds exist.
            distance_normalized_inverted = _normalize_inverted(
                distance, min_winsorized_distance, max_winsorized_distance, neutral_score
            )

        # One flag per known category, in `categories` order
        category_flags = [1.0 if category in restaurant['categories'] else 0.0 for category in categories]

        restaurant_vectors_dict[restaurant['restaurant_id']] = np.array([
            rating_normalized,
            cost_normalized_inverted,
            distance_normalized_inverted,
            *category_flags,
        ])

    return restaurant_vectors_dict


def _normalize_inverted(value, lower, upper, fallback):
    """Min-max normalize ``value`` between ``lower``/``upper``, clip to [0, 1] and invert it."""
    if upper == lower:
        return fallback  # no spread, so every value is equally (un)favorable
    scaled = (value - lower) / (upper - lower)
    # Values beyond the winsorized bounds are clipped so the result stays in [0, 1].
    return 1.0 - min(max(scaled, 0.0), 1.0)


In [None]:
# Cosine similarity
# TODO: 

In [None]:
# Return top-N recommendations
# TODO: 

In [None]:
@app.get("/recommend/{consumer_id}")
async def recommend(consumer_id: int):
    """Recommendation endpoint for a consumer (not implemented yet).

    Args:
        consumer_id: Consumer identifier taken from the URL path.

    Raises:
        HTTPException: 501 until the recommendation code is wired in.
    """
    # TODO: Recommendation code to be called here
    # Answer with an explicit 501 rather than awaiting a placeholder, which
    # raises TypeError and reaches the client as an opaque 500.
    raise HTTPException(status_code=501, detail="Recommendations are not implemented yet")