In [None]:
# To serve this Jupyter notebook as a REST API, install Jupyter Kernel Gateway by running the following in a command prompt:
# pip install jupyter_kernel_gateway
# jupyter kernelgateway --generate-config
# Then run the next line in a terminal, from the folder containing this file, to start the server:
# jupyter kernelgateway --KernelGatewayApp.api='kernel_gateway.notebook_http' --KernelGatewayApp.seed_uri='Backend.ipynb'

In [None]:
import json
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

In [None]:
# POST /recommendCoffee

# Handler cell: reads the JSON request from the global `REQUEST`, filters the
# coffee dataset by the customer's time-of-day and coffee-type preferences,
# ranks the remaining rows by TF-IDF cosine similarity between the requested
# flavour keywords and each row's 'Flavor' text, and prints the top three
# matches as JSON on stdout.
#
# Request body keys read: 'timeOfDay', 'coffee-type', 'flavour', 'fruitType'.
# Response shape: {"Recommendations": [{name, recommendedFor, cuppingNotes,
# origin, varietal, processing, cosineScore}, ...]} (at most three entries).
#
# NOTE(review): the dataset is re-read from disk on every request; consider
# loading it once in an earlier cell if latency matters.

TIME_OPTIONS = ('morning', 'afternoon', 'anytime')
COFFEE_TYPE_OPTIONS = ('filter', 'espresso')
TOP_N = 3


def _as_tokens(value):
    """Normalize a request field to a list of stripped, non-empty strings.

    Accepts None (-> []), a single string (-> one token) or an iterable of
    values, so a client sending "fruity" instead of ["fruity"] is not split
    into individual characters.
    """
    if value is None:
        return []
    if isinstance(value, str):
        value = [value]
    tokens = []
    for item in value:
        if item is None:
            continue
        token = str(item).strip()
        if token:
            tokens.append(token)
    return tokens


def _json_safe(value):
    """Convert a DataFrame cell into something `json.dumps` emits as valid JSON.

    Missing values (NaN/NaT/None) become None (JSON null) and NumPy scalars
    become the equivalent built-in Python number.
    """
    if pd.api.types.is_scalar(value) and pd.isna(value):
        return None
    if isinstance(value, np.generic):
        return value.item()
    return value


req = json.loads(REQUEST)

reqBody = req['body']

# Collect the customer's input; a missing field is treated as "no preference".
customer_input = (
    _as_tokens(reqBody.get('timeOfDay'))
    + _as_tokens(reqBody.get('coffee-type'))
    + _as_tokens(reqBody.get('flavour'))
    + _as_tokens(reqBody.get('fruitType'))
)
keywords = list(customer_input)

# Extract the time and coffee-type preferences (the last matching keyword wins).
time_pref = ''
milk_pref = ''
for keyword in keywords:
    if keyword in TIME_OPTIONS:
        time_pref = keyword
    elif keyword in COFFEE_TYPE_OPTIONS:
        milk_pref = keyword

# Read the coffee dataset from an Excel file using pandas
coffee_df = pd.read_excel('Coffee dataset.xlsx')

# The preferences are used as filters, not as flavour keywords.
keywords = [keyword for keyword in keywords if keyword not in (time_pref, milk_pref)]
keyword_string = " ".join(keywords)

# Fit TF-IDF on every row's flavour text. Blank cells are replaced with an
# empty document because TfidfVectorizer rejects NaN input.
vectorizer = TfidfVectorizer()
flavor_notes = coffee_df['Flavor'].fillna('').astype(str)
cupping_notes_vectorized = vectorizer.fit_transform(flavor_notes)

# Project the customer's keywords into the same vector space and score each row.
keyword_vectorized = vectorizer.transform([keyword_string])
cos_sim = cosine_similarity(keyword_vectorized, cupping_notes_vectorized)
coffee_df['Cosine similarity'] = cos_sim[0]

# Keep only rows matching the stated preferences. An empty preference applies
# no filter; na=False keeps blank cells from producing a NaN mask, and
# regex=False makes the match a plain substring test.
matches_prefs = pd.Series(True, index=coffee_df.index)
if time_pref:
    matches_prefs &= coffee_df['time'].str.contains(
        time_pref, case=False, na=False, regex=False)
if milk_pref:
    matches_prefs &= coffee_df['w/wo milk'].str.contains(
        milk_pref, case=False, na=False, regex=False)
filtered_coffee_df = coffee_df.loc[matches_prefs]

# Best matches first; a stable sort keeps dataset order among equal scores.
final_df = filtered_coffee_df.sort_values(
    'Cosine similarity', ascending=False, kind='mergesort')

# Build the response from the top matches.
coffeeRecommendations = [
    {
        "name": _json_safe(row['Name']),
        "recommendedFor": _json_safe(row['Recommend for']),
        "cuppingNotes": _json_safe(row['Cupping notes']),
        "origin": _json_safe(row['Origin']),
        "varietal": _json_safe(row['Varietal']),
        "processing": _json_safe(row['Processing']),
        "cosineScore": _json_safe(row['Cosine similarity']),
    }
    for _, row in final_df.head(TOP_N).iterrows()
]

# default=str is a last-resort fallback for cell types json cannot encode
# natively (e.g. timestamps).
print(json.dumps({'Recommendations': coffeeRecommendations}, default=str))