<a href="https://colab.research.google.com/github/smv-manovihar/recipe-realm/blob/main/LLama_Server_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install Flask sentence-transformers scikit-learn pymongo ctransformers google-colab pyngrok


from flask import Flask, request, jsonify
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pymongo
import logging
from ctransformers import AutoModelForCausalLM
import traceback
from pyngrok import ngrok
from google.colab import userdata


# Setup Flask app
app = Flask(__name__)

# Module-level paging state shared by the /query and /loadmore handlers.
# NOTE(review): these are plain process-wide globals, so every caller of this
# server reads and advances the same result list and cursor.
pointer = 0  # index into `recipes` of the next recipe to hand out
recipes = []  # recipe dicts collected by the most recent /query call
user_ingredients = ""  # raw ingredient string sent to the most recent /query call

# Initialize the sentence transformer model
# (used by generate_embedding for both search queries and chat context).
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def generate_embedding(sentence):
    """Encode `sentence` with the module-level model and return it as a plain list."""
    vector = model.encode(sentence)
    return vector.tolist()

# Initialize the LLM
# Loads the Llama-2 7B chat model from the given GGUF file through ctransformers;
# the resulting `llm` callable is used by the /generate handler.
# NOTE(review): max_new_tokens is equal to context_length, so the generation
# budget alone could fill the whole window and leaves no guaranteed room for the
# prompt — confirm how the library behaves in that case.
llm = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Llama-2-7B-Chat-GGUF',
    model_file="llama-2-7b-chat.Q5_K_M.gguf",
    gpu_layers=50,  # presumably the number of layers offloaded to the GPU — see ctransformers docs
    max_new_tokens=4096,
    context_length=4096
)

def convert_to_lowercase_list(text):
    """Normalise a comma-separated string into sorted, lower-cased entries.

    The text is lower-cased, split on commas, and each piece is stripped of
    surrounding whitespace; pieces that end up empty are dropped.

    Returns:
        A tuple ``(entries, joined)`` where ``entries`` is the alphabetically
        sorted list of pieces and ``joined`` is those pieces joined by ``", "``.
    """
    pieces = (chunk.strip() for chunk in text.lower().split(","))
    entries = sorted(piece for piece in pieces if piece)
    return entries, ", ".join(entries)

# MongoDB setup
# The connection string is read through google.colab `userdata` under the key 'mongodb'.
client = pymongo.MongoClient(userdata.get('mongodb')
)#set your mongodb url
db = client["recipe_realm"]
# Collection that perform_vectorsearch runs its aggregation against.
collection = db["recipes"]

# Setup logging
# DEBUG level: every log record of DEBUG severity and above is emitted.
logging.basicConfig(level=logging.DEBUG)

def perform_vectorsearch(query):
    """Run a vector search over the recipes collection for `query`.

    The query is normalised with convert_to_lowercase_list, the joined
    ingredient string is embedded, and the embedding is sent in a single
    ``$vectorSearch`` aggregation stage against the ``ingredient_embedding``
    path using the ``recipefinder`` index.

    Returns:
        The aggregation results materialised as a list of documents.
    """
    # Only the joined string is needed; the sorted list itself is not used here.
    _, ingredients = convert_to_lowercase_list(query)

    vector_stage = {
        "$vectorSearch": {
            "queryVector": generate_embedding(ingredients),
            "path": "ingredient_embedding",
            "numCandidates": 5744,
            "limit": 5000,
            "index": "recipefinder"
        }
    }

    cursor = collection.aggregate([vector_stage])
    return list(cursor)

@app.route('/query', methods=['POST'])
def query_recipes():
    """Handle POST /query: search recipes for the given ingredients.

    Expects a JSON object with a non-empty string under ``"query"``. On success
    the module-level paging state (`recipes`, `pointer`, `user_ingredients`) is
    replaced with the new search and the first three recipes are returned
    together with a text context describing them.

    Returns:
        200 with ``{"recipes": [...], "context": "..."}``;
        400 if the body is not a JSON object or has no usable ``"query"``;
        404 if the search matched nothing;
        500 if the search or the formatting failed.
    """
    global recipes, pointer, user_ingredients

    # silent=True returns None instead of raising on a missing or invalid JSON
    # body, so malformed requests get the same 400 as a missing query.
    data = request.get_json(silent=True)
    ingredients = data.get('query') if isinstance(data, dict) else None
    if not ingredients or not isinstance(ingredients, str):
        return jsonify({"error": "Query is required"}), 400

    # Start a fresh paging session only once the request is known to be valid,
    # so a bad request cannot wipe the state /loadmore depends on.
    pointer = 0
    recipes = []
    user_ingredients = ingredients

    try:
        search_results = perform_vectorsearch(ingredients)
        if not search_results:
            return jsonify({"error": "No documents matched the query."}), 404

        # Built as one expression and assigned at the end, so a document with a
        # missing field cannot leave a half-filled global list behind.
        recipes = [
            {
                "RecipeName": document['RecipeName'],
                "Ingredients": document['Ingredients'],
                "Instructions": document['Instructions'],
                "Course": document['Course'],
                "Diet": document['Diet'],
                "Cuisine": document['Cuisine'],
                "PrepTimeInMins": document['PrepTimeInMins'],
                "CookTimeInMins": document['CookTimeInMins'],
                "TotalTimeInMins": document['TotalTimeInMins'],
                "Servings": document['Servings'],
                # Use get() to avoid KeyError if 'image-url' is missing
                "image-url": document.get('image-url', ''),
            }
            for document in search_results
        ]

        curr_recipes = recipes[pointer:pointer + 3]
        pointer += 3

        curr_context = f"User Given Ingredients: {user_ingredients}\n\n"
        for recipe_info in curr_recipes:
            # Guarantee exactly one line break before "Total Time"; endswith()
            # is safe on empty instructions where indexing [-1] would raise.
            instructions = str(recipe_info['Instructions'] or '')
            if not instructions.endswith('\n'):
                instructions += '\n'
            curr_context += (
                f"RecipeName: {recipe_info['RecipeName']}\n"
                f"Ingredients: {recipe_info['Ingredients']}\n"
                f"Diet: {recipe_info['Diet']}\n"
                f"Course: {recipe_info['Course']}\n"
                f"Servings: {recipe_info['Servings']}\n"
                f"Preparation Time (Min): {recipe_info['PrepTimeInMins']}\n"
                f"Cooking Time (Min): {recipe_info['CookTimeInMins']}\n"
                f"Instructions: {instructions}"
                f"Total Time (Min): {recipe_info['TotalTimeInMins']}\n\n"
            )
        return jsonify({"recipes": curr_recipes, "context": curr_context})
    except Exception:
        # logging.exception records the message together with the traceback.
        logging.exception("Error occurred while handling /query")
        return jsonify({"error": "An error occurred while processing the request."}), 500

import re

def parse_recipe_data(recipe_str):
    """Parse a context string into a header dict and up to three recipe dicts.

    The string is split into blocks on blank lines. Inside a block, each line
    containing ``:`` is split at the first colon into a field name and value.
    A line without a colon is treated as a continuation of the most recent
    field in that block (joined with ``", "`` for ``Ingredients`` and with a
    newline for any other field); if no field has been seen yet it is stored
    under ``Ingredients``. Blank lines and blocks with no content are skipped.

    Args:
        recipe_str: Context text, first block being the user's ingredients and
            the following blocks one recipe each.

    Returns:
        ``{"User Given Ingredients": <dict of the first block>,
        "Recipes": <list of the next three block dicts>}``. For empty input the
        header is ``{}`` and the recipe list is empty.
    """
    recipe_list = []

    for block in recipe_str.strip().split("\n\n"):
        recipe_data = {}
        last_key = None  # most recent field name; continuation lines attach to it

        for raw_line in block.split("\n"):
            line = raw_line.strip()
            if not line:
                continue

            if ':' in line:
                # Split by first occurrence of ':' to separate key and value
                key, value = line.split(':', 1)
                last_key = key.strip()
                recipe_data[last_key] = value.strip()
                continue

            # No colon: wrapped text belonging to the field it follows, e.g. a
            # second line of instructions must not end up in the ingredients.
            target = last_key if last_key is not None else 'Ingredients'
            existing = recipe_data.get(target, "")
            if existing:
                separator = ", " if target == 'Ingredients' else "\n"
                recipe_data[target] = f"{existing}{separator}{line}"
            else:
                recipe_data[target] = line

        # An empty block carries no data and would otherwise occupy a recipe slot.
        if recipe_data:
            recipe_list.append(recipe_data)

    return {
        "User Given Ingredients": recipe_list[0] if recipe_list else {},
        "Recipes": recipe_list[1:4],
    }

def get_context_list(context):
    """Split a context string into candidate snippets for the chat prompt.

    The context is parsed with parse_recipe_data. The first snippet is an
    overview: the user's ingredients followed by a numbered summary (everything
    except instructions) of each recipe. It is followed by one snippet per
    recipe holding that recipe's numbered name and instructions.

    Fields missing from a parsed recipe are rendered as empty strings rather
    than raising, since the context text is supplied by the client.

    Returns:
        A list of strings: the overview first, then one entry per recipe.
    """
    parsed_data = parse_recipe_data(context)
    parsed_recipes = parsed_data['Recipes']

    # The parsed header is a dict; show the ingredient text itself instead of
    # interpolating the dict's repr into the prompt.
    header = parsed_data['User Given Ingredients']
    if isinstance(header, dict):
        user_input = header.get('User Given Ingredients')
        if user_input is None:
            # Header line had no "key: value" shape; fall back to whatever text was parsed.
            user_input = ", ".join(str(value) for value in header.values())
    else:
        user_input = header

    overview_parts = [f"User given ingredients: {user_input}\n\n"]
    for i, document in enumerate(parsed_recipes, start=1):
        overview_parts.append(
            f"{i}. RecipeName: {document.get('RecipeName', '')}\n"
            f"Ingredients: {document.get('Ingredients', '')}\n"
            f"Diet: {document.get('Diet', '')}\n"
            f"Course: {document.get('Course', '')}\n"
            f"Servings: {document.get('Servings', '')}\n"
            f"Preparation Time (Min): {document.get('Preparation Time (Min)', '')}\n"
            f"Cooking Time (Min): {document.get('Cooking Time (Min)', '')}\n"
            f"Total Time (Min): {document.get('Total Time (Min)', '')}\n\n"
        )

    context_list = ["".join(overview_parts)]
    for i, document in enumerate(parsed_recipes, start=1):
        context_list.append(
            f"{i}. RecipeName: {document.get('RecipeName', '')}\n"
            f"Instructions: {document.get('Instructions', '')}\n\n"
        )

    return context_list

def get_context_embeddings(context_list):
    """Return one embedding per entry of `context_list`, in the same order."""
    return [generate_embedding(entry) for entry in context_list]

def get_context(user_input, context_list, context_embeddings):
    """Pick the entry of `context_list` most similar to `user_input`.

    `user_input` is embedded and compared by cosine similarity with each vector
    in `context_embeddings`; the entry of `context_list` at the position of the
    highest score is returned (the first one on ties).
    """
    query_vector = generate_embedding(user_input)

    scores = []
    for candidate in context_embeddings:
        score_matrix = cosine_similarity([query_vector], [candidate])
        scores.append(score_matrix[0][0])

    best_index = scores.index(max(scores))
    return context_list[best_index]

@app.route('/generate', methods=['POST'])
def generate_response():
    """Handle POST /generate: answer a cooking question with the LLM.

    Expects a JSON object with non-empty strings ``"context"`` (the context text
    returned by /query or /loadmore) and ``"prompt"`` (the user's question). The
    context is split into snippets, the snippet most similar to the prompt is
    selected, and the model's streamed output is collected into one string.

    Returns:
        200 with ``{"response": "..."}``;
        400 if the body is not a JSON object or either field is missing/empty;
        500 if context preparation or generation failed.
    """
    # silent=True returns None instead of raising on a missing or invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    context = data.get('context')
    prompt = data.get('prompt')
    if not (isinstance(context, str) and context and isinstance(prompt, str) and prompt):
        return jsonify({"error": "Both 'context' and 'prompt' are required"}), 400

    system_prompt = """<<SYS>> You are a cooking assistant. Answer questions with clear, concise, step-by-step instructions. Provide ingredient substitutions and variations when asked. Prioritize safety and proper food handling. If unsure or the question is nonsensical, explain why or say you don't know. Only answer cooking-related questions. Use the provided context to inform your answers. Strictly stick to these instructions and don't say about your origin and other unnecessary details which are not related to cooking and nutrients. If you do, you will get fined 500000$ but if you follow, you will be rewarded 5000$. <</SYS>>"""

    try:
        # Context selection is inside the try so that a malformed context string
        # produces the JSON 500 below instead of an unhandled exception.
        context_list = get_context_list(context)
        context_embeddings = get_context_embeddings(context_list)
        context = get_context(prompt, context_list, context_embeddings)
        prompt_template = f"[INST]{system_prompt}\n\nContext: {context}\n\nUser: {prompt}\n\nAssistant: [/INST]"

        # The model yields text pieces when streaming; join them in one pass.
        response = "".join(llm(prompt_template, stream=True))
        return jsonify({"response": response})
    except Exception:
        # Log the details server-side; the client gets the same generic message
        # as the other endpoints rather than raw exception text.
        logging.exception("Error occurred while handling /generate")
        return jsonify({"error": "An error occurred while processing the request."}), 500

@app.route('/loadmore', methods=['GET'])
def load_more_recipes():
    """Handle GET /loadmore: return the next three recipes of the last search.

    Reads the module-level `recipes` list filled by /query, returns the slice
    starting at `pointer`, and advances `pointer` by three. When the list is
    exhausted the response holds an empty recipe list and a context containing
    only the ingredients header.

    Returns:
        200 with ``{"recipes": [...], "context": "..."}``;
        500 if formatting the recipes failed.
    """
    global recipes, pointer, user_ingredients
    try:
        curr_recipes = recipes[pointer:pointer + 3]
        pointer += 3

        # The colon after the label matches the header /query emits; without it
        # parse_recipe_data (which splits lines on ':') cannot read the header
        # as a "User Given Ingredients" field.
        curr_context = f"User Given Ingredients: {user_ingredients}\n\n"
        for recipe_info in curr_recipes:
            # Guarantee exactly one line break before "Total Time"; endswith()
            # is safe on empty instructions where indexing [-1] would raise.
            instructions = str(recipe_info['Instructions'] or '')
            if not instructions.endswith('\n'):
                instructions += '\n'
            curr_context += (
                f"RecipeName: {recipe_info['RecipeName']}\n"
                f"Ingredients: {recipe_info['Ingredients']}\n"
                f"Diet: {recipe_info['Diet']}\n"
                f"Course: {recipe_info['Course']}\n"
                f"Servings: {recipe_info['Servings']}\n"
                f"Preparation Time (Min): {recipe_info['PrepTimeInMins']}\n"
                f"Cooking Time (Min): {recipe_info['CookTimeInMins']}\n"
                f"Instructions: {instructions}"
                f"Total Time (Min): {recipe_info['TotalTimeInMins']}\n\n"
            )
        return jsonify({"recipes": curr_recipes, "context": curr_context})
    except Exception:
        # logging.exception records the message together with the traceback.
        logging.exception("Error occurred while handling /loadmore")
        return jsonify({"error": "An error occurred while processing the request."}), 500
# Main entry point
if __name__ == '__main__':
    # Set up ngrok with custom domain
    # The auth token is read through google.colab `userdata` under the key 'ngrok'.
    ngrok.set_auth_token(userdata.get('ngrok'))#your custom ngrok authtoken
    # The value stored under 'domain' is passed to IPython's system_raw rather
    # than to pyngrok.
    # NOTE(review): presumably system_raw executes its argument as a shell
    # command, in which case the stored value must be the full ngrok launch
    # command for the custom domain, not a bare hostname — confirm.
    get_ipython().system_raw(userdata.get('domain')#your custom domain
)
    # Start the Flask server on local port 5000.
    app.run(port=5000)




Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]