# References

https://lancedb.github.io/lancedb/basic/#installation

# Step 1: Install lancedb

In [1]:
!pip install lancedb


Collecting lancedb
  Downloading lancedb-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)
Collecting deprecation (from lancedb)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting pylance==0.19.1 (from lancedb)
  Downloading pylance-0.19.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.4 kB)
Downloading lancedb-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.7/27.7 MB[0m [31m70.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading pylance-0.19.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.4/30.4 MB[0m [31m88.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)
Installing collected packages: pylance, deprecation, lancedb
Successfully installed deprecation-2.1.0 lan

# Step 2: Load the CSV files 

## Import the Cleaned CSV file from Spoonacular

In [2]:
import pandas as pd

# Load the cleaned Spoonacular recipe export. The path is relative to the
# notebook's working directory, so Clean.csv has to be present there; otherwise
# pandas raises FileNotFoundError.
data = pd.read_csv("Clean.csv")

# Leave the preview as the cell's last expression so Jupyter renders it as a
# table instead of the plain-text dump that print() produces.
data.head()


FileNotFoundError: [Errno 2] No such file or directory: 'Clean.csv'

## Import the Pantry CSV where Items within Categories will be matched to specific Ingredients

In [None]:
import pandas as pd

# Load the pantry lookup table (the Category and Item columns are used later to
# expand ingredient categories). The path is relative to the notebook's working
# directory.
pantry = pd.read_csv("Pantry.csv")

# Leave the preview as the cell's last expression so Jupyter renders it as a
# table instead of the plain-text dump that print() produces.
pantry.head()


## Load the output from the UI (expected as a JSON file)

In [None]:
import json

# Load the selections exported by the UI. json.load returns plain Python
# containers; later cells call UI.keys(), so the top level is expected to be a dict.
with open('output.json', 'r', encoding='utf-8') as file:
    UI = json.load(file)

# A dict has no .head() (that is a pandas method), so pretty-print the parsed
# payload itself to inspect it.
print(json.dumps(UI, indent=2))

# Step 3: Establish expected user-owned ingredients

We don't want the user to have to enter staples like salt and pepper every time.

In [None]:
# Staples the user is assumed to own, so they never have to be entered by hand.
common_pantry = [
    'salt',
    'pepper',
    'oil',
    'olive oil',
    'vegetable oil',
    'butter',
]

# Step 4: Create Columns for Nutrition based on Clean.csv

In [None]:
# A recipe counts as "Low Calorie" when its calories are at or below this value.
low_calorie_threshold = 200

# https://www.nal.usda.gov/programs/fnic#:~:text=How%20many%20calories%20are%20in,find%20information%20about%20food%20composition

# 4 calories per gram for protein; 9 calories per gram for fat
CALORIES_PER_GRAM_PROTEIN = 4
CALORIES_PER_GRAM_FAT = 9

# Share-of-total-calories cut-offs used for the flags below.
HIGH_PROTEIN_MIN_SHARE = 0.2  # protein must supply MORE than this share
LOW_FAT_MAX_SHARE = 0.3       # fat must supply LESS than this share

data['Calories from Protein'] = data['Protein'] * CALORIES_PER_GRAM_PROTEIN
data['Calories from Fat'] = data['Fat'] * CALORIES_PER_GRAM_FAT

# Vectorized comparisons: one boolean per recipe, no per-row Python lambda.
data['Low Calorie'] = data['Calories'] <= low_calorie_threshold
data['High Protein'] = data['Calories from Protein'] > (data['Calories'] * HIGH_PROTEIN_MIN_SHARE)
data['Low Fat'] = data['Calories from Fat'] < (data['Calories'] * LOW_FAT_MAX_SHARE)


# Step 5: Filter on User's dietary restrictions

In [None]:
# Rename the diet-flag columns so they match the labels used in the UI output.
data = data.rename(columns={'vegetarian': 'Vegetarian', 'vegan': 'Vegan', 'glutenFree': 'Gluten Free', 'dairyFree': 'Dairy Free'})

restriction_list = 'Dietary Restrictions'
if restriction_list in UI.keys():
    # Read the selections from the UI payload (the key string itself has no
    # .keys()). list() yields the keys of a dict or the items of a list, so
    # either payload shape works.
    # NOTE(review): if this entry is a {label: bool} dict, labels set to False
    # should probably be skipped -- confirm the payload shape.
    restrictions = list(UI[restriction_list])
    for restriction in restrictions:
        # Keep only recipes flagged True for this restriction; a label with no
        # matching column raises KeyError rather than being silently ignored.
        data = data[data[restriction] == True]

# Step 6: Filter on User's nutritional needs

In [None]:
nutrition_needs = 'Nutrition'
if nutrition_needs in UI.keys():
    # Read the selections from the UI payload. list() yields the keys of a dict
    # or the items of a list, so either payload shape works.
    # NOTE(review): presumably these labels match the boolean columns built in
    # Step 4 ('Low Calorie', 'High Protein', 'Low Fat') -- confirm against the UI.
    nutrition = list(UI[nutrition_needs])
    for need in nutrition:
        # Keep only recipes flagged True for this nutritional need; a label with
        # no matching column raises KeyError rather than being silently ignored.
        data = data[data[need] == True]


# Step 7: Filter on User's Selected Cuisine

In [None]:
selected_cuisine = 'Cuisine'
if selected_cuisine in UI.keys():
    # Read the selections from the UI payload (the key string itself has no
    # .keys()). list() yields the keys of a dict or the items of a list.
    cuisine = list(UI[selected_cuisine])
    # NOTE(review): this assumes `data` has one boolean column per cuisine label,
    # and the successive filters AND together, so picking two cuisines keeps only
    # recipes flagged as both. Confirm the column layout and whether "any of the
    # selected cuisines" was intended.
    for cuisine_name in cuisine:
        data = data[data[cuisine_name] == True]

# Step 8: Match Pantry Items to Potential Ingredients in the Recipes

#### Expand each entry that names a pantry Category (e.g. spices) into every Item listed under it (e.g. paprika, cumin, coriander)

In [None]:
import ast


def _as_ingredient_list(cell):
    """Normalise one 'Ingredients' cell to a list of ingredient names.

    Values read by pd.read_csv arrive as strings (or NaN), never as lists.
    NOTE(review): the on-disk format is not visible here; this accepts either a
    Python-style list literal ("['salt', 'flour']") or a comma-separated string
    ("salt, flour") -- confirm against Clean.csv.
    """
    if isinstance(cell, (list, tuple)):
        return [str(name).strip() for name in cell]
    if not isinstance(cell, str):
        # NaN / missing value: treat as "no ingredients listed".
        return []
    text = cell.strip()
    if text.startswith('['):
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            parsed = None
        if isinstance(parsed, (list, tuple)):
            return [str(name).strip() for name in parsed]
    return [part.strip() for part in text.split(',') if part.strip()]


def _expand_categories(ingredients, mapping):
    """Return `ingredients` followed by the items of every entry that names a category."""
    expanded = list(ingredients)
    for name in ingredients:
        expanded.extend(mapping.get(name, []))
    return expanded


# how='left' keeps every recipe row even when nothing in the pantry matches it.
merged_data = data.merge(pantry, left_on='Ingredients', right_on='Category', how='left')

# Category -> list of Items. Built from `pantry`, the frame that carries the
# Category and Item columns.
category_mapping = pantry.groupby('Category')['Item'].apply(list).to_dict()

# One list of ingredient names per recipe, then the same list with every
# category name expanded into its pantry items (original entries are kept).
# NOTE(review): the expansion is applied to the recipes' ingredient column, as
# in the original cell; confirm it was not meant for the user's own inputs.
user_ingredients = data['Ingredients'].apply(_as_ingredient_list)
expanded_ingredients = user_ingredients.apply(
    lambda names: _expand_categories(names, category_mapping)
)

### replace old Ingredients column with new expanded Ingredients column 

In [None]:
# Swap in the expanded ingredients. assign() returns a new frame, so this stays
# clean even when `data` is a filtered slice from the earlier steps (direct
# column assignment on such a slice can raise SettingWithCopyWarning).
data = data.assign(Ingredients=expanded_ingredients)

# Step 9: Recommend the top 10 Recipes 

### Will be sorted by the Spoonacular Score 

#### Spoonacular score takes into account popularity, nutritional value, ingredient quality, etc.

In [None]:
def recipe_ingredients(ingredients, user_inputs):
    """Return True when every entry of `ingredients` is contained in `user_inputs`.

    An empty `ingredients` yields True, since there is nothing missing.
    """
    for needed in ingredients:
        if needed not in user_inputs:
            return False
    return True

# Keep the recipes whose every ingredient is covered by `user_inputs`, ordered
# from highest to lowest Spoonacular score.
# NOTE(review): `user_inputs` is not assigned in any of the cells shown here --
# it must be defined (presumably from the UI output) before this cell can run.
filtered_data = (
    data[data['Ingredients'].apply(lambda x: recipe_ingredients(x, user_inputs))]
    .sort_values(by='spoonacularScore', ascending=False)
)

# From here on `data` holds only the ten best-scoring matches.
data = filtered_data.head(10)



# Step 10: Vectorize the data

In [None]:
from sentence_transformers import SentenceTransformer

# Load the pretrained sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Pull the text to embed out of the frame, then encode it in one call.
# NOTE(review): 'text_column' looks like a placeholder name -- confirm which
# column of `data` holds the text that should be embedded.
texts = data['text_column'].tolist()
vectors = model.encode(texts)


# Step 11: Create a Lancedb database

In [None]:
import lancedb
from lancedb.pydantic import LanceModel, Vector

# Create a new Lancedb database (or connect to an existing one)
db = lancedb.connect('my_lancedb')


class RecipeVector(LanceModel):
    """Row schema for the 'my_vectors' table: a recipe id plus its embedding."""

    id: int
    # Fixed-size float vector; the width is taken from the embedding model so
    # the table always matches what model.encode() produces.
    vector: Vector(model.get_sentence_embedding_dimension())
    # Add other columns if needed


# Create a new table for your vectors. A dict of type-name strings is not a
# schema LanceDB understands, so the schema is given as a LanceModel class.
# mode='overwrite' replaces a table left over from an earlier run, so re-running
# the notebook neither fails on "table already exists" nor mixes in stale vectors.
db.create_table('my_vectors', schema=RecipeVector, mode='overwrite')

# Step 12: Insert vectors into Lancedb

In [None]:
# Prepare data for insertion: one record per recipe, pairing its id with its
# embedding. The 'id' column is iterated directly because the namedtuples from
# itertuples() cannot be indexed by column name (row['id'] raises TypeError).
# NOTE(review): assumes `data` has an integer-like 'id' column -- confirm.
insert_data = [
    {'id': int(recipe_id), 'vector': [float(value) for value in vector]}
    for recipe_id, vector in zip(data['id'], vectors)
]

# Insert data into Lancedb. Rows are appended with Table.add(); LanceDB tables
# have no insert() method.
db.open_table('my_vectors').add(insert_data)

# Step 13: Query the vector database

In [None]:
# Embed the query text with the same model object used for the stored vectors.
query_vector = model.encode(["your query here"])[0]

# Perform a similarity search. LanceDB exposes this as a query builder:
# search(vector) -> limit(k) -> to_pandas(); there is no query(query_vector=, top_k=).
results = (
    db.open_table('my_vectors')
    .search(query_vector)
    .limit(10)  # Number of similar items to retrieve
    .to_pandas()
)

print(results)

# Step 14: Maintain the database?