In [1]:
! pip install torch torchvision torchaudio PyPDF2 transformers langchain langchain-pinecone pinecone-client langchain_community bitsandbytes

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-pinecone
  Downloading langchain_pinecone-0.2.2-py3-none-any.whl.metadata (1.6 kB)
Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting langchain_community
  Downloading langchain_community-0.3.16-py3-none-any.whl.metadata (2.9 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.

In [2]:
# Importing the necessary libraries
import os
from torch import bfloat16
import torch
import transformers
from transformers import AutoTokenizer
from time import time
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_pinecone import PineconeEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain.vectorstores import Pinecone as LangChainPinecone
from pinecone import Pinecone, ServerlessSpec

In [5]:
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec
import os

In [17]:
# Pull settings from the .env file, then read the Pinecone API key from the
# process environment (the lookup yields None when the variable is unset).
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

In [18]:
# Pinecone client; later cells use it to manage the index and open a handle to it.
pc = Pinecone(
    api_key=PINECONE_API_KEY,
)

In [9]:
# Drop any existing 'recipes' index so it can be recreated below.
# Guarded with an existence check: on a fresh project there is no index yet,
# and an unconditional delete would raise and stop a top-to-bottom run.
if 'recipes' in pc.list_indexes().names():
    pc.delete_index('recipes')

In [6]:
# Pinecone settings shared by the cells below
# (Pinecone is the vector database that stores and retrieves the embeddings).
index_name = "recipes"                # index holding the recipe vectors
namespace = "recipeVector"            # namespace used for upserts and retrieval
model_name = "multilingual-e5-large"  # embedding model identifier

# Serverless deployment target for the index
spec = ServerlessSpec(
    region='us-east-1',
    cloud='aws',
)

In [10]:
# Provision the index that will hold the recipe embeddings
pc.create_index(
    name=index_name,
    spec=spec,
    metric='cosine',   # similarity metric
    dimension=1024,    # dimensionality of the embeddings
)

In [11]:
from PyPDF2 import PdfReader

# Load and split the recipes
def load_and_split_recipes(
    file_path,
    delimiter="***********************************************",
):
    """Read a recipe PDF and split it into one record per recipe.

    The text of every page is extracted, the pages are joined with newlines,
    and the result is split on ``delimiter``. Chunks that are empty after
    stripping whitespace are dropped.

    Args:
        file_path: Location of the PDF; passed straight to ``PdfReader``.
        delimiter: Separator string found between recipes in the PDF text.
            Defaults to the asterisk line the original notebook split on.

    Returns:
        A list of dicts, one per non-empty chunk, with the keys:
          * ``"name"``: the text after the first colon on the chunk's first
            line when that line contains ``"Recipe Name"``; otherwise (or when
            nothing follows the colon) ``"Unknown Recipe"``.
          * ``"content"``: the full stripped chunk text.
    """
    reader = PdfReader(file_path)

    # Join pages with a newline so the last line of one page is not fused with
    # the first line of the next. `or ""` tolerates pages for which
    # extract_text() yields nothing.
    text = "\n".join((page.extract_text() or "") for page in reader.pages)

    structured_data = []
    for raw_chunk in text.split(delimiter):
        chunk = raw_chunk.strip()
        if not chunk:
            continue

        header = chunk.split("\n", 1)[0]
        name = "Unknown Recipe"
        if "Recipe Name" in header:
            # partition() keeps everything after the FIRST colon, so names that
            # themselves contain ":" stay intact, and a header without any
            # colon falls back to the default instead of raising IndexError.
            _, _, remainder = header.partition(":")
            name = remainder.strip() or name

        structured_data.append({
            "name": name,
            "content": chunk
        })
    return structured_data

# Parse the cookbook PDF into recipe records and report how many were found.
# Point file_path at your uploaded file.
file_path = "Recipes.pdf"
recipes = load_and_split_recipes(file_path)
print(f"Loaded {len(recipes)} recipes.")

Loaded 40 recipes.


In [12]:
from sentence_transformers import SentenceTransformer

# Multilingual E5-large encoder, used below to embed the recipe names
embedding_model = SentenceTransformer(
    "intfloat/multilingual-e5-large",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/160k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

In [13]:
# Handle to the recipes index, used for the upserts below
index = pc.Index(
    index_name,
)

In [14]:
# Embed every recipe name and upsert the vectors into Pinecone.
#  * All names are encoded in one batched call instead of one model call per
#    recipe.
#  * Vectors are sent in small batches rather than one network request per
#    recipe; the batch size is kept modest because each record also carries
#    the full recipe text as metadata.
#  * The vector id is the recipe name, as before, but a repeated name (e.g.
#    several "Unknown Recipe" entries) gets a numeric suffix so later recipes
#    no longer overwrite earlier ones that share the same id.
UPSERT_BATCH_SIZE = 32

recipe_names = [recipe['name'] for recipe in recipes]
name_embeddings = embedding_model.encode(recipe_names)

name_counts = {}
vectors = []
for recipe, embedding in zip(recipes, name_embeddings):
    name = recipe['name']
    occurrence = name_counts.get(name, 0) + 1
    name_counts[name] = occurrence
    vector_id = name if occurrence == 1 else f"{name}-{occurrence}"
    metadata = {"name": name, "text": recipe['content']}
    vectors.append((vector_id, embedding.tolist(), metadata))

for start in range(0, len(vectors), UPSERT_BATCH_SIZE):
    index.upsert(vectors[start:start + UPSERT_BATCH_SIZE], namespace=namespace)

In [15]:
# Define the embeddings model used when connecting to the Pinecone knowledge store.
# The key must come from PINECONE_API_KEY (read from the environment earlier);
# the previous `api_key` name is never defined in this notebook and raised a
# NameError on a fresh kernel.
embeddings = PineconeEmbeddings(
    model=model_name,
    pinecone_api_key=PINECONE_API_KEY,
)

In [16]:
# Open the already-populated Pinecone index as a LangChain vector store;
# this is the knowledge base the retriever reads from.
knowledge = PineconeVectorStore.from_existing_index(
    embedding=embeddings,
    index_name=index_name,
    namespace=namespace,
)

In [17]:
from torch import bfloat16
import transformers

# Configuration for 4-bit quantization to save memory
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,                      # Enable 4-bit precision loading
    bnb_4bit_quant_type="nf4",              # Use NF4 quantization
    bnb_4bit_use_double_quant=True,         # Enable double quantization
    bnb_4bit_compute_dtype=bfloat16         # Use bfloat16 for compute
)

# Access token and model name.
# The token is read from the HF_TOKEN environment variable (it can live in the
# same .env file as the Pinecone key) instead of being hard-coded in the
# notebook. A token that was ever committed should be revoked. When HF_TOKEN is
# unset, None is passed through to the loaders below.
access_token = os.environ.get("HF_TOKEN")
model_nm = "meta-llama/Llama-2-7b-chat-hf"

# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally. It is kept to preserve existing behavior; confirm it is needed
# for this model.

# Load model configuration (`token` replaces the deprecated `use_auth_token`)
model_config = transformers.AutoConfig.from_pretrained(
    model_nm,
    token=access_token,
    trust_remote_code=True
)

# Load the model with 4-bit quantization
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_nm,
    token=access_token,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,          # Apply the BitsAndBytes configuration
    device_map="auto",                       # Automatically map layers to available devices
)

# Load the tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_nm,
    token=access_token
)

print("Model and tokenizer loaded successfully!")




config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]



model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Model and tokenizer loaded successfully!


In [18]:
# Configuration for loading the model with 4-bit quantization to save memory
# bnb_config = transformers.BitsAndBytesConfig(
#             load_in_4bit=True,
#             bnb_4bit_quant_type='nf4',
#             bnb_4bit_use_double_quant=True,
#             bnb_4bit_compute_dtype=bfloat16
# )

# # Load the Llama-3 model and tokenizer from Hugging Face using an access token read from the environment
# access_token = os.environ.get("HF_TOKEN")  # hard-coded token removed; never commit credentials
# model_nm = "meta-llama/Meta-Llama-3-8B"
# model_config = transformers.AutoConfig.from_pretrained(model_nm, use_auth_token=access_token,  trust_remote_code=True)
# model = transformers.AutoModelForCausalLM.from_pretrained(
#           model_nm,
#           use_auth_token=access_token,
#           trust_remote_code=True,
#           config=model_config,
#           quantization_config=bnb_config,
#           device_map='auto',
# )
# tokenizer = AutoTokenizer.from_pretrained(model_nm, use_auth_token=access_token)

In [19]:
# Wrap the loaded model and its tokenizer in a text-generation pipeline
query_pipeline = transformers.pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    device_map="auto",
    torch_dtype=torch.float16,
)

Device set to use cuda:0


In [20]:
# Expose the transformers pipeline as a LangChain LLM
llm = HuggingFacePipeline(
    pipeline=query_pipeline,
)

  llm = HuggingFacePipeline(pipeline=query_pipeline)


In [27]:
# Define a prompt template for the LLM to generate responses.
# The template has two positional `{}` slots, filled in order by
# `prompt_template.format(context, query)`: the first (after "*Context*:")
# receives the previous-conversation context, the second (after "*User*:")
# receives the user's query. Because the slots are positional, the argument
# order passed to `.format` matters.
prompt_template = """
    *Context*:{}
    You are a virtual nutritionist and culinary expert. Given a user query and a relevant recipe, provide a comprehensive response that includes:

    * Recipe details: Name, ingredients and instructions. A URL to the original source (if available).
    * Personalized recommendations: Tailor the recipe to the user's dietary needs, preferences, or allergies.
    * Cooking tips: Offer helpful advice or substitutions to enhance the dish.

    You can use the context given below from the previous conversation if the context is not empty.
    Format:
    Use the context given from the previous conversation if the context is not empty.

    *User*:{}
    Response:
    """

In [28]:
# Retrieval-augmented QA chain: documents fetched from the Pinecone knowledge
# base are passed to the LLM using the "stuff" chain type.
qa = RetrievalQA.from_chain_type(
    retriever=knowledge.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

In [29]:
# Inputs for a single run: no previous conversation, one recipe question
context, query = "", "How to make palak paneer"

In [30]:
# Fill the template with the conversation context and the question, then run
# the filled prompt through the QA system to generate an answer
filled_prompt = prompt_template.format(context, query)
answer = qa.run(filled_prompt)


In [31]:
# Print the answer starting at the first occurrence of 'Helpful', skipping
# whatever text precedes it.
# `str.find` returns -1 when the marker is absent; slicing from -1 would print
# only the last character, so the whole answer is printed in that case.
# The offset gets its own name so the Pinecone `index` handle created earlier
# is not overwritten by an integer.
helpful_start = answer.find('Helpful')
print(answer[helpful_start:] if helpful_start != -1 else answer)

Helpful Answer:
    Palak paneer is a delicious and healthy vegetarian dish that can be made with spinach, paneer (Indian cheese), and various spices. Here are the details of the recipe:

Recipe Name: Palak Paneer / Spinach Cottage Cheese Curry
Type: Vegetarian
Cuisine: Indian
Ingredients:
• 200g paneer (cubed)
• 250g spinach leaves
• 2 medium onions (chopped)
• 2 tomatoes (pureed)
• 2 cloves garlic (minced)
• 1 teaspoon ginger paste
• 1 green chili (optional)
• 1/2 teaspoon turmeric
• 1 teaspoon cumin seeds
• 1 teaspoon garam masala
• 2 tablespoons oil

Procedure:
1. Blanch spinach leaves in boiling water for 2-3 minutes. Cool and blend to a smooth paste.
2. Heat oil in a pan, add cumin seeds and let them splutter.
3. Add onions, garlic, ginger, and green chili; sauté until golden.
4. Stir in tomato puree, turmeric, and garam masala. Cook until oil separates.
5. Add spinach puree, mix well, and simmer for 5 minutes.
6. Add paneer cubes and cook for another 5 minutes. Finish with cream