In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone, ServerlessSpec
import json
import os
from dotenv import load_dotenv
import os

# Pull settings from a .env file into the process environment so the
# Pinecone API key does not have to be hard-coded in the notebook.
load_dotenv()

# Pinecone client used by the following cells to create and open the index.
pc = Pinecone(
    api_key=os.environ.get("PINECONE_API_KEY"),
)

In [None]:

# Create the serverless index only when it does not exist yet, so that
# re-running this cell (e.g. Restart Kernel -> Run All) does not fail with an
# "index already exists" error.
# NOTE: the name is misspelled ("restuarant"); it is kept as-is because the
# upsert cell further down opens the index by this exact string.
if "rag-restuarant" not in pc.list_indexes().names():
    pc.create_index(
        name="rag-restuarant",
        # Must match the vector length produced by the embedding model used in
        # this notebook (sentence-transformers/all-MiniLM-L6-v2).
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [None]:
# Embedding model: a HuggingFace sentence-transformers model behind LangChain's
# embeddings wrapper. It stands in for OpenAI embeddings when you do not have
# an OpenAI account with credits.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [None]:
# Read the raw review data inside a context manager so the file handle is
# closed as soon as parsing finishes; the encoding is pinned so the result
# does not depend on the platform's default locale.
with open("reviews.json", "r", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the restaurant entries for a quick visual check.
data["restaurants"]

In [None]:
# Build one Pinecone record per review: embedding vector, unique id, metadata.
# The `hf_embeddings` model created in the earlier cell is reused here instead
# of loading the same model a second time.

# Load the mock data from reviews.json
with open("reviews.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# One dict per review, in the shape expected by the upsert cell below.
processed_data = []

for restaurant in data["restaurants"]:
    # enumerate() gives each review its own position within the restaurant,
    # which is used to build a per-review record id.
    for review_index, review in enumerate(restaurant["reviews"]):
        # Generate the embedding for the review text
        embedding = hf_embeddings.embed_query(review["review"])

        processed_data.append(
            {
                "values": embedding,
                # The id must be unique per record: an upsert overwrites any
                # existing record with the same id, so keying on the bare
                # restaurant name would keep only one review per restaurant.
                "id": f"{restaurant['name']}-{review_index}",
                "metadata": {
                    "review": review["review"],
                    "cuisine": restaurant["cuisine"],
                    "category": restaurant["category"],
                    "restaurant_rating": restaurant["rating"],
                    "review_rating": review["rating"],
                },
            }
        )

# Fail loudly if two records still share an id (e.g. two restaurants with the
# same name), because such records would silently overwrite each other.
record_ids = [record["id"] for record in processed_data]
assert len(record_ids) == len(set(record_ids)), "duplicate record ids in processed_data"

# NOTE: records upserted by an earlier run under a bare restaurant-name id are
# not removed by this change; delete them from the index if they exist.

# Persist the records so they can be inspected or reused without re-embedding.
with open("processed_data.json", "w", encoding="utf-8") as outfile:
    json.dump(processed_data, outfile, indent=4)

print("Embeddings created and saved to 'processed_data.json'.")

In [None]:
# Spot-check the first record (id, embedding values, metadata) before it is
# uploaded to Pinecone.
processed_data[0]

In [8]:
# Insert the embeddings into the Pinecone index.
index = pc.Index("rag-restuarant")

# Upload in chunks rather than one request: Pinecone limits the size of a
# single upsert request, so sending every record at once fails once the data
# set grows. With `batch_size` the client splits the list itself and still
# returns a single response object.
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
    batch_size=100,
)

In [None]:
# Show the statistics Pinecone reports for the index, as a check that the
# upserted records are there.
# NOTE(review): the counts may lag briefly right after an upsert — presumably
# due to eventual consistency; re-run the cell if they look low. TODO confirm.
index.describe_index_stats()