In [None]:
import os 
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

# Call python-dotenv's loader before any os.getenv lookup below; presumably this
# pulls PINECONE_API_KEY from a local .env file into the environment — confirm
# the .env location for your setup.
load_dotenv()

In [None]:

# Build the client outside of any try/except: if this fails (e.g. a missing or
# invalid API key) the error should surface here, not later as a NameError on
# `pc` when the index is opened.
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

# Create the index only when it does not exist yet, so re-running this cell is
# safe without silencing unrelated failures (network, quota, bad spec, ...).
# dimension=384 must match the output size of the embedding model used below.
if 'rag' not in pc.list_indexes().names():
    pc.create_index(
        name='rag',
        dimension=384,
        metric='cosine',
        spec=ServerlessSpec(
            cloud='aws',
            region='us-east-1',
        ),
    )

In [None]:
import json

# Read the reviews through a context manager so the file handle is closed
# deterministically, and decode explicitly as UTF-8 rather than relying on the
# platform's default encoding.
with open('reviews.json', encoding='utf-8') as reviews_file:
    data = json.load(reviews_file)

# Last expression of the cell: display the loaded list of reviews.
data['reviews']

In [None]:
from sentence_transformers import SentenceTransformer

# Load the embedding model once; constructing it inside the loop reloads the
# weights for every single review.
model = SentenceTransformer("all-MiniLM-L6-v2")

reviews = data['reviews']

# Encode all review texts in one batched call instead of one call per review.
embeddings = model.encode([review['review'] for review in reviews])
print(embeddings.shape)

# Build one upsert record per review. Vectors are converted to plain Python
# lists so the payload does not depend on numpy-specific serialization.
# NOTE(review): ids are taken from review['professor']; if the same professor
# appears in several reviews, later records presumably overwrite earlier ones
# on upsert — confirm ids are unique in reviews.json.
processed_data = [
    {
        'values': vector.tolist(),
        'id': review['professor'],
        'metadata': {
            'review': review['review'],
            'subject': review['subject'],
            'stars': review['stars'],
        },
    }
    for review, vector in zip(reviews, embeddings)
]

In [None]:
# Get a handle on the 'rag' index and send every prepared record to it under
# the "ns1" namespace; the upsert call's return value is the cell output.
index = pc.Index('rag')
index.upsert(vectors=processed_data, namespace="ns1")

In [None]:
# Show the result of describe_index_stats() as the cell output; useful as a
# quick check after the upsert (presumably reports per-namespace vector counts
# — confirm against the Pinecone docs).
index.describe_index_stats()