In [1]:
from sentence_transformers import SentenceTransformer
import json
from pinecone import Pinecone
from constants import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

# API credentials are read from the process environment; each one is None
# when the corresponding variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Model name handed to SentenceTransformer(...) when embedding summaries.
ENCODE_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

In [3]:
def get_transform(json_file):
    """Load summary records from a JSON file and split them for indexing.

    The file must contain a JSON array of objects, each carrying a
    ``summary`` key. For every record the summary text is separated from the
    remaining fields, and a stable ID is derived from the summary text.

    Args:
        json_file: Path of the JSON file to read (decoded as UTF-8).

    Returns:
        A ``(summary_list, metadata_list, id_list)`` tuple of equal-length
        lists: the summary strings, the records with ``summary`` removed,
        and one hexadecimal SHA-256 digest string per summary.

    Raises:
        KeyError: If a record has no ``summary`` key.
    """
    # Function-scope import so this cell does not depend on the import cell.
    import hashlib

    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    summary_list = []
    metadata_list = []
    id_list = []
    for cafe in data:
        # pop() both reads the text and strips it from the metadata dict.
        summary = cafe.pop("summary")
        summary_list.append(summary)
        metadata_list.append(cafe)
        # Built-in hash() is salted per interpreter process for str, so it
        # would give a different ID on every kernel restart and re-running
        # the upload would duplicate records. A content digest is stable.
        id_list.append(hashlib.sha256(summary.encode("utf-8")).hexdigest())
    return summary_list, metadata_list, id_list

In [4]:
# Encoders already constructed, keyed by model name, so that repeated calls
# reuse one instance instead of reloading the model every time.
_ENCODER_CACHE = {}


def SentenceTransformer_model(summary_list):
    """Embed summaries with the model named by ``ENCODE_MODEL_NAME``.

    The encoder is constructed on first use and kept in ``_ENCODER_CACHE``;
    later calls with the same model name reuse it.

    Args:
        summary_list: The texts to embed, passed unchanged to ``encode``.

    Returns:
        The result of ``encode`` converted with ``.tolist()`` (presumably one
        vector of floats per summary -- confirm against the library docs).
    """
    encoder = _ENCODER_CACHE.get(ENCODE_MODEL_NAME)
    if encoder is None:
        encoder = SentenceTransformer(ENCODE_MODEL_NAME)
        _ENCODER_CACHE[ENCODE_MODEL_NAME] = encoder
    return encoder.encode(summary_list).tolist()

In [5]:
def convert_to_vector(id_list: list[str], embeddings, metadata_list):
    """Combine IDs, embeddings and metadata into one record per item.

    The three inputs are walked in parallel; iteration stops at the shortest
    of them.

    Args:
        id_list: Record identifiers; each is converted with ``str()``.
        embeddings: Embedding values, one entry per record.
        metadata_list: Metadata objects, one entry per record.

    Returns:
        A list of dicts with the keys ``"id"``, ``"values"`` and
        ``"metadata"``.
    """
    return [
        {"id": str(record_id), "values": vector, "metadata": meta}
        for record_id, vector, meta in zip(id_list, embeddings, metadata_list)
    ]

In [6]:
def process_data(FILE_PATH, index_name='items', batch_size=100):
    """Embed the summaries in a JSON file and upsert them into Pinecone.

    Runs the whole pipeline for one file: load and split the records, embed
    the summaries, assemble vector records, and upsert them in batches.

    Args:
        FILE_PATH: Path of the summary JSON file, passed to ``get_transform``.
        index_name: Name of the Pinecone index to write to.
        batch_size: Maximum number of vectors sent per upsert request.

    Returns:
        The number of vectors sent to the index (0 when the file has no
        records).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    summary_list, metadata_list, id_list = get_transform(FILE_PATH)
    if not summary_list:
        # Nothing to index: skip the model, the client and the network call.
        return 0

    embeddings = SentenceTransformer_model(summary_list)
    vector_list = convert_to_vector(id_list, embeddings, metadata_list)

    pc = Pinecone(api_key=PINECONE_API_KEY)
    index = pc.Index(index_name)
    # Send bounded chunks rather than one request whose size grows with the
    # file, so large files do not run into per-request limits.
    for start in range(0, len(vector_list), batch_size):
        index.upsert(vectors=vector_list[start:start + batch_size])
    return len(vector_list)

In [6]:
# Summary files to index. process_data is called once per file, in the order
# listed, so a fresh "Restart & Run All" uploads every file exactly once
# instead of relying on eight separately executed copy-pasted cells.
SUMMARY_FILES = [
    'summaries/cafe_summary.json',
    'summaries/tandoor_summary.json',
    'summaries/ginger_and_soy.json',
    'summaries/il_forno_summary.json',
    'summaries/panera_summary.json',
    'summaries/devil_summary.json',
    'summaries/JB_summary.json',
    'summaries/sazon_summary.json',
]

for summary_file in SUMMARY_FILES:
    process_data(summary_file)