### Pinecone Vector DB

In [8]:
from dotenv import load_dotenv
load_dotenv()


# 파일 로드
from langchain_community.document_loaders import CSVLoader

loader = CSVLoader('./winemag-data-130k-v2.csv', encoding='utf-8')

documents = loader.load()   # 모든 행을 Document 객체 리스트로 변환

print(len(documents))
print(documents[0])

129971
page_content=': 0
country: Italy
description: Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity.
designation: Vulkà Bianco
points: 87
price: 
province: Sicily & Sardinia
region_1: Etna
region_2: 
taster_name: Kerin O’Keefe
taster_twitter_handle: @kerinokeefe
title: Nicosia 2013 Vulkà Bianco  (Etna)
variety: White Blend
winery: Nicosia' metadata={'source': './winemag-data-130k-v2.csv', 'row': 0}


In [9]:
# 임베딩 모델 생성
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-3-small')

In [10]:
%pip install langchain-pinecone

Note: you may need to restart the kernel to use updated packages.


In [18]:
# Pinecone 클라이언트 생성
from langchain_pinecone import PineconeVectorStore
import os

PINECONE_INDEX_NAME = os.getenv('PINECONE_INDEX_NAME')
PINECONE_NAMESPACE = os.getenv('PINECONE_NAMESPACE')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')

vector_db = PineconeVectorStore.from_documents(
    documents[:30000],  # 업로드할 문서 리스트
    embeddings, # 사용할 임베딩 모델
    index_name=PINECONE_INDEX_NAME,
    namespace=PINECONE_NAMESPACE, # 데이터를 구분 보관할 네임스페이스
    pinecone_api_key=PINECONE_API_KEY
)

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for text-embedding-3-small in organization org-tEkN7kkb6kFBT1jbUmEAuN0y on tokens per min (TPM): Limit 1000000, Used 982799, Requested 154363. Please try again in 8.229s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

In [20]:
# 유사도 검색

# 클라이언트 객체 생성
vector_db = PineconeVectorStore(
    embedding=embeddings,
    index_name=PINECONE_INDEX_NAME,
    namespace=PINECONE_NAMESPACE,
    pinecone_api_key=PINECONE_API_KEY
)

results = vector_db.similarity_search(
    "소고기의 육즙과 향긋한 허브향이 어우러져 깊은 풍미를 내는 음식입니다.",
    k=10,
    namespace=PINECONE_NAMESPACE)
results

[Document(id='4e14a4f7-354a-4cd5-9fa1-1018ffe40979', metadata={'row': 78.0, 'source': './winemag-data-130k-v2.csv'}, page_content=": 78\ncountry: US\ndescription: Some rosés are made simply by bleeding the juice from the fermenter, to concentrate the remaining wine. Whether or not that is the case here, the wine has the simple pleasant lightly candied strawberry flavors one might expect from such a technique. It's fruity and undemanding. Drink up.\ndesignation: Rosé of\npoints: 86\nprice: 25.0\nprovince: Oregon\nregion_1: Eola-Amity Hills\nregion_2: Willamette Valley\ntaster_name: Paul Gregutt\ntaster_twitter_handle: @paulgwine\ntitle: Z'IVO 2015 Rosé of Pinot Noir (Eola-Amity Hills)\nvariety: Pinot Noir\nwinery: Z'IVO"),
 Document(id='200593a6-ae22-4277-aab4-3b5a14c564d4', metadata={'row': 805.0, 'source': './winemag-data-130k-v2.csv'}, page_content=': 805\ncountry: US\ndescription: Bone dry and replete with skin flavors of pear and grapefruit pith, this is perfect for those who want 