# Pinecone 환경 설정 확인

In [7]:
from dotenv import load_dotenv
import os
import pinecone
from pinecone import Pinecone as PineconeClient
from pinecone import ServerlessSpec

In [8]:
# Load variables from a local .env file (if one exists) into the process
# environment so the lookups below can see them. With its default settings
# load_dotenv() leaves variables that are already set untouched.
load_dotenv()

pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT")

# Fail with a readable message instead of a TypeError from slicing None.
missing = [
    name
    for name, value in (
        ("PINECONE_API_KEY", pinecone_api_key),
        ("PINECONE_ENVIRONMENT", pinecone_environment),
    )
    if value is None
]
if missing:
    raise RuntimeError(f"Missing environment variable(s): {', '.join(missing)}")

# Sanity check: print only the first 10 characters of each value so the full
# API key never ends up in the notebook output.
print(pinecone_api_key[:10])
print(pinecone_environment[:10])

pcsk_2p7mV
us-east-1-


In [None]:
# Create the client used to talk to the Pinecone service.
# NOTE(review): recent Pinecone SDK releases appear to need only `api_key`;
# confirm whether `environment` is still consumed by the installed version.
pc = PineconeClient(
    api_key=pinecone_api_key,
    environment=pinecone_environment,
)

In [None]:
# Create the index on the Pinecone service only if it does not exist yet.
index_name = "example-index"
embedding_dim = 1536  # vector dimension of text-embedding-3-small
if not pc.has_index(index_name):
    # Serverless index on AWS in us-east-1, sized to the embedding dimension.
    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
# Obtain a handle to the named index from the Pinecone client.
index = pc.Index(index_name)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Configure the OpenAI embedding model: text-embedding-3-small
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [13]:
from langchain_core.documents import Document

# Sample documents: each has text content plus a "source" metadata tag.
doc1 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)
doc2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)
doc3 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

In [None]:
from langchain_pinecone import PineconeVectorStore

# Vector store backed by the Pinecone index, using `embeddings` as its
# embedding model.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Pass fixed ids so that re-running this cell writes to the same three
# records instead of inserting duplicates under newly generated ids.
documents = [doc1, doc2, doc3]
document_ids = ["doc-1", "doc-2", "doc-3"]
vector_store.add_documents(documents=documents, ids=document_ids)

In [16]:
# Report the total vector count taken from the index statistics.
print(f"현재 벡터 DB 내 벡터 수: {index.describe_index_stats().total_vector_count}")

현재 벡터 DB 내 벡터 수: 3


In [17]:
# Try a query: fetch the two documents most similar to the query text
# and list each hit together with its "source" metadata.
query1 = "LangChain은 LLM을 쉽게 작업할 수 있도록 추상화를 제공합니다."
results1 = vector_store.similarity_search(query=query1, k=2)

print(f"쿼리 : {query1}")
for result in results1:
    print(f"* {result.page_content} (source: {result.metadata['source']})")

쿼리 : LangChain은 LLM을 쉽게 작업할 수 있도록 추상화를 제공합니다.
* Building an exciting new project with LangChain - come check it out! (source: tweet)
* LangGraph is the best framework for building stateful, agentic applications! (source: tweet)


In [18]:
# Same query as before, but restricted to documents whose "source"
# metadata equals "tweet".
results2 = vector_store.similarity_search(
    query=query1,
    k=2,
    filter={"source": "tweet"},
)

print(f"쿼리 : {query1}")
for result in results2:
    print(f"* {result.page_content} (source: {result.metadata['source']})")

쿼리 : LangChain은 LLM을 쉽게 작업할 수 있도록 추상화를 제공합니다.
* Building an exciting new project with LangChain - come check it out! (source: tweet)
* LangGraph is the best framework for building stateful, agentic applications! (source: tweet)


In [21]:
# Search with scores: take the single best match among documents whose
# "source" metadata equals "news" and print its score to 4 decimals.
query3 = "Will it be hot tomorrow"
results3 = vector_store.similarity_search_with_score(
    query=query3,
    k=1,
    filter={"source": "news"},
)

print(f"쿼리 : {query3}")
for result, score in results3:
    print(f"* SIM=({score:.4f}) {result.page_content} (source: {result.metadata['source']})")

쿼리 : Will it be hot tomorrow
* SIM=(0.5440) The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. (source: news)


In [23]:
# Unfiltered scored search over the top three matches.
# NOTE(review): the misspelling in the query text looks intentional (checking
# how a typo affects the score) — confirm before "fixing" it.
query3 = "Will it be hot tomorrw"
results4 = vector_store.similarity_search_with_score(query=query3, k=3)

print(f"쿼리 : {query3}")
for result, score in results4:
    print(f"* SIM=({score:.4f}) {result.page_content} (source: {result.metadata['source']})")

쿼리 : Will it be hot tomorrw
* SIM=(0.5091) The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. (source: news)
* SIM=(0.0756) Building an exciting new project with LangChain - come check it out! (source: tweet)
* SIM=(0.0406) LangGraph is the best framework for building stateful, agentic applications! (source: tweet)
