In [None]:
!pip install langchain langchain-community langchain-pinecone langchain-openai pypdf

In [None]:
from google.colab import userdata
import os

# Copy every required Colab secret into the process environment under the same
# name, in a fixed order.
for secret_name in (
    'LANGSMITH_TRACING',
    'LANGSMITH_ENDPOINT',
    'LANGSMITH_API_KEY',
    'LANGSMITH_PROJECT',
    'OPENAI_API_KEY',
    'OPENAI_EMBEDDING_MODEL',
    'PINECONE_API_KEY',
):
    os.environ[secret_name] = userdata.get(secret_name)

In [None]:
def extract_product_info(api_response):
    """Flatten the product API payload into a list of simplified product dicts.

    Reads ``api_response['body']['items']``; each entry wraps the raw record
    under the ``'item'`` key. The raw field codes are mapped onto the Korean
    labels used by the rest of the notebook.

    Args:
        api_response: Decoded JSON payload (a dict) returned by the product API.

    Returns:
        list[dict]: One dict per product with the keys '제품명', '기능성',
        '제조사', '섭취방법', '보관방법' and '주의사항'. Every value is a
        whitespace-stripped string; missing or null fields become ''.

    Raises:
        KeyError: If the payload has no 'body' key or an entry has no 'item' key.
    """
    # Output label -> raw field code in the API record.
    field_map = {
        '제품명': 'PRDUCT',
        '기능성': 'MAIN_FNCTN',
        '제조사': 'ENTRPS',
        '섭취방법': 'SRV_USE',
        '보관방법': 'PRSRV_PD',
        '주의사항': 'INTAKE_HINT1',
    }

    def _clean(value):
        # A JSON null arrives as None (dict.get's default only covers *absent*
        # keys), and calling .strip() on it would raise AttributeError.
        # Non-string scalars are stringified so the result is always a str.
        return '' if value is None else str(value).strip()

    # `or []` covers a missing, null or empty 'items' value (e.g. no results).
    items = api_response['body'].get('items') or []

    simplified_products = []
    for entry in items:
        item = entry['item']
        simplified_products.append(
            {label: _clean(item.get(code)) for label, code in field_map.items()}
        )

    return simplified_products

product_list = extract_product_info(response.json())

# Pretty-print a preview of the first three products only.
for i, p in enumerate(product_list[:3], 1):
    print(f"[{i}] {p['제품명']}")
    # Each remaining line is labelled with the same text as its dict key.
    for field in ('기능성', '제조사', '섭취방법', '보관방법', '주의사항'):
        print(f"{field}: {p[field]}")
    print("-" * 50)

In [None]:
# Show how many products ended up in product_list.
print(len(product_list))

# Document 생성

In [None]:
from langchain.schema import Document

def convert_to_documents(product_list):
    """Build one ``Document`` per product dict.

    The page text is a newline-separated list of ``label: value`` lines for the
    product name, function, intake method, storage method and precautions. The
    metadata carries the product name (intentionally duplicated from the text)
    and the manufacturer.

    Args:
        product_list: Iterable of product dicts keyed by '제품명', '기능성',
            '제조사', '섭취방법', '보관방법' and '주의사항'.

    Returns:
        list: ``Document`` objects in the same order as ``product_list``.
    """
    # Fields rendered into the page text, in output order.
    text_fields = ('제품명', '기능성', '섭취방법', '보관방법', '주의사항')

    return [
        Document(
            page_content="\n".join(
                f"{field}: {entry[field]}" for field in text_fields
            ),
            metadata={
                "제품명": entry["제품명"],  # also present in the page text
                "제조사": entry["제조사"],
            },
        )
        for entry in product_list
    ]

# Convert the extracted product dicts into Document objects.
documents = convert_to_documents(product_list)

# 임베딩

In [None]:
from pinecone import Pinecone
from langchain_openai import OpenAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Pinecone client built with the API key read from the PINECONE_API_KEY
# environment variable.
# NOTE(review): `pc` is not referenced again in the visible cells — confirm it is needed.
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# Embedding model; the model name is read from OPENAI_EMBEDDING_MODEL.
embeddings = OpenAIEmbeddings(model=os.environ['OPENAI_EMBEDDING_MODEL'])

# Create the vector store object (client) from `documents`, targeting the
# 'healthcare' index.
# NOTE(review): from_documents presumably also embeds and uploads the documents,
# so re-running this cell may insert duplicates — confirm.
vector_store = PineconeVectorStore.from_documents(
    documents,
    embedding=embeddings,
    index_name='healthcare'
)

# Pinecone 저장

In [None]:
# NOTE(review): `langchain.vectorstore` (singular) does not appear to be a real
# module — LangChain spells it `langchain.vectorstores` — so this import likely
# raises ModuleNotFoundError. The imported name also looks unused in the visible
# cells, where the vector store is built via `langchain_pinecone`. Confirm, then
# fix or remove.
from langchain.vectorstore import pinecone

