In [None]:
import pandas as pd
import chromadb
from chromadb.utils import embedding_functions
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document

In [None]:
# 1. Load the data
# Both paths are relative, so they resolve against the current working
# directory of the running kernel.
ingredients_df = pd.read_csv(filepath_or_buffer="ingredients.csv")
cosmetics_df = pd.read_csv(filepath_or_buffer="cosmetics.csv")

In [None]:
# 2. Prepare the embedding model
# The same embedding object is passed to every Chroma store built below.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
# 3. Convert the CSV rows into LangChain Documents
# `documents` collects ingredient and cosmetic entries together;
# `cosmetics` collects the cosmetic entries only. They are two distinct lists.
documents, cosmetics = [], []

In [None]:
# Ingredient Documents
# One Document per row of ingredients_df: the 'description' column is the
# page content, and the 'ingredient' column is stored as the metadata name.
documents.extend(
    Document(
        page_content=ingredient_row['description'],
        metadata={"type": "ingredient", "name": ingredient_row['ingredient']},
    )
    for _, ingredient_row in ingredients_df.iterrows()
)

In [None]:
# Cosmetic Documents
# Each row of cosmetics_df is flattened into one text string. A separate
# Document instance is built for each target list, so `cosmetics` and
# `documents` never share the same Document object.
for _, product in cosmetics_df.iterrows():
    product_name = product['product_name']
    text = (
        f"제품명: {product_name}, 성분: {product['ingredient']}, "
        f"리뷰: {product['reviews']}, 사용법: {product['usage']}"
    )
    info = {"type": "cosmetic", "product_name": product_name}
    # Order matters: append to `cosmetics` first, then to `documents`.
    for target in (cosmetics, documents):
        target.append(Document(page_content=text, metadata=info))

In [None]:
# 4. Store in the Chroma vector DB
# Build a store from the combined list (ingredients + cosmetics), using
# "./chroma_db" as its persist directory.
vectordb = Chroma.from_documents(documents=documents, embedding=embedding_model, persist_directory="./chroma_db")
vectordb.persist()

print("ChromaDB에 저장 완료")

In [None]:
# Build a second store from the cosmetics-only list, kept in its own
# persist directory ("./cosmetic_chroma_db"), separate from "./chroma_db".
cosmetic_vectordb = Chroma.from_documents(documents=cosmetics, embedding=embedding_model, persist_directory="./cosmetic_chroma_db")
cosmetic_vectordb.persist()

print("ChromaDB에 저장 완료")