In [1]:
import dotenv

dotenv.load_dotenv()

True

In [22]:
from typing import List
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

class PDFParser:
    def load_documents(self, source_uris: List[str]):
        docs = []
        for source_uri in source_uris:
            loader = PDFPlumberLoader(source_uri)
            docs.extend(loader.load())

        return docs
    
    def trim_path(self, path):
        return path.lstrip('./').rsplit('.', 1)[0]
    
    def format_docs(self, docs):
        return "\n".join(
            [
                f"<document><content>{doc.page_content}</content><source>{self.trim_path((doc.metadata['source']))}</source></document>"
                for doc in docs
            ]
        )

    def create_text_splitter(self):
        return RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)




In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer


In [27]:
import os
from openai import OpenAI
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance, SparseVectorParams, SparseIndexParams, SparseVector
from konlpy.tag import Okt
import re

class QdrantHandler:
    def __init__(self, host="localhost", port=6333):
        self.client = QdrantClient(host=host, port=port)
        self.openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    def create_collection(self, name: str, dim: int, distance: Distance = Distance.COSINE):
        self.client.recreate_collection(
            collection_name=name,
            vectors_config={
                "dense": VectorParams(size=dim, distance=distance)
            },
            sparse_vectors_config={
                "sparse": SparseVectorParams(
                    index=SparseIndexParams(
                        on_disk=False
                    )
                )
            }
        )
        print(f"컬렉션 '{name}' 생성 완료 (dim={dim}, distance={distance.value})")

    def get_dense_embedding(self, text: str) -> list:
        """
        OpenAI text-embedding-3-small로 임베딩 생성
        """
        response = self.openai_client.embeddings.create(
            model="text-embedding-3-small",
            input=text
        )
        return response.data[0].embedding
    
    def get_sparse_embedding(self, texts: list[str]) -> list[SparseVector]:
        vectorizer = TfidfVectorizer(tokenizer=self.tokenize_okt)
        tfidf = vectorizer.fit_transform(texts)

        sparse_vectors = []
        for i in range(tfidf.shape[0]):
            row = tfidf.getrow(i)
            sparse_vectors.append(
                SparseVector(indices=row.indices.tolist(), values=row.data.tolist())
            )
        return sparse_vectors
    
    def tokenize_okt(self, text):
        tokenizer = Okt()
        return tokenizer.morphs(text)
    
    def hierarchical_split(self, text: str) -> list[str]:
        chunks = []

        # 1. 문단 단위 분할 (\n\n 또는 \r\n\r\n)
        paragraphs = re.split(r'\n\s*\n', text.strip())
        
        for para in paragraphs:
            para = para.strip()
            if not para:
                continue

            # 2. 문장 단위 분할 (마침표, 물음표, 느낌표 뒤에 띄어쓰기)
            sentences = re.split(r'(?<=[.!?])\s+', para)

            for sent in sentences:
                sent = sent.strip()
                if not sent:
                    continue

                # 3. 쉼표 단위 분할
                comma_parts = [p.strip() for p in sent.split(',') if p.strip()]
                chunks.extend(comma_parts)

        return chunks

    def upsert_texts(self, collection_name: str, texts: list):
        """
        dense, sparse 기반 적재
        """
        points = []
        for item in texts:
            dense_vector = self.get_dense_embedding(item["text"])
            
            text_item: str = item["text"]
            text_list = self.hierarchical_split(text_item)
            sparse_vector = self.get_sparse_embedding(text_list)
            points.append(PointStruct(
                id=item["id"],
                vector={"dense": dense_vector, "sparse": sparse_vector},
                payload=item.get("payload", {"text": item["text"]})
            ))

        self.client.upsert(collection_name=collection_name, points=points)
        print(f"{len(points)}개 텍스트 업로드 완료")

    def search_text(self, collection_name: str, query: str, limit: int = 3):
        """
        쿼리 텍스트를 임베딩 후 Qdrant에서 검색
        """
        query_vector = self.get_dense_embedding(query)
        results = self.client.search(
            collection_name=collection_name,
            query_vector=query_vector,
            limit=limit
        )
        return [
            {"id": r.id, "score": r.score, "payload": r.payload}
            for r in results
        ]

In [28]:
qdrant = QdrantHandler()
parser = PDFParser()

In [9]:
qdrant.create_collection(name="law_collection_with_sparse", dim=1536)

  self.client.recreate_collection(


컬렉션 'law_collection_with_sparse' 생성 완료 (dim=1536, distance=Cosine)


In [10]:
docs = parser.load_documents(["./pdf/law.pdf"])

In [11]:
docs

[Document(metadata={'source': './pdf/law.pdf', 'file_path': './pdf/law.pdf', 'page': 0, 'total_pages': 24, 'Producer': 'iText 2.1.7 by 1T3XT', 'ModDate': "D:20250806152245+09'00'", 'CreationDate': "D:20250806152245+09'00'"}, page_content='근로기준법\n근로기준법\n[시행 2025. 2. 23.] [법률 제20520호, 2024. 10. 22., 일부개정]\n고용노동부 (근로기준정책과 - 해고, 취업규칙, 기타) 044-202-7534\n고용노동부 (근로기준정책과 - 소년) 044-202-7535\n고용노동부 (근로기준정책과 - 임금) 044-202-7548\n고용노동부 (여성고용정책과 - 여성) 044-202-7475\n고용노동부 (임금근로시간정책과 - 근로시간, 휴게) 044-202-7545\n고용노동부 (임금근로시간정책과 - 휴일, 연차휴가) 044-202-7973\n고용노동부 (임금근로시간정책과 - 제63조 적용제외, 특례업종) 044-202-7530\n고용노동부 (임금근로시간정책과 - 유연근로시간제) 044-202-7549\n제1장 총칙\n제1조(목적) 이 법은 헌법에 따라 근로조건의 기준을 정함으로써 근로자의 기본적 생활을 보장, 향상시키며 균형 있는\n국민경제의 발전을 꾀하는 것을 목적으로 한다.\n제2조(정의) ① 이 법에서 사용하는 용어의 뜻은 다음과 같다. <개정 2018. 3. 20., 2019. 1. 15., 2020. 5. 26.>\n1. “근로자”란 직업의 종류와 관계없이 임금을 목적으로 사업이나 사업장에 근로를 제공하는 사람을 말한다.\n2. “사용자”란 사업주 또는 사업 경영 담당자, 그 밖에 근로자에 관한 사항에 대하여 사업주를 위하여 행위하는 자를\n말한다.\n3. “근로”란 정신노동과 육체노동을 말한다.\n4. “근로계약”이란 근로자가 사용자에

In [12]:
# qdrant.create_collection(name="law_collection", dim=1536)
splitter = parser.create_text_splitter()
split_docs = splitter.split_documents(docs)

In [13]:
from langchain_core.documents import Document
from tqdm.auto import tqdm

def preprocess_documents(
    split_docs: List[Document],
    metadata_keys: List[str] = ["source", "page"],
    min_length: int = 2,
    use_basename: bool = False,
) -> tuple:
    """문서를 전처리하고 내용과 메타데이터를 반환합니다."""
    contents = []
    metadatas = {key: [] for key in metadata_keys}
    for doc in tqdm(split_docs):
        content = doc.page_content.strip()
        if content and len(content) >= min_length:
            contents.append(content)
            for k in metadata_keys:
                value = doc.metadata.get(k)
                if k == "source" and use_basename:
                    value = os.path.basename(value)
                try:
                    metadatas[k].append(int(value))
                except (ValueError, TypeError):
                    metadatas[k].append(value)
    return contents, metadatas

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
contents, metadatas = preprocess_documents(
    split_docs=split_docs,
    metadata_keys=["source", "page", "total_pages"],
    use_basename=True,
)

100%|██████████| 116/116 [00:00<00:00, 451335.12it/s]


In [29]:
# # 컬렉션 생성 (최초 1회만)
# qdrant.create_collection(name="law_collection", dim=1536)

# contents와 metadatas를 Qdrant 형식으로 변환
texts_for_qdrant = []
for i, content in enumerate(contents):
    # metadata에서 각 인덱스에 해당하는 값들을 가져와서 payload 구성
    payload = {"text": content}
    for key, values in metadatas.items():
        if i < len(values):
            payload[key] = values[i]
    
    texts_for_qdrant.append({
        "id": i + 1,  # Qdrant ID는 1부터 시작
        "text": content,
        "payload": payload
    })

print(f"총 {len(texts_for_qdrant)}개 문서 준비 완료")
print(f"첫 번째 문서 예시: {texts_for_qdrant[0]['payload']}")

총 116개 문서 준비 완료
첫 번째 문서 예시: {'text': '근로기준법\n근로기준법\n[시행 2025. 2. 23.] [법률 제20520호, 2024. 10. 22., 일부개정]\n고용노동부 (근로기준정책과 - 해고, 취업규칙, 기타) 044-202-7534\n고용노동부 (근로기준정책과 - 소년) 044-202-7535\n고용노동부 (근로기준정책과 - 임금) 044-202-7548\n고용노동부 (여성고용정책과 - 여성) 044-202-7475\n고용노동부 (임금근로시간정책과 - 근로시간, 휴게) 044-202-7545\n고용노동부 (임금근로시간정책과 - 휴일, 연차휴가) 044-202-7973\n고용노동부 (임금근로시간정책과 - 제63조 적용제외, 특례업종) 044-202-7530\n고용노동부 (임금근로시간정책과 - 유연근로시간제) 044-202-7549\n제1장 총칙\n제1조(목적) 이 법은 헌법에 따라 근로조건의 기준을 정함으로써 근로자의 기본적 생활을 보장, 향상시키며 균형 있는\n국민경제의 발전을 꾀하는 것을 목적으로 한다.', 'source': 'law.pdf', 'page': 0, 'total_pages': 24}


In [30]:
# Qdrant에 임베딩 및 적재
qdrant.upsert_texts(collection_name="law_collection_with_sparse", texts=texts_for_qdrant)

ValidationError: 57 validation errors for PointStruct
vector.list[float]
  Input should be a valid list [type=list_type, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/list_type
vector.list[list[float]]
  Input should be a valid list [type=list_type, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/list_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].0
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[36,...62, 0.2007692554577865]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].1
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[4, ...13, 0.8670018176976244]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].2
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[25,...86, 0.3975129908830186]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].3
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[4, ...13, 0.8670018176976244]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].4
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[4, ...13, 0.8670018176976244]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].5
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[4, ...13, 0.8670018176976244]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].6
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...17, 0.3672120768132307]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].7
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[79], values=[1.0]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].8
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...63, 0.1464868747826887]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].9
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...7, 0.34658952267039717]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].10
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...65, 0.2897700001755165]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].11
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...5, 0.13882178768984135]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[float].12
  Input should be a valid number [type=float_type, input_value=SparseVector(indices=[0, ...6, 0.24201912274416126]), input_type=SparseVector]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.SparseVector
  Input should be a valid dictionary or instance of SparseVector [type=model_type, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].0.0
  Input should be a valid number [type=float_type, input_value=('indices', [36, 0, 25, 54, 12, 4]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].0.1
  Input should be a valid number [type=float_type, input_value=('values', [0.69863765425...62, 0.2007692554577865]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].1.0
  Input should be a valid number [type=float_type, input_value=('indices', [4, 10]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].1.1
  Input should be a valid number [type=float_type, input_value=('values', [0.49830497499...13, 0.8670018176976244]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].2.0
  Input should be a valid number [type=float_type, input_value=('indices', [25, 15, 5, 47, 75, 13, 86]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].2.1
  Input should be a valid number [type=float_type, input_value=('values', [0.34280064744...86, 0.3975129908830186]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].3.0
  Input should be a valid number [type=float_type, input_value=('indices', [4, 11]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].3.1
  Input should be a valid number [type=float_type, input_value=('values', [0.49830497499...13, 0.8670018176976244]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].4.0
  Input should be a valid number [type=float_type, input_value=('indices', [4, 8]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].4.1
  Input should be a valid number [type=float_type, input_value=('values', [0.49830497499...13, 0.8670018176976244]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].5.0
  Input should be a valid number [type=float_type, input_value=('indices', [4, 14]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].5.1
  Input should be a valid number [type=float_type, input_value=('values', [0.49830497499...13, 0.8670018176976244]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].6.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 67, 27, 2... 35, 40, 73, 32, 3, 83]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].6.1
  Input should be a valid number [type=float_type, input_value=('values', [0.19440842508...17, 0.3672120768132307]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].7.0
  Input should be a valid number [type=float_type, input_value=('indices', [79]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].7.1
  Input should be a valid number [type=float_type, input_value=('values', [1.0]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].8.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 31, 1, 35...57, 50, 30, 58, 17, 37]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].8.1
  Input should be a valid number [type=float_type, input_value=('values', [0.22512914559...63, 0.1464868747826887]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].9.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 31, 1, 73... 6, 68, 37, 88, 21, 89]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].9.1
  Input should be a valid number [type=float_type, input_value=('values', [0.18349048821...7, 0.34658952267039717]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].10.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 75, 31, 1...59, 87, 24, 16, 72, 76]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].10.1
  Input should be a valid number [type=float_type, input_value=('values', [0.15340925020...65, 0.2897700001755165]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].11.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 75, 31, 1...62, 38, 39, 71, 49, 48]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].11.1
  Input should be a valid number [type=float_type, input_value=('values', [0.22048396675...5, 0.13882178768984135]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].12.0
  Input should be a valid number [type=float_type, input_value=('indices', [0, 4, 44, 65...45, 42, 81, 28, 61, 82]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.list[list[float]].12.1
  Input should be a valid number [type=float_type, input_value=('values', [0.12812910975...6, 0.24201912274416126]), input_type=tuple]
    For further information visit https://errors.pydantic.dev/2.11/v/float_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.Document
  Input should be a valid dictionary or instance of Document [type=model_type, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.Image
  Input should be a valid dictionary or instance of Image [type=model_type, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type
vector.dict[str,union[list[float],SparseVector,list[list[float]],Document,Image,InferenceObject]].sparse.InferenceObject
  Input should be a valid dictionary or instance of InferenceObject [type=model_type, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/model_type
vector.Document.text
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.Document.model
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.Document.dense
  Extra inputs are not permitted [type=extra_forbidden, input_value=[0.040354952216148376, 0...., -0.011777667328715324], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden
vector.Document.sparse
  Extra inputs are not permitted [type=extra_forbidden, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden
vector.Image.image
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.Image.model
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.Image.dense
  Extra inputs are not permitted [type=extra_forbidden, input_value=[0.040354952216148376, 0...., -0.011777667328715324], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden
vector.Image.sparse
  Extra inputs are not permitted [type=extra_forbidden, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden
vector.InferenceObject.object
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.InferenceObject.model
  Field required [type=missing, input_value={'dense': [0.040354952216... 0.24201912274416126])]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/missing
vector.InferenceObject.dense
  Extra inputs are not permitted [type=extra_forbidden, input_value=[0.040354952216148376, 0...., -0.011777667328715324], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden
vector.InferenceObject.sparse
  Extra inputs are not permitted [type=extra_forbidden, input_value=[SparseVector(indices=[36..., 0.24201912274416126])], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/extra_forbidden

In [25]:
# 검색 테스트
query = "법률 조항"
results = qdrant.search_text("law_collection_with_sparse", query, limit=3)

print(f"검색 쿼리: '{query}'")
print("=" * 50)
for i, result in enumerate(results, 1):
    print(f"{i}. 점수: {result['score']:.4f}")
    print(f"   소스: {result['payload'].get('source', 'N/A')}")
    print(f"   페이지: {result['payload'].get('page', 'N/A')}")
    print(f"   내용: {result['payload']['text'][:100]}...")
    print("-" * 30)

AttributeError: 'QdrantHandler' object has no attribute 'get_embedding'

In [None]:
from langchain_qdrant import QdrantVectorStore
vector_store = QdrantVectorStore.add_documents(documents="law_document")