Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: JavaScript CI
name: CI
on:
push:
branches: [main]
Expand Down Expand Up @@ -33,6 +33,11 @@ jobs:
with:
poetry-version: '1.6.1'


- run: |
sudo apt-get update
sudo apt-get install -y llvm-11 python3-dev
name: "LLVM install"

- uses: actions/checkout@v3

Expand Down Expand Up @@ -61,20 +66,28 @@ jobs:
repo-token: ${{ secrets.GITHUB_TOKEN }}
server-token: ${{ secrets.GITHUB_TOKEN }}

- run: yarn check
name: Static Checks

- run: yarn build
name: Build

- run: yarn start:azurite&
name: Start Azurite

- run: yarn ci
name: Verify
- run: yarn test:unit
name: Unit Tests
env:
GRAPHRAG_API_KEY: $(openaiApiKey)
GRAPHRAG_LLM_MODEL: $(completionModel)
GRAPHRAG_EMBEDDING_MODEL: $(embeddingModel)
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- run: |
git add -A
git status
name: Git status
- run: yarn test:integration
name: Integration Tests
env:
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- run: yarn is_clean
name: Check if repo is clean
- run: yarn test:smoke
name: Smoke Tests
env:
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHRAG_LLM_MODEL: ${{ secrets.OPENAI_LLM_MODEL }}
GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.OPENAI_EMBEDDING_MODEL }}
48 changes: 0 additions & 48 deletions .github/workflows/codeql.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python-Publish-CI
name: Python Publish
on:
push:
branches: [main]
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@
"devDependencies": {
"cspell": "^8.3.2",
"npm-run-all": "^4.1.5",
"turbo": "^1.12.4"
"turbo": "^1.13.0"
}
}
4 changes: 2 additions & 2 deletions python/graphrag/graphrag/index/default_config/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def load_pipeline_config(config_or_path: str | PipelineConfig) -> PipelineConfig
for extended_config in config.extends:
extended_config = load_pipeline_config(extended_config)
merged_config = {
**json.loads(extended_config.json()),
**json.loads(config.json(exclude_unset=True)),
**json.loads(extended_config.model_dump_json()),
**json.loads(config.model_dump_json(exclude_unset=True)),
}
config = PipelineConfig.model_validate(merged_config)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,20 +115,23 @@ def default_config_parameters_from_env_vars(
root_dir = root_dir or str(Path.cwd())
env = _make_env(root_dir)

def _str(key: str, default_value: str | None = None) -> str | None:
return env(key, default_value)
def _key(key: str | Fragment) -> str | None:
return key.value if isinstance(key, Fragment) else key

def _int(key: str, default_value: int | None = None) -> int | None:
return env.int(key, default_value)
def _str(key: str | Fragment, default_value: str | None = None) -> str | None:
return env(_key(key), default_value)

def _bool(key: str, default_value: bool | None = None) -> bool | None:
return env.bool(key, default_value)
def _int(key: str | Fragment, default_value: int | None = None) -> int | None:
return env.int(_key(key), default_value)

def _float(key: str, default_value: float | None = None) -> float | None:
return env.float(key, default_value)
def _bool(key: str | Fragment, default_value: bool | None = None) -> bool | None:
return env.bool(_key(key), default_value)

def section(key: str):
return env.prefixed(f"{key}_")
def _float(key: str | Fragment, default_value: float | None = None) -> float | None:
return env.float(_key(key), default_value)

def section(key: Section):
return env.prefixed(f"{key.value}_")

fallback_oai_key = _str("OPENAI_API_KEY", _str("AZURE_OPENAI_API_KEY"))
fallback_oai_org = _str("OPENAI_ORG_ID")
Expand Down
216 changes: 108 additions & 108 deletions python/graphrag/graphrag/vector_stores/qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,111 +3,111 @@
# Licensed under the MIT license. See LICENSE file in the project.
#

"""A package containing the Qdrant vector store implementation."""
from typing import Any
from qdrant_client import QdrantClient # type: ignore
from qdrant_client.http import models # type: ignore
from qdrant_client.models import Distance, VectorParams # type: ignore
from graphrag.model.types import TextEmbedder
from .base import BaseVectorStore, VectorStoreDocument, VectorStoreSearchResult
class Qdrant(BaseVectorStore):
    """Vector store implementation that talks to Qdrant through ``QdrantClient``.

    The methods read ``self.collection_name`` and ``self.query_filter``, which
    are not assigned in ``connect``; presumably they are provided by
    ``BaseVectorStore`` -- confirm against the base class.
    """

    def connect(self, **kwargs: Any) -> Any:
        """Create the Qdrant client and keep it on ``self.db_connection``.

        Keyword arguments read here:
            url: server address; when missing or falsy an in-memory client
                is created instead.
            port: server port, defaults to 6333.
            api_key: optional API key, defaults to ``None``.
            timeout: client timeout, defaults to 1000.
            vector_size: fallback vector dimension stored on
                ``self.vector_size``, defaults to 1536.
            https: defaults to whether ``"https://"`` occurs in ``url``.
        """
        endpoint = kwargs.get("url", None)
        port = kwargs.get("port", 6333)
        api_key = kwargs.get("api_key", None)
        timeout = kwargs.get("timeout", 1000)
        self.vector_size = kwargs.get("vector_size", 1536)

        if not endpoint:
            # No server address given: fall back to an in-memory database.
            self.db_connection = QdrantClient(":memory:")
            return

        use_https = kwargs.get("https", "https://" in endpoint)
        self.db_connection = QdrantClient(
            url=endpoint,
            port=port,
            api_key=api_key,
            https=use_https,
            timeout=timeout,
        )

    def load_documents(
        self, documents: list[VectorStoreDocument], overwrite: bool = True
    ) -> None:
        """Upsert ``documents`` into the collection, optionally recreating it.

        When ``overwrite`` is true the collection is recreated first, using
        cosine distance and the length of the first document's vector as the
        dimension (or ``self.vector_size`` when there is no such vector).
        """
        if overwrite:
            has_first_vector = len(documents) > 0 and documents[0].vector
            if has_first_vector:
                dimension = len(documents[0].vector)
            else:
                dimension = self.vector_size

            self.db_connection.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=dimension,
                    distance=Distance.COSINE,
                ),
            )

        ids = [document.id for document in documents]
        # Documents without a vector are sent with an empty vector.
        vectors = [document.vector or [] for document in documents]
        payloads = [
            {"text": document.text, **document.attributes} for document in documents
        ]
        self.db_connection.upsert(
            collection_name=self.collection_name,
            points=models.Batch(ids=ids, vectors=vectors, payloads=payloads),
        )

    def filter_by_id(self, include_ids: list[str] | list[int]) -> Any:
        """Store and return a filter that matches only the given point ids."""
        id_condition = models.HasIdCondition(has_id=include_ids)  # type: ignore
        self.query_filter = models.Filter(must=[id_condition])
        return self.query_filter

    def similarity_search_by_vector(
        self, query_embedding: list[float], k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Return up to ``k`` search hits for ``query_embedding``.

        The search applies ``self.query_filter`` and requests vectors back.
        Each hit's score is reported as ``1 - abs(score)`` of the value
        returned by the client. Extra keyword arguments are accepted but
        not used.
        """
        hits = self.db_connection.search(
            collection_name=self.collection_name,
            query_filter=self.query_filter,
            query_vector=query_embedding,
            limit=k,
            with_vectors=True,
        )

        results = []
        for hit in hits:
            payload = hit.payload
            if payload:
                text = payload["text"]
                # Everything except the text itself is treated as attributes.
                attributes = {
                    key: value for key, value in payload.items() if key != "text"
                }
            else:
                text = ""
                attributes = {}

            document = VectorStoreDocument(
                id=hit.id,
                text=text,
                vector=hit.vector or [],  # type: ignore
                attributes=attributes,
            )
            results.append(
                VectorStoreSearchResult(document=document, score=1 - abs(hit.score))
            )
        return results

    def similarity_search_by_text(
        self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Embed ``text`` with ``text_embedder`` and search by that vector.

        Returns an empty list when the embedder yields a falsy embedding.
        Extra keyword arguments are accepted but not forwarded.
        """
        query_embedding = text_embedder(text)
        if not query_embedding:
            return []
        return self.similarity_search_by_vector(query_embedding=query_embedding, k=k)
"""A package containing the Qdrant vector store implementation."""

from typing import Any

from qdrant_client import QdrantClient # type: ignore
from qdrant_client.http import models # type: ignore
from qdrant_client.models import Distance, VectorParams # type: ignore

from graphrag.model.types import TextEmbedder

from .base import BaseVectorStore, VectorStoreDocument, VectorStoreSearchResult


class Qdrant(BaseVectorStore):
    """Qdrant-backed vector store.

    Relies on ``self.collection_name`` and ``self.query_filter`` being
    available on the instance; presumably ``BaseVectorStore`` sets them --
    verify against the base class.
    """

    def connect(self, **kwargs: Any) -> Any:
        """Build a ``QdrantClient`` and assign it to ``self.db_connection``.

        Reads ``url``, ``port`` (default 6333), ``api_key``, ``timeout``
        (default 1000), ``vector_size`` (default 1536, stored on
        ``self.vector_size``) and ``https`` (default: ``"https://" in url``)
        from ``kwargs``. A missing or falsy ``url`` selects the in-memory
        client.
        """
        url = kwargs.get("url", None)
        port = kwargs.get("port", 6333)
        api_key = kwargs.get("api_key", None)
        timeout = kwargs.get("timeout", 1000)
        self.vector_size = kwargs.get("vector_size", 1536)

        if url:
            client_options = {
                "url": url,
                "port": port,
                "api_key": api_key,
                # Only computed when a url exists, since the default inspects it.
                "https": kwargs.get("https", "https://" in url),
                "timeout": timeout,
            }
            self.db_connection = QdrantClient(**client_options)
        else:
            # create in-memory db
            self.db_connection = QdrantClient(":memory:")

    def load_documents(
        self, documents: list[VectorStoreDocument], overwrite: bool = True
    ) -> None:
        """Write ``documents`` to the collection.

        With ``overwrite`` set, the collection is recreated beforehand with
        cosine distance; its vector size comes from the first document's
        vector, falling back to ``self.vector_size``.
        """
        if overwrite:
            size = self.vector_size
            if len(documents) > 0 and documents[0].vector:
                size = len(documents[0].vector)

            vectors_config = VectorParams(size=size, distance=Distance.COSINE)
            self.db_connection.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=vectors_config,
            )

        batch = models.Batch(
            ids=[item.id for item in documents],
            # A document with no vector contributes an empty list.
            vectors=[item.vector if item.vector else [] for item in documents],
            payloads=[{"text": item.text, **item.attributes} for item in documents],
        )
        self.db_connection.upsert(
            collection_name=self.collection_name,
            points=batch,
        )

    def filter_by_id(self, include_ids: list[str] | list[int]) -> Any:
        """Set ``self.query_filter`` to an id-membership filter and return it."""
        must_conditions = [
            models.HasIdCondition(has_id=include_ids),  # type: ignore
        ]
        self.query_filter = models.Filter(must=must_conditions)
        return self.query_filter

    @staticmethod
    def _to_search_result(hit: Any) -> VectorStoreSearchResult:
        """Convert one client search hit into a ``VectorStoreSearchResult``."""
        payload = hit.payload
        # The "text" payload entry becomes the document text; the rest are attributes.
        text = payload["text"] if payload else ""
        attributes = (
            {name: value for name, value in payload.items() if name != "text"}
            if payload
            else {}
        )
        return VectorStoreSearchResult(
            document=VectorStoreDocument(
                id=hit.id,
                text=text,
                vector=hit.vector if hit.vector else [],  # type: ignore
                attributes=attributes,
            ),
            score=1 - abs(hit.score),
        )

    def similarity_search_by_vector(
        self, query_embedding: list[float], k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Search the collection by vector and return at most ``k`` results.

        ``self.query_filter`` is passed to the client and vectors are
        requested with each hit. Result scores are ``1 - abs(score)`` of the
        client's score. ``kwargs`` is accepted but unused.
        """
        hits = self.db_connection.search(
            collection_name=self.collection_name,
            query_filter=self.query_filter,
            query_vector=query_embedding,
            limit=k,
            with_vectors=True,
        )
        return [self._to_search_result(hit) for hit in hits]

    def similarity_search_by_text(
        self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Search by text: embed it, then delegate to the vector search.

        A falsy embedding produces an empty result list. ``kwargs`` is
        accepted but not passed on.
        """
        query_embedding = text_embedder(text)
        return (
            self.similarity_search_by_vector(query_embedding=query_embedding, k=k)
            if query_embedding
            else []
        )
Loading