Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,5 @@ app/wkhtmltox/*.exe
**/dist/*

**.egg-info/
.env

12 changes: 1 addition & 11 deletions .vsts-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,4 @@ stages:
containerRegistry: '$(CONTAINER_REGISTRY)'
repository: 'intelligence-toolkit'
Dockerfile: '**/Dockerfile'
tags: '$(TAG)'
- script: |
docker build . -t intelligence-toolkit:$(TAG)
displayName: 'Build docker'
- script: |
docker save -o $(Build.ArtifactStagingDirectory)/intelligence-toolkit.tar intelligence-toolkit:$(TAG)
displayName: 'Save Docker image as tar file'
- task: PublishBuildArtifacts@1
inputs:
PathtoPublish: '$(Build.ArtifactStagingDirectory)/intelligence-toolkit.tar'
ArtifactName: 'intelligence-toolkit-container'
tags: '$(TAG)'
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Migrated build system from hatchling to setuptools for better package management
- Updated import paths to use `intelligence_toolkit.AI.metaprompts` instead of `app.workflows.security.metaprompts`
- Added unit tests for coverage

### Removed
- Removed deprecated `app/workflows/security/` module (metaprompts moved to `intelligence_toolkit.AI.metaprompts`)
Expand Down
5 changes: 3 additions & 2 deletions intelligence_toolkit/detect_entity_networks/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,11 @@ def get_entity_types(self) -> list[str]:
)

def get_attributes(self) -> pl.DataFrame:
return pl.DataFrame(self.attributes_list, columns=["Attribute"])
return pl.DataFrame(self.attributes_list, schema=["Attribute"])

def remove_attributes(self, selected_rows: pl.DataFrame) -> list[str]:
self.additional_trimmed_attributes = selected_rows["Attribute"].tolist()
self.additional_trimmed_attributes = selected_rows["Attribute"].to_list()
return self.additional_trimmed_attributes

def add_attribute_links(
self, data_df: pl.DataFrame, entity_id_column: str, columns_to_link: list[str]
Expand Down
1 change: 1 addition & 0 deletions intelligence_toolkit/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

PDF_ENCODING = "UTF-8"
PDF_WKHTMLTOPDF_PATH = "C:\\Program Files\\wkhtmltopdf\\bin\\wkhtmltopdf.exe"
PDF_MARGIN_INCHES = 0.75
ATTRIBUTE_VALUE_SEPARATOR = "=="

CACHE_PATH = os.path.join(
Expand Down
2 changes: 1 addition & 1 deletion intelligence_toolkit/match_entity_records/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def integrated_results(self) -> pl.DataFrame:

def add_df_to_model(self, model: RecordsModel) -> pl.DataFrame:
if not model.dataframe_name:
model.dataframe_name = "dataset_" + len(self.model_dfs) + 1
model.dataframe_name = "dataset_" + str(len(self.model_dfs) + 1)

self.model_dfs[model.dataframe_name] = format_model_df(
model,
Expand Down
2 changes: 1 addition & 1 deletion intelligence_toolkit/query_text_data/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_test_progress(test_history):
return response


def test_history_elements(test_history, previous_cid, next_cid, adjacent_search_steps):
def parse_history_elements(test_history, previous_cid, next_cid, adjacent_search_steps):
relevant_list = [x[1] for x in test_history if x[2] == "Yes"]
seen_list = [x[1] for x in test_history]
adjacent_targets = set()
Expand Down
3 changes: 1 addition & 2 deletions intelligence_toolkit/query_text_data/query_rewriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ async def rewrite_query(ai_configuration, query, concept_graph, top_concepts):
str: The rewritten query.
"""
concepts = sorted(concept_graph.degree(), key=lambda x: x[1], reverse=True)
if "dummynode" in concepts:
concepts.remove("dummynode")
concepts = [c for c in concepts if c[0] != "dummynode"]

concepts = concepts[:top_concepts]
concepts_str = ", ".join([concept for concept, _ in concepts])
Expand Down
8 changes: 4 additions & 4 deletions intelligence_toolkit/query_text_data/relevance_assessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ async def detect_relevant_chunks(
chunk_progress_callback(test_history)

aq_embedding = np.array(embedder.embed_store_one(query, embedding_cache))
relevant, seen, adjacent = helper_functions.test_history_elements(
relevant, seen, adjacent = helper_functions.parse_history_elements(
test_history,
processed_chunks.previous_cid,
processed_chunks.next_cid,
Expand Down Expand Up @@ -239,7 +239,7 @@ async def detect_relevant_chunks(
# print(f"Community sequence: {community_sequence}")
community_to_cids = level_to_community_to_cids[current_level]
for community in community_sequence:
relevant, seen, adjacent = helper_functions.test_history_elements(
relevant, seen, adjacent = helper_functions.parse_history_elements(
test_history,
processed_chunks.previous_cid,
processed_chunks.next_cid,
Expand Down Expand Up @@ -299,7 +299,7 @@ async def detect_relevant_chunks(
# print("Reached final level")
pass

relevant, seen, adjacent = helper_functions.test_history_elements(
relevant, seen, adjacent = helper_functions.parse_history_elements(
test_history,
processed_chunks.previous_cid,
processed_chunks.next_cid,
Expand All @@ -321,7 +321,7 @@ async def detect_relevant_chunks(
chunk_callback=chunk_callback,
commentary=commentary
)
relevant, seen, adjacent = helper_functions.test_history_elements(
relevant, seen, adjacent = helper_functions.parse_history_elements(
test_history,
processed_chunks.previous_cid,
processed_chunks.next_cid,
Expand Down
113 changes: 113 additions & 0 deletions intelligence_toolkit/tests/unit/AI/test_base_batch_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project.
#
import asyncio
from unittest.mock import MagicMock

import pytest

from intelligence_toolkit.AI.base_batch_async import BaseBatchAsync
from intelligence_toolkit.helpers.progress_batch_callback import ProgressBatchCallback


@pytest.fixture
def base_batch():
    """Provide a fresh BaseBatchAsync instance for each test."""
    batch = BaseBatchAsync()
    return batch


def test_base_batch_async_initialization():
    """A new BaseBatchAsync starts with one total task and zero progress."""
    batch = BaseBatchAsync()

    assert (
        batch.total_tasks,
        batch.completed_tasks,
        batch.previous_completed_tasks,
    ) == (1, 0, 0)


def test_base_batch_async_progress_callback(base_batch):
    """Each progress_callback call increments completed_tasks by one."""
    assert base_batch.completed_tasks == 0
    for expected_after in (1, 2):
        base_batch.progress_callback()
        assert base_batch.completed_tasks == expected_after


@pytest.mark.asyncio
async def test_track_progress_with_tasks():
    """track_progress drives the callback while in-flight tasks complete."""
    batch = BaseBatchAsync()
    batch.total_tasks = 3

    observer = MagicMock(spec=ProgressBatchCallback)
    observer.on_batch_change = MagicMock()

    async def worker():
        # Simulate a small unit of async work, then report completion.
        await asyncio.sleep(0.01)
        batch.progress_callback()

    pending = [asyncio.create_task(worker()) for _ in range(3)]

    await batch.track_progress(pending, [observer])

    # The callback fired at least once and all work was accounted for.
    assert observer.on_batch_change.called
    assert batch.completed_tasks == 3


@pytest.mark.asyncio
async def test_track_progress_multiple_callbacks():
    """Every registered callback receives batch-change notifications."""
    batch = BaseBatchAsync()
    batch.total_tasks = 2

    observers = []
    for _ in range(2):
        cb = MagicMock(spec=ProgressBatchCallback)
        cb.on_batch_change = MagicMock()
        observers.append(cb)

    async def worker():
        await asyncio.sleep(0.01)
        batch.progress_callback()

    pending = [asyncio.create_task(worker()) for _ in range(2)]

    await batch.track_progress(pending, observers)

    assert all(cb.on_batch_change.called for cb in observers)
    assert batch.completed_tasks == 2


@pytest.mark.asyncio
async def test_track_progress_completed_immediately():
    """track_progress still notifies when all work is already done."""
    batch = BaseBatchAsync()
    batch.total_tasks = 1
    batch.completed_tasks = 1

    observer = MagicMock(spec=ProgressBatchCallback)
    observer.on_batch_change = MagicMock()

    async def noop():
        # Finishes instantly -- nothing left for track_progress to wait on.
        pass

    finished = asyncio.create_task(noop())
    # Yield control so the task can actually complete before tracking starts.
    await asyncio.sleep(0.01)

    await batch.track_progress([finished], [observer])

    assert observer.on_batch_change.called


@pytest.mark.asyncio
async def test_track_progress_no_change():
    """An empty task list still triggers a final callback invocation."""
    batch = BaseBatchAsync()
    batch.total_tasks = 0
    batch.completed_tasks = 0

    observer = MagicMock(spec=ProgressBatchCallback)
    observer.on_batch_change = MagicMock()

    await batch.track_progress([], [observer])

    # Even with nothing to do, track_progress reports once at the end.
    assert observer.on_batch_change.called
146 changes: 146 additions & 0 deletions intelligence_toolkit/tests/unit/AI/test_base_chat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project.
#
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from intelligence_toolkit.AI.base_chat import BaseChat
from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration


@pytest.fixture
def base_chat_config():
    """Minimal OpenAI configuration used across the chat tests."""
    settings = {
        "api_key": "test_key",
        "model": "gpt-4",
        "api_type": "OpenAI",
    }
    return OpenAIConfiguration(settings)


@pytest.fixture
def base_chat(base_chat_config):
    # Patch out the real OpenAI clients so BaseChat can be constructed
    # without network access or credentials.
    with patch("intelligence_toolkit.AI.client.OpenAI"), \
        patch("intelligence_toolkit.AI.client.AsyncOpenAI"):
        return BaseChat(base_chat_config)


def test_base_chat_initialization(base_chat_config):
    """Constructing BaseChat honours an explicit coroutine limit."""
    with patch("intelligence_toolkit.AI.client.OpenAI"), \
            patch("intelligence_toolkit.AI.client.AsyncOpenAI"):
        chat = BaseChat(base_chat_config, concurrent_coroutines=10)

        assert chat.configuration is not None
        # Private attribute, but the only way to observe the semaphore limit.
        assert chat.semaphore._value == 10


def test_base_chat_initialization_default_coroutines(base_chat_config):
    """Without an explicit limit, BaseChat uses the library default."""
    from intelligence_toolkit.AI.defaults import DEFAULT_CONCURRENT_COROUTINES

    with patch("intelligence_toolkit.AI.client.OpenAI"), \
            patch("intelligence_toolkit.AI.client.AsyncOpenAI"):
        chat = BaseChat(base_chat_config)

        assert chat.semaphore._value == DEFAULT_CONCURRENT_COROUTINES


@pytest.mark.asyncio
async def test_generate_text_async_success(base_chat):
    """generate_text_async returns the underlying chat response verbatim."""
    prompt = [{"role": "user", "content": "Hello"}]

    with patch.object(base_chat, 'generate_chat_async', new_callable=AsyncMock) as chat_mock:
        chat_mock.return_value = "Test response"

        reply = await base_chat.generate_text_async(prompt, None, False)

        assert reply == "Test response"
        # The messages and stream flag must be forwarded unchanged.
        chat_mock.assert_called_once_with(messages=prompt, stream=False)


@pytest.mark.asyncio
async def test_generate_text_async_with_callbacks(base_chat):
    """Supplying callbacks triggers exactly one progress update."""
    prompt = [{"role": "user", "content": "Hello"}]
    observer = MagicMock()

    with patch.object(base_chat, 'generate_chat_async', new_callable=AsyncMock) as chat_mock, \
            patch.object(base_chat, 'progress_callback') as progress_mock:
        chat_mock.return_value = "Test response"

        reply = await base_chat.generate_text_async(prompt, [observer], False)

        assert reply == "Test response"
        progress_mock.assert_called_once()


@pytest.mark.asyncio
async def test_generate_text_async_exception(base_chat):
    """Errors from the chat layer are re-raised with a descriptive message."""
    prompt = [{"role": "user", "content": "Hello"}]

    with patch.object(base_chat, 'generate_chat_async', new_callable=AsyncMock) as chat_mock:
        chat_mock.side_effect = Exception("API Error")

        with pytest.raises(Exception, match="Problem in OpenAI response"):
            await base_chat.generate_text_async(prompt, None, False)


@pytest.mark.asyncio
async def test_generate_texts_async_multiple_messages(base_chat):
    """generate_texts_async fans out and preserves per-message ordering."""
    batches = [
        [{"role": "user", "content": text}]
        for text in ("Hello", "World", "Test")
    ]

    with patch.object(base_chat, 'generate_text_async', new_callable=AsyncMock) as gen_mock:
        gen_mock.side_effect = ["Response 1", "Response 2", "Response 3"]

        replies = await base_chat.generate_texts_async(batches)

        assert len(replies) == 3
        # Responses come back in the same order the messages were submitted.
        assert list(replies) == ["Response 1", "Response 2", "Response 3"]
        assert base_chat.total_tasks == 3


@pytest.mark.asyncio
async def test_generate_texts_async_with_callbacks(base_chat):
    """Passing callbacks engages progress tracking exactly once."""
    batches = [
        [{"role": "user", "content": "Hello"}],
        [{"role": "user", "content": "World"}],
    ]
    observer = MagicMock()

    with patch.object(base_chat, 'generate_text_async', new_callable=AsyncMock) as gen_mock, \
            patch.object(base_chat, 'track_progress', new_callable=AsyncMock) as track_mock:
        gen_mock.side_effect = ["Response 1", "Response 2"]

        replies = await base_chat.generate_texts_async(batches, callbacks=[observer])

        assert len(replies) == 2
        track_mock.assert_called_once()


@pytest.mark.asyncio
async def test_generate_texts_async_with_kwargs(base_chat):
    """Extra keyword arguments are forwarded to each generation call."""
    batches = [[{"role": "user", "content": "Hello"}]]

    with patch.object(base_chat, 'generate_text_async', new_callable=AsyncMock) as gen_mock:
        gen_mock.return_value = "Response"

        await base_chat.generate_texts_async(
            batches,
            temperature=0.5,
            max_tokens=100,
        )

        # Inspect the keyword arguments of the recorded call.
        forwarded = gen_mock.call_args[1]
        assert forwarded['temperature'] == 0.5
        assert forwarded['max_tokens'] == 100
Loading
Loading