Add a `categories` field to harvester feedback records, name the missing
required fields in the validation error, add unit tests for both, and add the
matching `categories TEXT[]` column to the feedback table definition.

diff --git a/src/harvester/runner/utils/utils.py b/src/harvester/runner/utils/utils.py
index c8d69a8..a70e6c7 100644
--- a/src/harvester/runner/utils/utils.py
+++ b/src/harvester/runner/utils/utils.py
@@ -9,17 +9,28 @@ def __init__(self, data: dict):
         self.llm_response = data.get("llm_response")
         self.sentiment = data.get("sentiment")
         self.user_feedback = data.get("user_feedback")
+        self.categories = data.get("categories")
 
         self._validate()
-
+    
     def _validate(self) -> None:
-        if not self.user_id \
-            or not self.timestamp \
-            or not self.conversation_id \
-            or not self.user_question \
-            or not self.llm_response:
-            raise Exception("Values missing from query response.") #TODO: add better logging
+        response_str = "The following values are missing from the query response: "
+
+        if not self.user_id:
+            response_str += "user_id, "
+        if not self.timestamp:
+            response_str += "timestamp, "
+        if not self.conversation_id:
+            response_str += "conversation_id, "
+        if not self.user_question:
+            response_str += "user_question, "
+        if not self.llm_response:
+            response_str += "llm_response, "
 
+        if response_str[-2:] == ", ":
+            response_str = response_str[:-2]
+            raise Exception(response_str)
+
     def get_args(self) -> List[str | None]:
         return [
             self.user_id,
@@ -28,8 +39,9 @@ def get_args(self) -> List[str | None]:
             self.user_question,
             self.llm_response,
             self.sentiment,
-            self.user_feedback
+            self.user_feedback,
+            self.categories
         ]
 
     def get_query(self) -> str:
-        return "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback) VALUES (%s, %s, %s, %s, %s, %s, %s);"
\ No newline at end of file
+        return "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback, categories) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
\ No newline at end of file
diff --git a/src/harvester/tests/unit/test_db.py b/src/harvester/tests/unit/test_db.py
index 6363f10..f587e33 100644
--- a/src/harvester/tests/unit/test_db.py
+++ b/src/harvester/tests/unit/test_db.py
@@ -1,3 +1,5 @@
+import pytest
+from runner.utils.utils import Feedback
 from unittest.mock import patch, MagicMock
 from runner.utils.db import PostgresDB
 
@@ -34,3 +36,91 @@ def test_execute_query(mock_connection_pool):
 
     mock_cursor.execute.assert_called_once_with("INSERT INTO test_table VALUES (%s, %s, %s)", data)
     mock_conn.commit.assert_called_once()
+
+@pytest.mark.parametrize("feedback_data,expected_categories,test_description", [
+    ({
+        "user_id": "test-user-123",
+        "timestamp": "2025-01-15 10:30:00.123456",
+        "conversation_id": "conv-456",
+        "user_question": "Test question 1",
+        "llm_response": "Test response 1",
+        "user_feedback": "Test feedback 1",
+        "categories": ["deployment", "kubernetes"]
+    }, ["deployment", "kubernetes"], "sentiment_missing"),
+    ({
+        "user_id": "test-user-456",
+        "timestamp": "2025-01-15 11:30:00.123456",
+        "conversation_id": "conv-789",
+        "user_question": "Test question 2",
+        "llm_response": "Test response 2",
+        "sentiment": 1,
+        "categories": ["general", "tutorial"]
+    }, ["general", "tutorial"], "user_feedback_missing"),
+    ({
+        "user_id": "test-user-789",
+        "timestamp": "2025-01-15 12:30:00.123456",
+        "conversation_id": "conv-abc",
+        "user_question": "Test question 3",
+        "llm_response": "Test response 3",
+        "sentiment": 1,
+        "user_feedback": "Test feedback 3"
+    }, None, "categories_missing"),
+    ({
+        "user_id": "test-user-000",
+        "timestamp": "2025-01-15 13:30:00.123456",
+        "conversation_id": "conv-def",
+        "user_question": "Test question 4",
+        "llm_response": "Test response 4",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 4",
+        "categories": []
+    }, [], "empty_categories_array"),
+    ({
+        "user_id": "test-user-111",
+        "timestamp": "2025-01-15 14:30:00.123456",
+        "conversation_id": "conv-ghi",
+        "user_question": "Test question 5",
+        "llm_response": "Test response 5",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 5",
+        "categories": ["incomplete"]
+    }, ["incomplete"], "single_category"),
+    ({
+        "user_id": "test-user-222",
+        "timestamp": "2025-01-15 15:30:00.123456",
+        "conversation_id": "conv-jkl",
+        "user_question": "Test question 6",
+        "llm_response": "Test response 6",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 6",
+        "categories": ["incorrect", "not_relevant", "other"]
+    }, ["incorrect", "not_relevant", "other"], "three_specific_categories")
+], ids=["sentiment_missing", "user_feedback_missing", "categories_missing", "empty_categories_array", "single_category", "three_specific_categories"])
+
+@patch("runner.utils.db.ConnectionPool")
+def test_feedback_insertion_with_categories_scenarios(mock_connection_pool, feedback_data, expected_categories, test_description):
+    mock_conn = MagicMock()
+    mock_cursor = MagicMock()
+
+    mock_pool = MagicMock()
+    mock_pool.connection.return_value.__enter__.return_value = mock_conn
+    mock_conn.cursor.return_value.__enter__.return_value = mock_cursor
+
+    mock_connection_pool.return_value = mock_pool
+
+    db = PostgresDB()
+    feedback = Feedback(feedback_data)
+    db.execute(feedback.get_query(), feedback.get_args())
+
+    expected_query = "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback, categories) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
+
+    called_args = mock_cursor.execute.call_args[0][1]
+    assert called_args[7] == expected_categories
+
+    if "sentiment" not in feedback_data:
+        assert called_args[5] is None
+    if "user_feedback" not in feedback_data:
+        assert called_args[6] is None
+
+    mock_cursor.execute.assert_called_once_with(expected_query, called_args)
+    mock_conn.commit.assert_called_once()
\ No newline at end of file
diff --git a/src/harvester/tests/unit/test_utils.py b/src/harvester/tests/unit/test_utils.py
new file mode 100644
index 0000000..e40afa3
--- /dev/null
+++ b/src/harvester/tests/unit/test_utils.py
@@ -0,0 +1,69 @@
+import pytest
+from runner.utils.utils import Feedback
+
+test_res_base = "The following values are missing from the query response:"
+
+@pytest.mark.parametrize("request_data, expected_exception, description", [
+    (
+        {
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} user_id",
+        "missing-user-id"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} timestamp",
+        "missing-timestamp"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} conversation_id",
+        "missing-convo-id"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} user_question",
+        "missing-user-question"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question"
+        },
+        f"{test_res_base} llm_response",
+        "missing-llm-response"
+    ),
+    (
+        {
+            "sentiment": 1
+        },
+        f"{test_res_base} user_id, timestamp, conversation_id, user_question, llm_response",
+        "missing-all-required"
+    )
+],
+ids=["missing-user-id", "missing-timestamp", "missing-convo-id", "missing-user-question", "missing-llm-response", "missing-all-required"])
+def test_validate(request_data, expected_exception, description):
+    with pytest.raises(Exception) as e:
+        Feedback(request_data)
+    assert expected_exception in str(e.value)  # outside the with-block so it actually runs
\ No newline at end of file
diff --git a/src/harvester/tests/unit/test_watcher.py b/src/harvester/tests/unit/test_watcher.py
index b7db412..6f5d56d 100644
--- a/src/harvester/tests/unit/test_watcher.py
+++ b/src/harvester/tests/unit/test_watcher.py
@@ -83,19 +83,3 @@ def test_write_json_contents():
         write_json_contents(test_file_path, mock_db)
 
         mock_db.execute.assert_called_once()
-
-def test_write_json_contents_failure():
-    sample_files = [
-        "sample_feedback_4.json",
-    ]
-    for file in sample_files:
-        test_file_path = os.path.join(os.path.dirname(__file__), "data", file)
-        mock_db = MagicMock()
-        mock_fb_instance = MagicMock()
-        mock_fb_instance.get_query.return_value = "INSERT INTO ..."
-        mock_fb_instance.get_args.return_value = ("val",)
-
-        with pytest.raises(Exception) as e:
-            write_json_contents(test_file_path, mock_db)
-
-        assert "Values missing from query response" in str(e.value)
diff --git a/templates/postgres/post-deploy-config/job.yaml b/templates/postgres/post-deploy-config/job.yaml
index c4202ec..d455493 100644
--- a/templates/postgres/post-deploy-config/job.yaml
+++ b/templates/postgres/post-deploy-config/job.yaml
@@ -45,7 +45,8 @@ spec:
               user_question TEXT NOT NULL,
               llm_response TEXT NOT NULL,
               sentiment INTEGER,
-              user_feedback TEXT
+              user_feedback TEXT,
+              categories TEXT[]
             );
             EOF
       restartPolicy: OnFailure