redhat-ai-dev · Jdubrick · Aug 14, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/src/harvester/runner/utils/utils.py b/src/harvester/runner/utils/utils.py
@@ -9,17 +9,28 @@ def __init__(self, data: dict):
         self.llm_response = data.get("llm_response")
         self.sentiment = data.get("sentiment")
         self.user_feedback = data.get("user_feedback")
+        self.categories = data.get("categories")
 
         self._validate()
-
+    
     def _validate(self) -> None:
-        if not self.user_id \
-        or not self.timestamp \
-        or not self.conversation_id \
-        or not self.user_question \
-        or not self.llm_response:
-            raise Exception("Values missing from query response.") #TODO: add better logging
+        """Raise an Exception naming every required field that is missing or empty."""
+        required = {
+            "user_id": self.user_id,
+            "timestamp": self.timestamp,
+            "conversation_id": self.conversation_id,
+            "user_question": self.user_question,
+            "llm_response": self.llm_response,
+        }
+        # Any falsy value (None, "", 0) counts as missing; dict order fixes the message order.
+        missing = [name for name, value in required.items() if not value]
 
+        if missing:
+            raise Exception(
+                "The following values are missing from the query response: "
+                + ", ".join(missing)
+            )
+
     def get_args(self) -> List[str | None]:
         return [
             self.user_id,
@@ -28,8 +39,9 @@ def get_args(self) -> List[str | None]:
             self.user_question,
             self.llm_response,
             self.sentiment,
-            self.user_feedback
+            self.user_feedback,
+            self.categories
         ]
 
     def get_query(self) -> str:
-        return "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback) VALUES (%s, %s, %s, %s, %s, %s, %s);"
+        return "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback, categories) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"  # 8 columns, 8 %s placeholders
diff --git a/src/harvester/tests/unit/test_db.py b/src/harvester/tests/unit/test_db.py
@@ -1,3 +1,5 @@
+import pytest
+from runner.utils.utils import Feedback
 from unittest.mock import patch, MagicMock
 from runner.utils.db import PostgresDB
 
@@ -34,3 +36,91 @@ def test_execute_query(mock_connection_pool):
 
     mock_cursor.execute.assert_called_once_with("INSERT INTO test_table VALUES (%s, %s, %s)", data)
     mock_conn.commit.assert_called_once()
+
+@pytest.mark.parametrize("feedback_data,expected_categories,test_description", [
+    ({
+        "user_id": "test-user-123",
+        "timestamp": "2025-01-15 10:30:00.123456",
+        "conversation_id": "conv-456",
+        "user_question": "Test question 1",
+        "llm_response": "Test response 1",
+        "user_feedback": "Test feedback 1",
+        "categories": ["deployment", "kubernetes"]
+    }, ["deployment", "kubernetes"], "sentiment_missing"),
+    ({
+        "user_id": "test-user-456",
+        "timestamp": "2025-01-15 11:30:00.123456",
+        "conversation_id": "conv-789",
+        "user_question": "Test question 2",
+        "llm_response": "Test response 2",
+        "sentiment": 1,
+        "categories": ["general", "tutorial"]
+    }, ["general", "tutorial"], "user_feedback_missing"),
+    ({
+        "user_id": "test-user-789",
+        "timestamp": "2025-01-15 12:30:00.123456",
+        "conversation_id": "conv-abc",
+        "user_question": "Test question 3",
+        "llm_response": "Test response 3",
+        "sentiment": 1,
+        "user_feedback": "Test feedback 3"
+    }, None, "categories_missing"),
+    ({
+        "user_id": "test-user-000",
+        "timestamp": "2025-01-15 13:30:00.123456",
+        "conversation_id": "conv-def",
+        "user_question": "Test question 4",
+        "llm_response": "Test response 4",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 4",
+        "categories": []
+    }, [], "empty_categories_array"),
+    ({
+        "user_id": "test-user-111",
+        "timestamp": "2025-01-15 14:30:00.123456",
+        "conversation_id": "conv-ghi",
+        "user_question": "Test question 5",
+        "llm_response": "Test response 5",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 5",
+        "categories": ["incomplete"]
+    }, ["incomplete"], "single_category"),
+    ({
+        "user_id": "test-user-222",
+        "timestamp": "2025-01-15 15:30:00.123456",
+        "conversation_id": "conv-jkl",
+        "user_question": "Test question 6",
+        "llm_response": "Test response 6",
+        "sentiment": -1,
+        "user_feedback": "Test feedback 6",
+        "categories": ["incorrect", "not_relevant", "other"]
+    }, ["incorrect", "not_relevant", "other"], "three_specific_categories")
+], ids=["sentiment_missing", "user_feedback_missing", "categories_missing", "empty_categories_array", "single_category", "three_specific_categories"])
+@patch("runner.utils.db.ConnectionPool")
+def test_feedback_insertion_with_categories_scenarios(mock_connection_pool, feedback_data, expected_categories, test_description):
+    """Insert one Feedback per payload and check the full argument row that reaches the cursor."""
+    mock_conn = MagicMock()
+    mock_cursor = MagicMock()
+
+    mock_pool = MagicMock()
+    mock_pool.connection.return_value.__enter__.return_value = mock_conn
+    mock_conn.cursor.return_value.__enter__.return_value = mock_cursor
+
+    mock_connection_pool.return_value = mock_pool
+
+    db = PostgresDB()
+    feedback = Feedback(feedback_data)
+    db.execute(feedback.get_query(), feedback.get_args())
+
+    expected_query = "INSERT INTO feedback (user_id, timestamp, conversation_id, user_question, llm_response, sentiment, user_feedback, categories) VALUES (%s, %s, %s, %s, %s, %s, %s, %s);"
+    # Derive the expected row from the payload, in the INSERT's column order,
+    # rather than echoing the recorded call back into the assertion. Optional
+    # keys absent from the payload must reach the database as None.
+    columns = ("user_id", "timestamp", "conversation_id", "user_question", "llm_response", "sentiment", "user_feedback", "categories")
+    expected_args = [feedback_data.get(column) for column in columns]
+
+    mock_cursor.execute.assert_called_once_with(expected_query, expected_args)
+    mock_conn.commit.assert_called_once()
+
+    called_args = mock_cursor.execute.call_args[0][1]
+    assert called_args[7] == expected_categories
diff --git a/src/harvester/tests/unit/test_utils.py b/src/harvester/tests/unit/test_utils.py
@@ -0,0 +1,69 @@
+import pytest
+from runner.utils.utils import Feedback
+
+test_res_base = "The following values are missing from the query response:"
+
+@pytest.mark.parametrize("request_data, expected_exception, description", [
+    (
+        {
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} user_id",
+        "missing-user-id"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} timestamp",
+        "missing-timestamp"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "user_question": "test-question",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} conversation_id",
+        "missing-convo-id"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "llm_response": "test-llm-response"
+        },
+        f"{test_res_base} user_question",
+        "missing-user-question"
+    ),
+    (
+        {
+            "user_id": "test-id",
+            "timestamp": "test-timestamp",
+            "conversation_id": "test-convo-id",
+            "user_question": "test-question"
+        },
+        f"{test_res_base} llm_response",
+        "missing-llm-response"
+    ),
+    (
+        {
+            "sentiment": 1
+        },
+        f"{test_res_base} user_id, timestamp, conversation_id, user_question, llm_response",
+        "missing-all-required"
+    )
+], ids=["missing-user-id", "missing-timestamp", "missing-convo-id", "missing-user-question", "missing-llm-response", "missing-all-required"])
+def test_validate(request_data, expected_exception, description):
+    """The error must name exactly the missing required fields; a substring match would let extras slip through."""
+    with pytest.raises(Exception) as e:
+        Feedback(request_data)
+    assert str(e.value) == expected_exception
diff --git a/src/harvester/tests/unit/test_watcher.py b/src/harvester/tests/unit/test_watcher.py
@@ -83,19 +83,3 @@ def test_write_json_contents():
         write_json_contents(test_file_path, mock_db)
 
         mock_db.execute.assert_called_once()
-
-def test_write_json_contents_failure():
-    sample_files = [
-        "sample_feedback_4.json",
-    ]
-    for file in sample_files:
-        test_file_path = os.path.join(os.path.dirname(__file__), "data", file)
-        mock_db = MagicMock()
-        mock_fb_instance = MagicMock()
-        mock_fb_instance.get_query.return_value = "INSERT INTO ..."
-        mock_fb_instance.get_args.return_value = ("val",)
-
-        with pytest.raises(Exception) as e:
-            write_json_contents(test_file_path, mock_db)
-
-        assert "Values missing from query response" in str(e.value)
diff --git a/templates/postgres/post-deploy-config/job.yaml b/templates/postgres/post-deploy-config/job.yaml
@@ -45,7 +45,8 @@ spec:
             user_question TEXT NOT NULL,
             llm_response TEXT NOT NULL,
             sentiment INTEGER,
-            user_feedback TEXT
+            user_feedback TEXT,
+            categories TEXT[]
           );
           EOF
       restartPolicy: OnFailure