In [0]:
%run ./system_prompts

In [0]:
import dspy

In [0]:
class SQLPrompt1AntiHallucination(dspy.Signature):
    """Extract an EXACT, VERBATIM code snippet from the 'text' ONLY IF it meets the criteria below.  Hallucination is STRICTLY PROHIBITED.

    **CRITICAL RULES - READ CAREFULLY:**

    1.  **ABSOLUTELY NO GENERATION:** You are FORBIDDEN from generating ANY code. Your task is PURE EXTRACTION, not creation.
    2.  **VERBATIM MATCH ONLY:** The extracted code snippet MUST be a 100% character-for-character match with a portion of the 'text'.  ANY deviation is unacceptable.
    3.  **NO COMMENTS:** Ignore ALL commented-out code.  Only consider uncommented, executable code.
    4.  **'text' IS THE ONLY SOURCE:**  The 'text' input is the *SOLE* source of truth.  Do NOT use any other information for extraction or scoring.
    5.  **'workloads' CREATION REQUIRED, Schema Display EXPECTED:** The extracted snippet MUST create/define a table/view named 'workloads'. For full credit it must ALSO display its schema (e.g., using a command like DESCRIBE, SHOW CREATE TABLE, etc.); a snippet that creates 'workloads' without displaying its schema is still extracted but earns only partial credit.
    6.  **EMPTY STRING IS THE DEFAULT:** If NO snippet in the 'text' creates 'workloads', the 'code_snippet' MUST be an EMPTY STRING.  This is the EXPECTED behavior in most cases.
    7.  **ZERO TOLERANCE FOR HALLUCINATION:**  Any 'code_snippet' that is not a verbatim extract from the 'text' will be considered a MAJOR FAILURE.

    **Consequences of Hallucination:**

    *   Incorrect 'code_snippet' values will lead to immediate rejection.
    *   Focus on accuracy and adherence to the rules.  An empty string is ALWAYS preferable to an incorrect guess.
    """

    # Maintainer notes:
    # - The docstring above is the instruction text of this DSPy signature, so
    #   it must refer to the fields by their declared names ('text',
    #   'code_snippet', 'score', 'explanation').
    # - Rule 5 deliberately allows extraction of a snippet that creates
    #   'workloads' without showing its schema; otherwise the 7.5 rubric level
    #   below could never be awarded.
    # - Output fields are declared snippet -> score -> explanation so the score
    #   is derived from the snippet that was just extracted.

    text: str = dspy.InputField(desc="The text containing potential SQL code snippets. This is your ONLY source of information.")

    code_snippet: str = dspy.OutputField(
        desc="""The EXACT, VERBATIM code snippet from the 'text' that fulfills the requirements above.
               If NO such snippet exists, return an EMPTY STRING.  An empty string is the CORRECT response if no match is found.
               DO NOT GUESS. DO NOT GENERATE. EXTRACT ONLY."""
    )

    # Kept as `str` (not a numeric type) so callers that parse the score text keep working.
    score: str = dspy.OutputField(
        desc="""Score based SOLELY on the 'code_snippet':
               - 15: If 'code_snippet' is NON-EMPTY, creates 'workloads', AND displays its schema (verbatim from 'text').
               - 7.5: If 'code_snippet' is NON-EMPTY, creates 'workloads', but DOES NOT display its schema (verbatim from 'text').
               - 0:  If 'code_snippet' is EMPTY (no match found) OR does not create 'workloads' (verbatim from 'text'). This is the most common and often correct score."""
    )

    explanation: str = dspy.OutputField(
        desc="""Explain the 'score' based ONLY on the 'code_snippet'.
               Justify the score (0, 7.5, or 15) by explicitly referencing the presence/absence of 'workloads' creation and schema display IN THE EXTRACTED SNIPPET.
               If the 'code_snippet' is empty, state this clearly and explain that no matching code was found in the 'text'."""
    )

In [0]:
class SQLPrompt2AntiHallucination(dspy.Signature):
    """Extract a *VERBATIM* SQL code snippet from the 'text' ONLY if it returns a distinct list of 'workspaceId' *AND NOTHING ELSE*.  Hallucination is STRICTLY FORBIDDEN.

    **MANDATORY RULES - NO EXCEPTIONS:**

    1.  **ZERO CODE GENERATION:** You MUST NOT generate ANY SQL code. Your ONLY task is to FIND and EXTRACT an *EXACT* match within the 'text'.
    2.  **PERFECT MATCH REQUIRED:** The extracted code snippet MUST be a 100% character-for-character copy of a section within the 'text'.  ANY alteration is a FAILURE.
    3.  **IGNORE ALL COMMENTS:**  Completely disregard any commented-out code (e.g., lines starting with `--` or enclosed in `/* ... */`).
    4.  **'text' IS THE SOLE SOURCE:**  Base your extraction and scoring EXCLUSIVELY on the 'text'. Any other information is IRRELEVANT and MUST be IGNORED.
    5.  **STRICT 'workspaceId' CRITERIA:** The extracted snippet MUST:
        *   Select ONLY the `workspaceId` column.
        *   Use the `DISTINCT` keyword (or equivalent) to return only unique values.
        *   Contain NO other columns, `WHERE` clauses, `GROUP BY` clauses, `ORDER BY` clauses, `LIMIT` clauses, or any other operations.  It must be a simple `SELECT DISTINCT workspaceId ...` query.
    6.  **EMPTY STRING IS THE DEFAULT:** If NO snippet in the 'text' PERFECTLY matches ALL criteria, the 'code_snippet' MUST be an EMPTY STRING.  This is the expected and correct behavior in most situations.
    7.  **ABSOLUTE ZERO TOLERANCE FOR HALLUCINATION:** Any 'code_snippet' that is NOT a verbatim extract from the 'text' is a CRITICAL ERROR and will be REJECTED.

    **CONSEQUENCES OF FAILURE:**

    *   Generating code, extracting incorrect code, or relying on information outside the 'text' will result in IMMEDIATE REJECTION.
    *   Prioritize ACCURACY and ADHERENCE to these rules.  An EMPTY STRING is ALWAYS better than a guess.
    """

    # Maintainer notes:
    # - The docstring above is the instruction text of this DSPy signature, so
    #   it must refer to the input by its declared field name ('text'); there
    #   is no 'context' or 'question' field on this signature.
    # - Output fields are declared snippet -> score -> explanation so the score
    #   is derived from the snippet that was just extracted.

    text: str = dspy.InputField(
        desc="The text containing potential SQL code snippets. This is your ONLY source of information.  Do NOT use any other information."
    )

    code_snippet: str = dspy.OutputField(
        desc="""The EXACT, VERBATIM code snippet from the 'text' that selects ONLY a distinct list of 'workspaceId' and NOTHING ELSE.
               If NO such snippet exists, return an EMPTY STRING.  An empty string is the CORRECT and EXPECTED response if no match is found.
               ABSOLUTELY NO GUESSING. NO GENERATION. ONLY EXTRACT."""
    )

    # Binary rubric: only 0 or 15 are valid values.
    score: str = dspy.OutputField(
        desc="""Score based EXCLUSIVELY on the 'code_snippet':
               - 15: If 'code_snippet' is NON-EMPTY and returns ONLY a distinct list of 'workspaceId' (verbatim from 'text').
               - 0: If 'code_snippet' is EMPTY (no match found) OR returns anything other than ONLY a distinct list of 'workspaceId' (verbatim from 'text'). This is the most likely and often the correct score.
               THERE ARE NO OTHER POSSIBLE SCORES."""
    )

    explanation: str = dspy.OutputField(
        desc="""Explain the 'score' based ONLY on the 'code_snippet'.
               Justify the score (0 or 15) by explicitly stating whether a matching snippet was found in the 'text'.
               If 'code_snippet' is empty, clearly state that no code in the 'text' met ALL the strict criteria.
               Refer directly to the presence/absence of ONLY the distinct 'workspaceId' selection in the extracted snippet."""
    )

In [0]:
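# NOTE: earlier SQLPrompt2 draft, left commented out; module_dict_sql uses SQLPrompt2AntiHallucination for 'module_2'.
# class SQLPrompt2(dspy.Signature):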
#     """ Check if the provided context has any code snippet which return a distinct list of workspaceID """

#     text: str = dspy.InputField()
    
#     score: str = dspy.OutputField(desc="15 if any code snippet below return a distinct list of workspaceID, 0 if no code snippet returns a list of distinct workspace id,")
    
#     code_snippet: str = dspy.OutputField(desc="provide the code snippet which returns a distinct list of workspace id  encapsulated as a string") 

In [0]:
class SQLPrompt3(dspy.Signature):
    """ Check if the provided context has any code snippet which returns the  number of unique clusters. """

    # The docstring above is the signature's instruction text, so its wording
    # (including spacing) is part of the prompt and is left untouched.

    # Submission text to inspect for SQL snippets.
    text: str = dspy.InputField()

    # Binary rubric: 15 when a matching snippet exists, otherwise 0.
    score: str = dspy.OutputField(
        desc=(
            "15 if any code snippet below returns the  number of unique clusters, "
            "0 if no code snippet returns the number of unique clusters"
        )
    )

    # The snippet that justified the score, returned as a plain string.
    code_snippet: str = dspy.OutputField(
        desc="provide the code snippet which returns the  number of unique clusters encapsulated as a string"
    )

In [0]:
class SQLPrompt4(dspy.Signature):
    """ Check if the provided context has any code snippet which returns the  workload hours each day for the workspace id in ordered fashion """

    # The docstring above is the signature's instruction text, so its wording
    # (including spacing) is part of the prompt and is left untouched.

    # Submission text to inspect for SQL snippets.
    text: str = dspy.InputField()

    # Tiered rubric: 15 (ordered result), 12 (missing order / date-based grouping), 0 (no match).
    score: str = dspy.OutputField(
        desc=(
            "15 if any code snippet below returns the  workload hours each day for the workspace id in ordered fashion, "
            "12 points if any code snippet below returns workload hours each day for the workspace id but the order is missing, "
            "12 points if grouping is based on date , "
            "0 if no code snippet returns workload hours each day for the workspace id"
        )
    )

    # The snippet that justified the score, returned as a plain string.
    code_snippet: str = dspy.OutputField(
        desc="provide the code snippet which returns the  workload hours each day for the workspace id encapsulated as a string"
    )

In [0]:
class SQLPrompt5(dspy.Signature):
    """ Check if the provided context has any code snippet which returns interactive node hours per day on the different Spark versions over time """

    # The docstring above is the signature's instruction text, so its wording
    # is part of the prompt and is left untouched.

    # Submission text to inspect for SQL snippets.
    text: str = dspy.InputField()

    # Binary rubric: 15 when a matching snippet exists, otherwise 0.
    score: str = dspy.OutputField(
        desc=(
            "15 if any code snippet below returns interactive node hours per day on the different Spark versions over time, "
            "0 if no code snippet returns interactive node hours per day on the different Spark versions over time"
        )
    )

    # The snippet that justified the score, returned as a plain string.
    code_snippet: str = dspy.OutputField(
        desc="provide the code snippet which returns interactive node hours per day on the different Spark versions over time encapsulated as a string"
    )

In [0]:
class SQLPrompt6(dspy.Signature):
    """ Check if the provided context has any code snippet which returns top two most recently shipped (shipDate) Line Items per Part using window function """

    # The docstring above is the signature's instruction text, so its wording
    # is part of the prompt.

    # Submission text to inspect for SQL snippets.
    text: str = dspy.InputField()

    # Tiered rubric: 25 (window function), 20 (subquery), 10 (groups by the key only), 0 (no match).
    score: str = dspy.OutputField(desc="25 if any code snippet below returns top two most recently shipped (shipDate) Line Items per Part using window function, 20  if any code snippet below returns top two most recently shipped (shipDate) Line Items per Part using subquery, 10 if any code snippet below returns top two most recently shipped (shipDate) Line Items per Part but groups by the key only, 0 if no code snippet returns top two most recently shipped (shipDate) Line Items per Part")

    # The description no longer restricts the snippet to window-function
    # solutions: the rubric also awards 20 and 10 points for subquery and
    # group-by approaches, and those scores need their supporting snippet too.
    code_snippet: str = dspy.OutputField(desc="provide the code snippet which returns top two most recently shipped (shipDate) Line Items per Part (using a window function, a subquery, or grouping by the key) encapsulated as a string")

In [0]:
# Chain-of-thought graders keyed 'module_1' .. 'module_6'; the key number is
# the 1-based position of the signature in the tuple below.
module_dict_sql = {
    f'module_{position}': dspy.ChainOfThought(signature)
    for position, signature in enumerate(
        (
            SQLPrompt1AntiHallucination,
            SQLPrompt2AntiHallucination,
            SQLPrompt3,
            SQLPrompt4,
            SQLPrompt5,
            SQLPrompt6,
        ),
        start=1,
    )
}

In [0]:
class SQLModule(dspy.Module):
    """DSPy module that runs a chain-of-thought SQL-grading signature selected by name.

    Args:
        prompt_name: Name of the prompt to use. Only ``"prompt1"``
            (``SQLPrompt1AntiHallucination``) is currently supported.

    Raises:
        ValueError: If ``prompt_name`` is not a supported prompt name.
    """

    def __init__(self, prompt_name: str):
        super().__init__()
        # Supported prompt names and the signature each one maps to.
        signatures = {
            "prompt1": SQLPrompt1AntiHallucination,
        }
        if prompt_name not in signatures:
            # Fail at construction time instead of leaving `self.qa` undefined,
            # which would only surface later as an AttributeError in forward().
            raise ValueError(
                f"Unsupported prompt_name {prompt_name!r}; expected one of {sorted(signatures)}"
            )
        self.qa = dspy.ChainOfThought(signatures[prompt_name])

    def forward(self, context, question=None):
        """Grade the submission text with the configured signature.

        Args:
            context: Text containing the candidate SQL snippets; passed to the
                signature's ``text`` input field.
            question: Accepted for backward compatibility with existing
                callers. It is not forwarded, because the signature declares
                no ``question`` input field.

        Returns:
            The result of calling the underlying ``dspy.ChainOfThought`` predictor.
        """
        # The signature's only input field is `text`; passing `context=` /
        # `question=` would leave that field unset.
        return self.qa(text=context)