In [0]:
%pip install nbformat databricks-sdk[openai]==0.38.0 dspy --quiet
# Restart the Python process so the packages installed above are the ones this notebook imports.
# NOTE(review): only databricks-sdk is version-pinned; nbformat and dspy resolve to whatever is
# latest at install time — consider pinning them so re-runs are reproducible.
dbutils.library.restartPython()

In [0]:
%run ../helper/GradingModule

In [0]:
class SQLPrompt1(dspy.Signature):
  """ Check if the provided context has any code snippet which creates a table/view called workloads and get schema of workloads """
  # NOTE: DSPy uses the class docstring above as the instruction text sent to the
  # model, so it is prompt content, not developer documentation. Keep developer
  # notes in `#` comments like these.

  # Input: the context to grade.
  # NOTE(review): presumably the text of the candidate's notebook — confirm against the grading helper.
  text: str = dspy.InputField()

  # Output: rubric score. Declared as str even though the rubric values are numeric
  # (15 / 7.5 / 0), so downstream code must convert it before doing arithmetic.
  score: str = dspy.OutputField(desc="15 if any code snippet below creates a table/view and displays its schema, 7.5 if it creates a table/view but does not display its schema,  0 if no code snippet creates a table/view")

  # Output: the snippet the model based its score on.
  code_snippet: str = dspy.OutputField(desc="provide the code snippet which creates a table/view and displays its schema encapsulated as a string")

In [0]:
# Scratch ChainOfThought predictor over SQLPrompt1, used only to inspect what DSPy builds from the signature.
x = dspy.ChainOfThought(SQLPrompt1)

In [0]:
# Show the instruction text held by the signature of the predictor wrapped inside `x`.
vars(vars(x)['predict'])['signature'].instructions

In [0]:
# Exploration aid: prints the API documentation of dspy.ChainOfThought.
# NOTE(review): leftover from interactive exploration; it only produces output and nothing below uses it.
help(dspy.ChainOfThought)

In [0]:
# Container for a reference grading result. All three fields are DSPy output
# fields, and the class is instantiated directly below to build the expected
# answer for an example.
class Answer(dspy.Signature):
  # Rubric score. Typed int here, whereas SQLPrompt1.score is typed str.
  score: int = dspy.OutputField(desc="The score provided by the model")
  # Free-text justification for the score.
  reasoning: str = dspy.OutputField(desc="The reason provided by the model for the score")
  # Code excerpt that supports the score.
  code_snippet: str = dspy.OutputField(desc=""" supporting code snippet provided by the model for the score """)

In [0]:
# Build the hand-written reference answer for SQLPrompt1 (full marks: 15).
# `score` is passed as the string "15" although Answer.score is annotated int.
# NOTE(review): presumably coerced to int when the Signature instance is validated — confirm,
# because the metric below does arithmetic on this value.
actual_output = Answer(
    score="15",
    reasoning="The code snippet provided creates a temporary Spark SQL view called \"workloads\" from the JSON files in the specified directory. The schema of this table can be obtained using the `DESCRIBE` command or the `printSchema()` method in Python",
    code_snippet=""" ```python
                      df = (spark.read
                        .format("json")
                        .option("header", "true")
                        .option("inferSchema", "true")
                        .load("/databricks-coding-challenge/workloads/")
                      )
                      df.createOrReplaceTempView("workloads")

                      # Get the schema of the table
                      desc workloads
                      ``` """
)

In [0]:
# Pair the prompt signature with its reference answer in a DSPy Example.
# NOTE(review): `question` holds the SQLPrompt1 class itself rather than input text, and no
# `.with_inputs(...)` is declared — confirm this is intended before passing the example to
# DSPy evaluators or optimizers.
qa_pair = dspy.Example(question=SQLPrompt1, answer=actual_output)

In [0]:
# `Module` and the `databricks` package are not defined or imported in this notebook.
# NOTE(review): presumably both come from the `%run ../helper/GradingModule` cell above — confirm.
module = Module(databricks.sdk.WorkspaceClient())

In [0]:
# Hand the helper a one-entry mapping: 'module_1' -> a ChainOfThought predictor over SQLPrompt1.
module.set_module_dict({'module_1': dspy.ChainOfThought(SQLPrompt1)})

In [0]:
# (candidate id, workspace path of that candidate's notebook) pairs.
# NOTE: despite its name this is a list of tuples, not a dict; a later cell indexes it positionally.
# NOTE(review): the path is absolute and user-specific — consider making it configurable.
candidate_dict = [
  ("X","/Workspace/Users/vibhor.nigam@databricks.com/interview-scripts/interview-grading/example-notebooks-sql/01-SQL-X")
]

# Human-assigned scores keyed by candidate id.
# NOTE(review): presumably one list entry per graded prompt — confirm.
human_graded_dict = {
  "X": [15]
}

In [0]:
# Make the Databricks-served Llama 3.3 70B Instruct model the default LM for all DSPy calls.
lm = dspy.LM(model='databricks/databricks-meta-llama-3-3-70b-instruct')
dspy.configure(lm=lm)

In [0]:
# Fixed parameters for this grading run.
table_name = "users.abhay_jalisatgi.gen_ai_eval"
section = "SQL"
candidate = "X"

# Take the first candidate entry: element 0 is the id, element 1 the notebook path.
human_answers_list = human_graded_dict[candidate_dict[0][0]]
context_path = candidate_dict[0][1]

# Echo the resolved parameters before starting the grading call.
print(f"Context path: {context_path}\n, Human answers list: {human_answers_list}\n, Table name: {table_name}\n, Section: {section}\n, Candidate: {candidate}\n")

In [0]:
# Run the helper's grading for this candidate. Later cells read the 'answers_dict' entry
# of the returned mapping.
# NOTE(review): the table_name argument suggests results are also persisted to that table — confirm in GradingModule.
results = module.get_error_and_answer_dict(context_path, human_answers_list, table_name, section, candidate)

In [0]:
# Display the graded answers returned by the helper.
results['answers_dict']

In [0]:
# Reference score stored on the example; the metric below compares predictions against it.
qa_pair.answer.score

In [0]:
def evaluate_score(example, pred, trace=None):
  """Return the root-mean-square error between the reference and predicted scores.

  Args:
    example: object exposing the reference score(s) as ``example.answer.score``;
      a single number or a sequence of numbers. Numeric strings are accepted.
    pred: predicted score or sequence of predicted scores. Numeric strings such
      as ``"15"`` or ``"7.5"`` are accepted, because ``SQLPrompt1.score`` is
      declared as a ``str`` field and predictions may therefore arrive as text.
    trace: unused; kept so the function has the ``(example, pred, trace)``
      shape of a DSPy metric.

  Returns:
    The RMSE as a NumPy float. ``0.0`` means every prediction matched the
    reference. This is an error measure, so lower is better.

  Raises:
    ValueError: if a score cannot be parsed as a number.
  """
  # Local import: no cell visible in this notebook imports numpy, so do not rely
  # on the name leaking in from elsewhere.
  import numpy as np

  # Coerce both sides to float so string scores do not break the subtraction.
  actual_values = np.array(example.answer.score, dtype=float)
  predicted_values = np.array(pred, dtype=float)
  return np.sqrt(np.mean((actual_values - predicted_values) ** 2))

In [0]:
# Pull the 'score' column out of the graded answers as a plain Python list for the metric.
pred = results['answers_dict']['score'].tolist()
pred

In [0]:
# Root-mean-square error between the reference score and the predicted scores (0 means a perfect match).
print(evaluate_score(qa_pair, pred))

In [0]:
# Render the graded answers with the object's own display() method.
# NOTE(review): an earlier cell calls `.tolist()` on a column of this same object (pandas-style),
# while `.display()` is typically a Spark DataFrame method on Databricks — confirm which type the helper returns.
results['answers_dict'].display()

In [0]:
# Preview five rows of the evaluation table (the same table name is held in `table_name` above).
spark.sql("select * from users.abhay_jalisatgi.gen_ai_eval limit 5 ").display()

In [0]:
%sql
-- Snapshot the distinct graded rows of the evaluation table into a training-set table.
-- CREATE OR REPLACE keeps this cell re-runnable; a plain CREATE TABLE fails once the table exists.
-- NOTE(review): the source column is selected as "secton" and aliased to "section"; this looks
-- like a typo in the source table's column name — confirm against users.abhay_jalisatgi.gen_ai_eval.
create or replace table users.abhay_jalisatgi.training_set as
select distinct score
      , code_snippet
      , candidate
      , secton as section

from users.abhay_jalisatgi.gen_ai_eval

In [0]:
# Keep one row per (candidate, section) and write the result back over the same table.
# NOTE(review): dropDuplicates does not define which of several duplicate rows survives —
# add an explicit ordering or tie-break if that matters.
df = (
    spark.read.table("users.abhay_jalisatgi.training_set")
    .dropDuplicates(["candidate", "section"])
)
df.write.mode("overwrite").saveAsTable("users.abhay_jalisatgi.training_set")

In [0]:
import dspy
from dspy.signatures import Signature
from dspy.signatures.field import InputField, OutputField

# Example Signature class that declares a docstring. For a DSPy Signature the
# docstring doubles as the instruction text, so developer notes belong in `#` comments.
class SentimentAnalysis(Signature):
  """Analyze sentiment in text"""
  # Input: the text to classify.
  input_text = InputField(desc="Text to be analyzed")
  # Outputs: an overall label plus the elements that drove it.
  sentiment = OutputField(desc="Overall sentiment (positive, negative, neutral)")
  key_insights = OutputField(desc="Specific elements contributing to the sentiment")

# Helper: read the docstring declared on a Signature class
def get_signature_docstring(signature_class):
    """Return the whitespace-stripped ``__doc__`` of ``signature_class``.

    Falls back to the literal text "No docstring available" when ``__doc__``
    is ``None`` or an empty string.

    NOTE(review): DSPy may fill in ``__doc__`` with default instructions for
    Signature subclasses that declare none, in which case the fallback is
    never reached for such classes — confirm against the installed version.
    """
    raw_doc = signature_class.__doc__
    if not raw_doc:
        return "No docstring available"
    return raw_doc.strip()

# Print a heading, then the docstring extracted from SentimentAnalysis
print("Docstring of SentimentAnalysis:")
docstring = get_signature_docstring(SentimentAnalysis)
print(docstring)

# Example of a Signature declared without a docstring, used below to exercise
# get_signature_docstring on a class that has no explicit instructions.
class TopicAnalysis(Signature):
    # Input: the text to analyze.
    input_text = InputField(desc="Text to be analyzed")
    # Output: the topics found in the text.
    topics = OutputField(desc="Main topics identified")

# Same report for the class that declares no docstring of its own
print("\nDocstring of TopicAnalysis:")
docstring = get_signature_docstring(TopicAnalysis)
print(docstring)