In [1]:
import concurrent.futures
import jinja2
from  anthropic import AnthropicBedrock
from compiler.core import Compiler
from tracing_client import TracingClient
from policies import common, handlers, typespec, drizzle, typescript, router, app_testcases

In [2]:
jinja_env = jinja2.Environment()
compiler = Compiler("botbuild/tsp_compiler", "botbuild/app_schema")
client = TracingClient(AnthropicBedrock(aws_profile="dev", aws_region="us-west-2"))

In [3]:
application_description = "Bot that writes poems about cars."

In [4]:
import os

DATASET_DIR = "evals/dataset.min/"
files = os.listdir(DATASET_DIR)
#typespec_definitions = "car_poem_typespec_schema.ts"
typescript_file = "car_app.ts"
drizzle_file = "car_db.ts"
typespec_file = "car_app.tsp"
#with open(os.path.join(DATASET_DIR, typespec_definitions), "r", encoding="utf-8") as f:
#    typespec_definitions = f.read()
with open(os.path.join(DATASET_DIR, typescript_file), "r", encoding="utf-8") as f:
    typescript_definitions = f.read()
with open(os.path.join(DATASET_DIR, drizzle_file), "r", encoding="utf-8") as f:
    drizzle_schema = f.read()
with open(os.path.join(DATASET_DIR, typespec_file), "r", encoding="utf-8") as f:
    typespec_definitions = f.read()

content = jinja_env.from_string(handlers.PROMPT).render(application_description=application_description,
                                                        function_name="generateCarPoem",
                                                        handler_interfaces="interface Options { user_id: string; message: string; greeting: Greeting; } interface Output { greetingMessage: string; }",
                                                        typespec_schema=typespec_definitions,
                                                        typescript_schema=typescript_definitions,
                                                        drizzle_schema=drizzle_schema)
message = {"role": "user", "content": content}

In [5]:
message

print(typespec_definitions)

model CarDetails {
    make: string,
    modelName: string,
    year: integer,
    type: string
  }
  
  model PoemStyle {
    styleType: string,
    length: integer,
    mood: string
  }
  
  model Poem {
    id: string,
    content: string,
    car: CarDetails,
    style: PoemStyle,
    createdAt: utcDateTime
  }
  
  interface CarPoetBot {
    @llm_func(2)
    generatePoem(car: CarDetails, style: PoemStyle): Poem;
    
    @llm_func(1)
    savePoem(poem: Poem): void;
    
    @llm_func(1)
    getFavoritePoems(fromDate: utcDateTime, toDate: utcDateTime): Poem[];
  }


In [6]:

content = jinja_env.from_string(app_testcases.PROMPT).render(typespec_schema=typespec_definitions)
message = {"role": "user", "content": content}
print(message)

{'role': 'user', 'content': 'Based on TypeSpec models and interfaces, generate Gherkin test cases for the application.\nEnsure that the output follows the Gherkin syntax.\nProvide reasoning within <reasoning> tag.\nEncompass output with <gherkin> tag.\n\nMake sure to follow gherkin-lint rules:\n{\n  "no-files-without-scenarios": "on",\n  "no-unnamed-features": "on",\n  "no-unnamed-scenarios": "on",\n  "no-dupe-scenario-names": "on",\n  "no-dupe-feature-names": "on",\n  "no-empty-file": "on",\n  "no-trailing-spaces": "on",\n  "new-line-at-eof": ["on", "yes"],\n  "no-multiple-empty-lines": "on",\n  "no-empty-background": "on",\n  "indentation": [\n    "on", \n    {\n      "Feature": 0,\n      "Background": 2,\n      "Scenario": 2,\n      "Step": 4,\n      "Examples": 4,\n      "example": 6\n    }\n  ]\n}\n\nExample output:\n\n<reasoning>\n    The application operates on users and messages and processes them with LLM.\n    The users are identified by their ids.\n    The messages have role

In [18]:
from policies.app_testcases import GherkinTaskNode

with GherkinTaskNode.platform(client, compiler, jinja_env):
    gherkin_step = GherkinTaskNode.run([message], typespec_schema=typespec_definitions)

In [19]:
gherkin_step

GherkinData(messages=[{'role': 'assistant', 'content': '<reasoning>\nBased on the TypeSpec models and interface:\n1. We have three main models: CarDetails, PoemStyle, and Poem\n2. The CarPoetBot interface provides three operations: generatePoem, savePoem, and getFavoritePoems\n3. The operations involve LLM processing as indicated by @llm_func decorators\n4. We need to test both happy path and error scenarios\n5. DateTime handling needs specific attention for getFavoritePoems\n6. Data validation should be included in the test cases\n</reasoning>\n\n<gherkin>\nFeature: Car Poetry Generation and Management\n  As a user of the Car Poetry Bot\n  I want to generate and manage car-themed poems\n  So that I can create and store artistic content about vehicles\n\n  Background:\n    Given the CarPoetBot service is operational\n    And the system time is "2024-03-20T12:00:00Z"\n\n  Scenario: Generate a poem with valid car details and style\n    Given I have the following car information:\n      |

In [15]:
with handlers.HandlerTaskNode.platform(client, compiler, jinja_env):
    output = handlers.HandlerTaskNode.run([message], function_name="generateCarPoem",
                                          typespec_schema=typespec_definitions,
                                          typescript_schema=typescript_definitions,
                                          drizzle_schema=drizzle_schema)

In [17]:
output


HandlerData(messages=[{'role': 'assistant', 'content': '<reasoning>\nBased on the TypeSpec models and interface:\n1. There are three main entities: CarDetails, PoemStyle, and Poem\n2. The CarPoetBot interface provides three operations: generatePoem, savePoem, and getFavoritePoems\n3. The operations involve datetime handling, data validation, and LLM processing\n4. Test cases should cover all operations and validate the structure of input/output data\n5. Edge cases should be considered for date ranges and data validation\n</reasoning>\n\n<gherkin>\nFeature: Car Poetry Generation and Management\n  As a user of the car poetry system\n  I want to generate, save, and retrieve car-themed poems\n  So that I can manage my collection of automotive poetry\n\n  Background:\n    Given the CarPoetBot service is operational\n    And the system time is set to "2024-03-20T12:00:00Z"\n\n  Scenario: Generate a poem for a luxury car\n    Given I have the following car information:\n      | make      | mo

In [39]:
root = handlers.HandlerTaskNode(output)

In [40]:
with handlers.HandlerTaskNode.platform(client, compiler, jinja_env):
    solution = common.bfs(root)

In [41]:
solution.score, solution.depth

(0, 0)

In [42]:
if solution.parent:
    for node in solution.parent.children:
      print(node.is_successful, node.data.output)

In [43]:
with handlers.HandlerTaskNode.platform(client, compiler, jinja_env):
    args = root.run_args

In [44]:
args

[{'role': 'assistant',
  'content': "I apologize, but I don't see the TypeSpec models and interfaces in your message. Without the actual TypeSpec definition, I can't generate specific Gherkin test cases. However, I can provide a template explanation of how I would approach this:\n\n<reasoning>\nWhen generating Gherkin test cases from TypeSpec models, I would:\n1. Identify the main models/interfaces and their properties\n2. Create scenarios for each major operation defined in the interfaces\n3. Include validation for required properties and constraints\n4. Test both success and error cases\n5. Use Background section for common setup steps\n6. Structure data tables to match the TypeSpec model properties\n</reasoning>\n\n<gherkin>\nFeature: API Operations\n  As an API consumer\n  I want to perform operations defined in the interfaces\n  So that I can interact with the system properly\n\n  Background:\n    Given the API service is running\n    And I have valid authentication\n\n  # Generic

# End to end test

In [45]:
application_description = "Bot that writes poems about cars."

content = jinja_env.from_string(typespec.PROMPT).render(application_description=application_description)
message = {"role": "user", "content": content}
with typespec.TypespecTaskNode.platform(client, compiler, jinja_env):
    tsp_data = typespec.TypespecTaskNode.run([message])
    tsp_root = typespec.TypespecTaskNode(tsp_data)
    tsp_solution = common.bfs(tsp_root)

match tsp_solution.is_successful:
    case True:
        print("Typespec solution is successful.")
    case False:
        raise Exception("Typespec solution is not successful.")

Typespec solution is successful.


In [46]:
content = jinja_env.from_string(router.PROMPT).render(typespec_definitions=tsp_solution.data.output.typespec_definitions)
message = {"role": "user", "content": content}
with router.RouterTaskNode.platform(client, jinja_env):
    router_data = router.RouterTaskNode.run([message], typespec_definitions=tsp_solution.data.output.typespec_definitions)
    router_root = router.RouterTaskNode(router_data)
    router_solution = common.bfs(router_root)

match router_solution.is_successful:
    case True:
        print("Router solution is successful.")
    case False:
        raise Exception("Router solution is not successful.")

Router solution is successful.


In [47]:
content = jinja_env.from_string(typescript.PROMPT).render(typespec_definitions=tsp_solution.data.output.typespec_definitions)
message = {"role": "user", "content": content}
with typescript.TypescriptTaskNode.platform(client, compiler, jinja_env):
    ts_data = typescript.TypescriptTaskNode.run([message])
    ts_root = typescript.TypescriptTaskNode(ts_data)
    ts_solution = common.bfs(ts_root)

match ts_solution.is_successful:
    case True:
        print("Typescript solution is successful.")
    case False:
        raise Exception("Typescript solution is not successful.")

Typescript solution is successful.


In [48]:
content = jinja_env.from_string(drizzle.PROMPT).render(typespec_definitions=tsp_solution.data.output.typespec_definitions)
message = {"role": "user", "content": content}
with drizzle.DrizzleTaskNode.platform(client, compiler, jinja_env):
    dzl_data = drizzle.DrizzleTaskNode.run([message])
    dzl_root = drizzle.DrizzleTaskNode(dzl_data)
    dzl_solution = common.bfs(dzl_root)

match dzl_solution.is_successful:
    case True:
        print("Drizzle solution is successful.")
    case False:
        raise Exception("Drizzle solution is not successful.")

Drizzle solution is successful.


In [49]:
dzl_solution.data.output.feedback

{'exit_code': 0,
 'stdout': "Reading config file '/app/drizzle.config.ts'\nUsing 'pg' driver for database querying\n[⣷] Pulling schema from database...\n\x1b[2K\x1b[1G[✓] Pulling schema from database...\n[✓] Changes applied\n",
 'stderr': None}

In [50]:
print(dzl_solution.data.output.drizzle_schema)

import { integer, pgTable, text, timestamp, uuid } from "drizzle-orm/pg-core";

// Car details table
export const carDetailsTable = pgTable("car_details", {
  id: uuid("id").defaultRandom().primaryKey(),
  carMake: text("car_make").notNull(),
  carModel: text("car_model").notNull(),
  yearMade: integer("year_made"),
});

// Poem style table
export const poemStyleTable = pgTable("poem_styles", {
  id: uuid("id").defaultRandom().primaryKey(),
  poemType: text("poem_type").notNull(),
  poemTone: text("poem_tone").notNull(),
});

// Poems table
export const poemsTable = pgTable("poems", {
  id: uuid("id").defaultRandom().primaryKey(),
  content: text("content").notNull(),
  createdAt: timestamp("created_at").notNull().defaultNow(),
  // Foreign keys
  carDetailsId: uuid("car_details_id")
    .references(() => carDetailsTable.id)
    .notNull(),
  styleId: uuid("style_id")
    .references(() => poemStyleTable.id)
    .notNull(),
});


In [51]:
def seach_handler(
    function_name: str,
    typespec_definitions: str,
    typescript_schema: str,
    drizzle_schema: str,
) -> handlers.HandlerTaskNode:
    prompt_params = {
        "application_description": application_description,
        "function_name": function_name,
        "typespec_schema": typespec_definitions,
        "typescript_schema": typescript_schema,
        "drizzle_schema": drizzle_schema,
    }
    content = jinja_env.from_string(handlers.PROMPT).render(**prompt_params)
    message = {"role": "user", "content": content}
    output = handlers.HandlerTaskNode.run([message], **prompt_params)
    root_node = handlers.HandlerTaskNode(output)
    solution = common.bfs(root_node)
    return solution

In [52]:
MAX_WORKERS = 5
results: dict[str, handlers.HandlerOutput] = {}
with handlers.HandlerTaskNode.platform(client, compiler, jinja_env):
    with concurrent.futures.ThreadPoolExecutor(MAX_WORKERS) as executor:
        future_to_handler = {}
        for function_name in tsp_solution.data.output.llm_functions:
            future_to_handler[executor.submit(
                seach_handler,
                function_name,
                tsp_solution.data.output.typespec_definitions,
                ts_solution.data.output.typescript_schema,
                dzl_solution.data.output.drizzle_schema,
            )] = function_name
        for future in concurrent.futures.as_completed(future_to_handler):
            function_name, result = future_to_handler[future], future.result()
            results[function_name] = result

In [53]:
for key, val in results.items():
    print("Handler", key)
    print(val.data.output.feedback["stdout"])
    print(val.depth)

Handler getPreviousPoems
None
0
Handler generatePoem
None
0


In [54]:
print(ts_data.output.typescript_schema)

export interface CarDetails {
    carMake: string;
    carModel: string;
    yearMade: number | null;
}

export interface PoemStyle {
    poemType: string;
    poemTone: string;
}

export interface Poem {
    carInfo: CarDetails;
    content: string;
    style: PoemStyle;
    createdAt: Date;
}

export interface CarPoetBot {
    generatePoem(carDetails: CarDetails, poemStyle: PoemStyle): Promise<Poem>;
    getPreviousPoems(carInfo: CarDetails): Promise<Poem[]>;
}


In [55]:
print(results["generateCarPoem"].data.output.handler)

KeyError: 'generateCarPoem'

In [None]:
results["greet"].output.feedback

KeyError: 'greet'

In [28]:
content = jinja_env.from_string(handlers.PROMPT).render(
    typespec_schema=tsp_solution.data.output.typespec_definitions,
    typescript_schema=ts_solution.data.output.typescript_schema,
    drizzle_schema=dzl_solution.data.output.drizzle_schema
)
message = {"role": "user", "content": content}
with handlers.HandlerTaskNode.platform(client, compiler, jinja_env):
    output = handlers.HandlerTaskNode.run([message],
                                          function_name="generateGreeting",
                                          typespec_schema=tsp_solution.data.output.typespec_definitions,
                                          typescript_schema=ts_solution.data.output.typescript_schema,
                                          drizzle_schema=dzl_solution.data.output.drizzle_schema)

In [29]:
print(output.output.feedback)

{'exit_code': 0, 'stdout': None, 'stderr': None}


In [30]:
print(output.output.handler)

import { db } from "../db";
import type { GreetingParams } from "../common/schema";
import { greetingParamsTable, generatedGreetingsTable } from "../db/schema/application";

interface Options {
    params: GreetingParams;
}

interface Output {
    greetingId: string;
    greeting: string;
}

export const handle = async (options: Options): Promise<Output> => {
    const [paramsResult] = await db.insert(greetingParamsTable)
        .values({
            name: options.params.name,
            time_of_day: options.params.timeOfDay
        })
        .returning({ id: greetingParamsTable.id });

    const greeting = `Hello${options.params.name ? " " + options.params.name : ""}${options.params.timeOfDay ? ", good " + options.params.timeOfDay : ""}!`;

    const [greetingResult] = await db.insert(generatedGreetingsTable)
        .values({
            params_id: paramsResult.id,
            greeting_text: greeting
        })
        .returning({ id: generatedGreetingsTable.id });

    return {
