Separated source code transformation from TypeChat src and created separate CLI utility for schema pythonic comment handling
microsoft · May 2, 2024 · 84e649e · 84e649e
1 parent 6ec8dd3
commit 84e649e
Show file tree

Hide file tree

Showing 11 changed files with 492 additions and 253 deletions.
diff --git a/python/examples/math/ast_comment_handling.py b/python/examples/math/ast_comment_handling.py
diff --git a/python/examples/math/demo.py b/python/examples/math/demo.py
@@ -4,7 +4,7 @@
 import sys
 from typing import cast
 from dotenv import dotenv_values
-import schema_with_comments as math
+import schema as math
 from typechat import Failure, create_language_model, process_requests
 from program import TypeChatProgramTranslator, TypeChatProgramValidator, evaluate_json_program
 

diff --git a/python/examples/math/program.py b/python/examples/math/program.py
@@ -121,7 +121,6 @@ class TypeChatProgramTranslator(TypeChatJsonTranslator[JsonProgram]):
     _api_declaration_str: str
 
     def __init__(self, model: TypeChatLanguageModel, validator: TypeChatProgramValidator, api_type: type):
-        api_type = self._convert_pythonic_comments_to_annotated_docs(api_type)
         super().__init__(model=model, validator=validator, target_type=api_type, _raise_on_schema_errors = False)
         # TODO: the conversion result here has errors!
         conversion_result = python_type_to_typescript_schema(api_type)

diff --git a/python/examples/math/pythonic_comment_handling.py b/python/examples/math/pythonic_comment_handling.py
diff --git a/python/examples/math/schema.py b/python/examples/math/schema.py
@@ -1,5 +1,6 @@
 from typing_extensions import TypedDict, Annotated, Callable, Doc
 
+
 class MathAPI(TypedDict):
     """
     This is API for a simple calculator

diff --git a/python/src/typechat/_internal/translator.py b/python/src/typechat/_internal/translator.py
@@ -1,10 +1,6 @@
 from typing_extensions import Generic, TypeVar
 
 import pydantic_core
-import ast
-import io
-import tokenize
-import inspect
 
 from typechat._internal.model import PromptSection, TypeChatLanguageModel
 from typechat._internal.result import Failure, Result, Success
@@ -123,99 +119,4 @@ def _create_repair_prompt(self, validation_error: str) -> str:
 '''
 The following is a revised JSON object:
 """
-        return prompt
-
-    def _convert_pythonic_comments_to_annotated_docs(schema_class, debug=False):
-
-        def _extract_tokens_between_line_numbers(gen, start_lineno, end_lineno):
-            # Extract tokens between start_lineno and end_lineno obtained from the tokenize generator
-            tokens = []
-            for tok in gen:
-                if tok.start[0] < start_lineno:  # Skip tokens before start_lineno
-                    continue
-                if tok.start[0] >= start_lineno and tok.end[0] <= end_lineno:
-                    # Add token if it is within the range
-                    tokens.append((tok.type, tok.string))
-                elif tok.start[0] > end_lineno:  # Stop if token is beyond end_lineno
-                    break
-
-            return tokens
-
-        schema_path = inspect.getfile(schema_class)
-
-        with open(schema_path, 'r') as f:
-            schema_class_source = f.read()
-            gen = tokenize.tokenize(io.BytesIO(
-                schema_class_source.encode('utf-8')).readline)
-
-        tree = ast.parse(schema_class_source)
-
-        if debug:
-            print("Source code before transformation:")
-            print("--"*50)
-            print(schema_class_source)
-            print("--"*50)
-
-        has_comments = False  # Flag later used to perform imports of Annotated and Doc if needed
-
-        for node in tree.body:
-            if isinstance(node, ast.ClassDef):
-                for n in node.body:
-                    if isinstance(n, ast.AnnAssign):  # Check if the node is an annotated assignment
-                        assgn_comment = None
-                        tokens = _extract_tokens_between_line_numbers(
-                            # Extract tokens between the line numbers of the annotated assignment
-                            gen, n.lineno, n.end_lineno
-                        )
-                        for toknum, tokval in tokens:
-                            if toknum == tokenize.COMMENT:
-                                # Extract the comment
-                                assgn_comment = tokval
-                                break
-
-                        if assgn_comment:
-                            # If a comment is found, transform the annotation to include the comment
-                            assgn_subscript = n.annotation
-                            has_comments = True
-                            n.annotation = ast.Subscript(
-                                value=ast.Name(id="Annotated", ctx=ast.Load()),
-                                slice=ast.Tuple(
-                                    elts=[
-                                        assgn_subscript,
-                                        ast.Call(
-                                            func=ast.Name(
-                                                id="Doc", ctx=ast.Load()
-                                            ),
-                                            args=[
-                                                ast.Constant(
-                                                    value=assgn_comment.strip("#").strip()
-                                                )
-                                            ],
-                                            keywords=[]
-                                        )
-                                    ],
-                                    ctx=ast.Load()
-                                ),
-                                ctx=ast.Load()
-                            )
-
-        if has_comments:
-            for node in tree.body:
-                if isinstance(node, ast.ImportFrom):
-                    if node.module == "typing_extensions":
-                        if ast.alias(name="Annotated") not in node.names:
-                            node.names.append(ast.alias(name="Annotated"))
-                        if ast.alias(name="Doc") not in node.names:
-                            node.names.append(ast.alias(name="Doc"))
-
-        transformed_schema_source = ast.unparse(tree)
-
-        if debug:
-            print("Source code after transformation:")
-            print("--"*50)
-            print(transformed_schema_source)
-            print("--"*50)
-
-        namespace = {}
-        exec(transformed_schema_source, namespace)
-        return namespace[schema_class.__name__]
+        return prompt
diff --git a/python/examples/math/schema_with_comments.py → ...handler/examples/commented_math_schema.py b/python/examples/math/schema_with_comments.py → ...handler/examples/commented_math_schema.py