In [118]:
%autoreload 2

In [119]:
from langchain.output_parsers.pydantic import PydanticOutputParser, parse_json_schema_to_typescript
from pydantic import BaseModel, Field

In [146]:
from pydantic import BaseModel, Field
from typing import Optional, Union
from enum import Enum
from datetime import datetime


class CityTier(Enum):
    SMALL = "small"
    MEDIUM = "medium"
    LARGE = "large"
    
class NumberedEnum(Enum):
    ONE = 1
    TWO = 2
    THREE = 3
    FOUR = 4
    FIVE = 5
    SIX = 6
    

class PopulationInformation(BaseModel):
    average_age: Optional[int] = Field(None, description="Average age of the location.")
    description: Optional[str] = Field(None, description="A unique description here")
    tier: Optional[CityTier] = Field(None)
    which_number: Optional[NumberedEnum] = Field(None)
    # date_founded: Optional[datetime] = Field(None, description="The date the city was founded.")
    
class AnimalPopulations(BaseModel):
    modal_breed: Optional[str] = Field(None, description="The modal breed of the animal.")
    reproductive_age: Optional[int] = Field(None, description="The reproductive age of the animal.")
    disease: Optional[str] = Field(None, description="The disease of the animal.")

class Response(BaseModel):
    answer: str = Field(description="answer to the user's question")
    source: str = Field(description="source used to answer the user's question, should be a website.")
    # population_information: Optional[Union[PopulationInformation, dict]] = Field(description="Information about the city.")
    population_information: Optional[Union[PopulationInformation, AnimalPopulations, NumberedEnum]] = Field(description="Information about the city.")

In [147]:
output_parser = PydanticOutputParser(response_schema=Response)

In [148]:
print(output_parser.get_format_instructions())

The output should be a markdown code snippet formatted in the following schema.

```json
{
	// answer to the user's question
	"answer": string
	// source used to answer the user's question, should be a website.
	"source": string
	"population_information": {
		// Average age of the location.
		"average_age": integer
		// A unique description here
		"description": string
		// An enumeration.
		"tier": "small" | "medium" | "large" 
		// An enumeration.
		"which_number": 1 | 2 | 3 | 4 | 5 | 6 

	} | 	{
		// The modal breed of the animal.
		"modal_breed": string
		// The reproductive age of the animal.
		"reproductive_age": integer
		// The disease of the animal.
		"disease": string

	} | 	// An enumeration.
	1 | 2 | 3 | 4 | 5 | 6 

}
```
Do not include comments in the resulting code snippet.






In [149]:
# print(parse_json_schema_to_typescript(Response.schema()))

In [106]:
from langchain.prompts import PromptTemplate

In [107]:
# line_template = """"{name}": {type}"""

# MAX_DEPTH = 6

# def _get_description_from_prop(prop: dict, depth: int) -> str:
#     """Return the optional description of a property."""
#     indent = "\t" * depth
#     return f"{indent}// {prop['description']}\n" if "description" in prop else ""


# def _get_code_line(name: str, type: str, depth: int) -> str:
#     """Return the code line for a property."""
#     indent = "\t" * depth
#     return f"{indent}{line_template.format(name=name, type=type)}"


# def _get_sub_string(k: str, v: dict, depth: int) -> str:
#     """Return the sub-string for a property."""
#     description = _get_description_from_prop(v, depth)
#     if "type" not in v:
#         print(k, v, depth)
#     code = _get_code_line(k, v["type"], depth)
#     return description + code


# def resolve_reference(ref: str, definitions: dict) -> dict:
#     """Resolve a reference to a definition."""
#     if not ref.startswith("#/definitions/"):
#         # Arbitrary JSON-schema references not yet supported.
#         raise ValueError(f"Unsupported $ref: {ref}")
#     ref = ref[len("#/definitions/") :]
#     if ref in definitions:
#         return definitions[ref]
#     else:
#         raise ValueError(f"Unknown reference: {ref}")


# def process_reference(k: str, v: dict, depth: int, definitions: dict) -> str:
#     """Return the code line for a referred property."""
#     resolved = resolve_reference(v["$ref"], definitions)
#     nested_type = get_nested_schema_str(resolved, depth + 1)
#     return _get_sub_string(k, resolved, depth) + nested_type


# def get_nested_schema_str(schema: dict, depth: int = 1) -> str:
#     """Return the string representation of schema."""
#     if depth > MAX_DEPTH:
#         logger.error(f"Max depth exceeded: {depth}")
#         return ""
#     properties = schema.get("properties", {})
#     definitions = schema.get("definitions", {})
#     result = "{\n"
#     for k, v in properties.items():
#         if "$ref" in v:
#             result += process_reference(k, v, depth, definitions)
#         elif "allOf" in v:
#             # Nested subschema are emitted as an anyOf
#             for sub_schema in v["allOf"]:
#                 if "$ref" in sub_schema:
#                     result += process_reference(
#                         k, definitions[sub_schema["$ref"]], depth, definitions
#                     )
#                 elif "type" in sub_schema:
#                     result += _get_code_line(k, sub_schema["type"], depth)
#                 else:
#                     raise ValueError(f"Unknown type: {sub_schema}")
#         elif "anyOf" in v:
#             raise NotImplementedError(f"Union types not yet supported: {v}")
#         else:
#             arg_str = _get_sub_string(k, v, depth)
#             result += f"{arg_str}\n"
#     final_indent = "\t" * (depth - 1)
#     result += f"{final_indent}}}"
#     return result

In [108]:
# get_nested_schema_str(Response.schema())

In [None]:
# import os
# import json
# from tempfile import TemporaryDirectory
# # import subprocess
# import subprocess
# def parse_json_schema_to_typescript(schema: dict) -> str:
#     """Return the string representation of schema."""
#     with TemporaryDirectory() as tmpdir:
#         with open(os.path.join(tmpdir, "schema.json"), "w") as f:
#             json.dump(schema, f)
#         schema_str = subprocess.check_output(
#             ["json2ts", "-i", "schema.json"], cwd=tmpdir
#         ).decode("utf-8")
#     return schema_str

In [None]:
# output_parser.get_format_instructions()

In [None]:
# import importlib

# model_file = "/Users/whinthorn/code/openapi-python-generator/src/openapi_python_generator/models.py"
# # Use util to load model_file from the absolute path specified above
# spec = importlib.util.spec_from_file_location("models", model_file)
# models = importlib.util.module_from_spec(spec)
# spec.loader.exec_module(models)

In [150]:
prompt = PromptTemplate(
    template="Answer the users question as best as possible.\n{format_instructions}\n{question}",
    input_variables=["question"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()}
)

In [151]:
from langchain.llms import OpenAI

In [152]:
model = OpenAI(temperature=0)

In [153]:
print(prompt.format(question="what's the capital of paris"))

Answer the users question as best as possible.
The output should be a markdown code snippet formatted in the following schema.

```json
{
	// answer to the user's question
	"answer": string
	// source used to answer the user's question, should be a website.
	"source": string
	"population_information": {
		// Average age of the location.
		"average_age": integer
		// A unique description here
		"description": string
		// An enumeration.
		"tier": "small" | "medium" | "large" 
		// An enumeration.
		"which_number": 1 | 2 | 3 | 4 | 5 | 6 

	} | 	{
		// The modal breed of the animal.
		"modal_breed": string
		// The reproductive age of the animal.
		"reproductive_age": integer
		// The disease of the animal.
		"disease": string

	} | 	// An enumeration.
	1 | 2 | 3 | 4 | 5 | 6 

}
```
Do not include comments in the resulting code snippet.




what's the capital of paris


In [157]:
foo = model(prompt.format(question="When did cats first arrive to the British Isles?"))

In [158]:
print(foo)



```json
{
	"answer": "Cats were believed to have arrived to the British Isles around the 1st century AD.",
	"source": "https://www.historyextra.com/period/roman/cats-in-the-roman-world-and-the-british-isles/",
	"population_information": {
		"average_age": 10,
		"description": "Domestic cats",
		"tier": "small",
		"which_number": 1
	}
}
``


In [159]:
output_parser.parse(foo)

Response(answer='Cats were believed to have arrived to the British Isles around the 1st century AD.', source='https://www.historyextra.com/period/roman/cats-in-the-roman-world-and-the-british-isles/', population_information=PopulationInformation(average_age=10, description='Domestic cats', tier=<CityTier.SMALL: 'small'>, which_number=<NumberedEnum.ONE: 1>))

In [100]:
# from langchain.prompts import load_prompt

# prompt = load_prompt("lc://prompts/conversation/prompt.json")