In [1]:
# Load IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [13]:
from askharrison.llmparse.schema_recommender import SchemaGenerator

In [3]:
from askharrison.llm.openai_llm_client import OpenAIClient

In [4]:
# Create the LLM client with its default constructor arguments; the schema
# generator, the direct generate() call and the document parser below all use it.
# NOTE(review): presumably credentials come from the environment — confirm.
openai_client = OpenAIClient()

In [8]:
# Build the schema generator on top of the shared LLM client.
schema_generator = SchemaGenerator(openai_client)

In [14]:
# Generate a schema from a one-line natural-language description.
# The result is handed to _create_pydantic_model in a later cell.
output = schema_generator.generate_schema_from_description("generate a qa schema from chat history of a support chat channel")

In [58]:
from askharrison.llm_models import extract_python_code, safe_eval

In [20]:
# Turn the generated schema into a model object whose JSON schema is inspected
# in the next cell.
# NOTE(review): _create_pydantic_model is underscore-prefixed (private by
# convention); consider exposing a public entry point instead of calling it here.
model = schema_generator._create_pydantic_model(output)

In [47]:
import json

# Decode the model's JSON-schema string into a dict so the cell displays it
# as a structured object.
# NOTE(review): schema_json() is deprecated in pydantic v2 in favour of
# model_json_schema(), which returns a dict directly — confirm installed version.
json.loads(model.schema_json())

{'properties': {'raw_text': {'description': 'Original text that generated this output',
   'title': 'Raw Text',
   'type': 'string'},
  'extracted_at': {'format': 'date-time',
   'title': 'Extracted At',
   'type': 'string'},
  'chatId': {'default': None,
   'description': 'Unique identifier for the chat session',
   'title': 'Chatid',
   'type': 'string'},
  'timestamp': {'default': None,
   'description': 'Timestamp when the chat occurred',
   'title': 'Timestamp',
   'type': 'string'},
  'customerId': {'default': None,
   'description': 'Unique identifier for the customer',
   'title': 'Customerid',
   'type': 'string'},
  'agentId': {'default': None,
   'description': 'Unique identifier for the support agent',
   'title': 'Agentid',
   'type': 'string'},
  'questions': {'default': None,
   'description': 'List of questions asked during the chat',
   'items': {},
   'title': 'Questions',
   'type': 'array'},
  'answers': {'default': None,
   'description': 'List of answers provided 

In [30]:
# Same request as before, but with explicit constraints in the description:
# question and answer/resolution fields, a null answer when unanswered, and
# start_date / end_date.
# NOTE(review): output2 is not read by any later cell visible in this notebook.
output2 = schema_generator.generate_schema_from_description("""generate a qa schema from chat history of a support chat channel
                                                        The schema should have a field for the question and a field for the answer or resolution
                                                        answer is null if the question is not answered
                                                        also include start_date and end_date""")


In [36]:
# Read the sample text document used for schema recommendation below.
# NOTE(review): absolute, machine-specific Windows path — this cell fails on any
# other machine; consider a path relative to a configurable data directory.
# NOTE(review): no encoding is passed, so the platform default is used — confirm
# the file's actual encoding.
with open("C:\\Users\\alist\\Desktop\\code\\askharrison\\notebooks\\data\\cc_features.txt") as f:
    text = f.read()

In [44]:
# Let the generator recommend a schema from the document text itself rather
# than from a written description.
recommended_output = schema_generator.recommend_schema_from_document(text)

In [46]:
# Show only the 'properties' entry of the recommended schema.
recommended_output['properties']

{'introduction': {'type': 'string',
  'description': 'Introduction of the release notes document.'},
 'oldReleasesReference': {'type': 'string',
  'description': 'URL or reference to older releases.'},
 'updates': {'type': 'array',
  'description': 'List of updates or features in the release notes.',
  'items': {'type': 'object',
   'properties': {'date': {'type': 'string',
     'format': 'date',
     'description': 'The date of the update or announcement.'},
    'title': {'type': 'string',
     'description': 'Title summarizing the update or new feature.'},
    'description': {'type': 'string',
     'description': 'Detailed description of the update or feature.'},
    'externalReferences': {'type': 'array',
     'description': 'References to external documents or resources.',
     'items': {'type': 'string'}}},
   'required': ['date', 'title', 'description']}}}

In [48]:
# Read a Python source file to use as the input document for schema recommendation.
# Python source is UTF-8 by default, so decode it explicitly instead of relying on
# the platform default (a legacy code page on Windows), which can mis-decode or
# raise UnicodeDecodeError on non-ASCII characters.
# NOTE(review): absolute, machine-specific path — consider a path relative to a
# configurable project root.
with open("C:\\Users\\alist\\Desktop\\code\\askharrison\\askharrison\\llm_models.py", encoding="utf-8") as f:
    codefile = f.read()
    

In [49]:
# Repeat the schema recommendation with Python source code as the document.
code_recommended_output = schema_generator.recommend_schema_from_document(codefile)

In [61]:
# Extract the code portion of the recommendation and evaluate it into a Python
# object for display.
# NOTE(review): presumably the result above is raw LLM text here (the earlier
# recommendation was indexed directly as a dict) — confirm the return contract.
safe_eval(extract_python_code(code_recommended_output))

{'description': 'Schema for parsing documents containing utility functions for language model processing in Python.',
 'schema': {'imports': {'description': 'List of import statements with associated modules or packages used.',
   'type': 'array',
   'items': {'type': 'string'}},
  'functions': {'description': 'List of function definitions in the document.',
   'type': 'array',
   'items': {'type': 'object',
    'properties': {'name': {'description': 'Name of the function.',
      'type': 'string'},
     'description': {'description': 'Docstring description explaining the purpose of the function.',
      'type': 'string'},
     'parameters': {'description': 'List of parameters the function takes, with their descriptions.',
      'type': 'array',
      'items': {'type': 'object',
       'properties': {'name': {'description': 'Parameter name.',
         'type': 'string'},
        'type': {'description': 'Type of the parameter.', 'type': 'string'},
        'description': {'description': "

In [64]:
# Parse the extracted payload as JSON to obtain the schema dict that the prompt
# and the document parser cells below reuse.
# NOTE(review): the previous cell parsed the same payload with safe_eval;
# json.loads accepts only valid JSON, so settle on one parser for consistency.
# NOTE(review): "schama" looks like a typo for "schema"; renaming requires
# updating every cell that reads this variable.
codefile_schama_dict = json.loads(extract_python_code(code_recommended_output))

# create a pydantic model from the schema without using the schema generator
from pydantic import BaseModel
     

In [68]:
# Ask the LLM directly for pydantic model code matching the recommended schema.
# The schema dict is interpolated into the prompt via f-string formatting, followed
# by a one-shot example of the expected fenced-code output format.
# Fixes relative to the earlier prompt: removed a stray unbalanced double quote
# after the interpolated schema and corrected "a pydantic models".
pydantic_output = openai_client.generate(f"""generate pydantic models from the schema below
                       {codefile_schama_dict}
                       return only python code for the pydantic model,
                       example output:
                       ####
                       ```python
                       class MyModel(BaseModel):
                            field1: str
                            field2: int
                       
                       class MyModel2(BaseModel):
                            field1: MyModel
                            field2: int
                       ```
                       ####
                       output
                       """)

In [74]:
# Print (rather than display) the extracted code so newlines render as real
# line breaks instead of an escaped string repr.
print(extract_python_code(pydantic_output))

from typing import List, Optional
from pydantic import BaseModel

class Parameter(BaseModel):
    name: str
    type: str
    description: str
    default: Optional[str] = None

class Function(BaseModel):
    name: str
    description: str
    parameters: List[Parameter]
    returns: str

class Schema(BaseModel):
    imports: List[str]
    functions: List[Function]


In [79]:
from askharrison.llmparse.document_parser import DocumentParser

In [80]:
# Build the document parser on top of the same shared LLM client.
document_parser = DocumentParser(openai_client) 

In [81]:
# Parse the source file against the schema dict recommended for it.
# NOTE(review): the recorded run printed "Invalid JSON in LLM response, returning
# raw response", so the value here was presumably the raw LLM text rather than a
# parsed structure — the next cell extracts and evaluates it manually.
doc_parsed_output = document_parser.parse_document(codefile, codefile_schama_dict)

Invalid JSON in LLM response, returning raw response


In [83]:
# Recover a Python object from the parser's response by extracting its code
# portion and evaluating it.
# NOTE(review): in the recorded output each function's 'parameters' is a
# name -> type dict, whereas the recommended schema declared it as an array of
# objects — the response does not appear to conform to the schema.
safe_eval(extract_python_code(doc_parsed_output))

{'imports': ['requests',
  'pandas as pd',
  'typing.List',
  'typing.Dict',
  'typing.Any',
  'typing.Callable',
  'typing.Optional',
  'openai.OpenAI',
  're',
  'concurrent.futures',
  'time',
  'random',
  'tqdm.tqdm',
  'functools',
  'tiktoken'],
 'functions': [{'name': 'process_question',
   'description': 'Processes a question using the specified language model and returns the response.',
   'parameters': {'question': 'str', 'model': 'str, optional'},
   'returns': 'str'},
  {'name': 'polish_code',
   'description': "Polish the code by removing the leading 'python' or 'py', removing surrounding '`' characters and removing trailing spaces and new lines.",
   'parameters': {'code': 'str'},
   'returns': 'str'},
  {'name': 'extract_python_code',
   'description': 'Extract the code from the llm response.',
   'parameters': {'response': 'str', 'separator': 'str, optional'},
   'returns': 'str'},
  {'name': 'safe_eval',
   'description': 'Evaluates a string expression safely, returni