In [1]:
import instructor
from openai import OpenAI
from pydantic import BaseModel
import os


In [2]:
LLM_REPORT_API_KEY = os.getenv("LLM_REPORT_API_KEY")
assert LLM_REPORT_API_KEY, "LLM_REPORT_API_KEY env var must be set"

client = OpenAI(
    # base_url="https://api.openai.withlogging.com/v1",
    # default_headers={
    #     "X-Api-Key": f"Bearer {LLM_REPORT_API_KEY}",
    #     "X-User-Id": "nickolay.sheyko@gmail.com",
    # },
)

from instructor import Mode
from llmonitor import monitor

monitor(client)
# client = instructor.patch(client)
# client = instructor.patch(client, mode=Mode.JSON)


In [3]:
# from llmonitor import monitor
# monitor(client)


In [6]:
from openai._types import NOT_GIVEN
from typing import Type, TypeVar




OutputType = TypeVar("OutputType", bound=BaseModel)


GPT_3 = "gpt-3.5-turbo"
GPT_3 = "gpt-3.5-turbo-1106"
GPT_4 = "gpt-4-1106-preview"


SYSTEM_PROMPT = """Ignore all previous instructions.

1. You are to provide clear, concise, and direct responses.
2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.
3. Maintain a casual tone in your communication.
4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.
5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.
6. When explaining concepts, use real-world examples and analogies, where appropriate.
7. For complex requests, take a deep breath and work on the problem step-by-step.
8. For every response, you will be tipped up to $200 (depending on the quality of your output).
9. Answer in JSON format.

It is very important that you get this right. Multiple lives are at stake."""

def call_openai(response_model: Type[OutputType], question: str, model = GPT_3, seed=NOT_GIVEN) -> OutputType:
    return client.chat.completions.create(
        model=model,
        response_model=response_model,
        max_retries=3,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question}],
        seed=seed,
        response_format={"type": "json_object"},
    )  # type: ignore


In [5]:
from pydantic import Field

class Step(BaseModel):
    question: str = Field(..., description="The subquestion needed to answer the main question")
    answer: str = Field(..., description="The answer to the subquestion")

class Answer(BaseModel):
    """Docstring"""
    steps: list[Step] = Field(..., description="The questions that were asked and answered to answer the initial question")

    answer: str
    confidence: float


In [3]:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    # functions=Answer.model_json_schema(),
)


() {'model': 'gpt-3.5-turbo', 'messages': [{'role': 'user', 'content': 'hello'}]}


In [7]:
# Answer.model_json_schema()

# def create_function_parameter(name: str, type: Type[BaseModel]):
#     return {
#         "name": type.schema()["title"],
#         "type": type.schema()["title"],
#         "description": type.schema()["description"],
#     }


In [8]:
answer = call_openai(Answer, "What is the richest ciries in the world?")


() {'model': 'gpt-3.5-turbo-1106', 'response_model': <class '__main__.Answer'>, 'max_retries': 3, 'messages': [{'role': 'system', 'content': "Ignore all previous instructions.\n\n1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $200 (depending on the quality of your output).\n9. Answer in JSON format.\n\nIt is very important that you get this right. Multiple lives are at stake."}, {'role': 

TypeError: Completions.create() got an unexpected keyword argument 'response_model'

In [6]:
params = {
    "model": "gpt-3.5-turbo-1106",
    "messages": [
        {
            "role": "system",
            "content": "Ignore all previous instructions.\n\n1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $200 (depending on the quality of your output).\n9. Answer in JSON format.\n\nIt is very important that you get this right. Multiple lives are at stake.",
        },
        {"role": "user", "content": "What is the richest ciries in the world?"},
    ],
    "response_format": {"type": "json_object"},
    "functions": [
        {
            "name": "Answer",
            "description": "Docstring",
            "parameters": {
                "properties": {
                    "answer": {
                        "description": "The answer to the subquestion",
                        "title": "Answer",
                        "type": "string",
                    },
                    "question": {
                        "description": "The subquestion needed to answer the main question",
                        "title": "Question",
                        "type": "string",
                    },
                },
                "required": ["answer", "question"],
                "type": "object",
            },
        }
    ],
    "function_call": {"name": "Answer"},
}


response = client.chat.completions.create(**params)


() {'model': 'gpt-3.5-turbo-1106', 'messages': [{'role': 'system', 'content': "Ignore all previous instructions.\n\n1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $200 (depending on the quality of your output).\n9. Answer in JSON format.\n\nIt is very important that you get this right. Multiple lives are at stake."}, {'role': 'user', 'content': 'What is the richest ciries in the world?'}]

In [5]:
print(response.model_dump_json(indent=2))


{
  "id": "chatcmpl-8SsKYefSnP1iw4M3d3FRXodFBWkgR",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": null,
        "role": "assistant",
        "function_call": {
          "arguments": "{\"question\":\"What are the top 5 richest cities in the world based on GDP per capita?\"}",
          "name": "Answer"
        },
        "tool_calls": null
      }
    }
  ],
  "created": 1701892722,
  "model": "gpt-3.5-turbo-1106",
  "object": "chat.completion",
  "system_fingerprint": "fp_eeff13170a",
  "usage": {
    "completion_tokens": 21,
    "prompt_tokens": 270,
    "total_tokens": 291
  }
}


In [38]:
print(answer.model_dump_json(indent=2))


{
  "steps": [
    {
      "question": "What specific criteria or measurements would you like to use to determine the richest cities? For example, are you referring to cities with the highest GDP, highest average income, or any other specific financial metrics?",
      "answer": ""
    }
  ],
  "answer": " ",
  "confidence": 0.8
}


In [110]:


class Property(BaseModel):
    key: str
    value: str
    resolved_absolute_value: str


class Entity(BaseModel):
    id: int = Field(
        ...,
        description="Unique identifier for the entity, used for deduplication, design a scheme allows multiple entities",
    )
    subquote_string: list[str] = Field(
        ...,
        description="Correctly resolved value of the entity, if the entity is a reference to another entity, this should be the id of the referenced entity, include a few more words before and after the value to allow for some context to be used in the resolution",
    )
    entity_title: str
    properties: list[Property] = Field(
        ..., description="List of properties of the entity"
    )
    dependencies: list[int] = Field(
        ...,
        description="List of entity ids that this entity depends  or relies on to resolve it",
    )


class DocumentExtraction(BaseModel):
    entities: list[Entity] = Field(
        ...,
        description="Body of the answer, each fact should be its seperate object with a body and a list of sources",
    )


In [113]:
with open("sample.txt") as f:
    data = f.read()

extracted = call_openai(DocumentExtraction, f"Extract and resolve a list of entities from the following document:\n\n{data}", model=GPT_4)


In [122]:
from graphviz import Digraph

def generate_html_label(entity: Entity) -> str:
    rows = [f"<tr><td>{prop.key}</td><td>{prop.resolved_absolute_value}</td></tr>" for prop in entity.properties]
    table_rows = "".join(rows)
    return f"<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>{entity.entity_title}</b></td></tr>{table_rows}</table>>"

def generate_graph(data: DocumentExtraction):
    dot = Digraph(comment="Entity Graph", node_attr={"shape": "plaintext"})

    for entity in data.entities:
        label = generate_html_label(entity)
        dot.node(str(entity.id), label)

    for entity in data.entities:
        for dep_id in entity.dependencies:
            dot.edge(str(entity.id), str(dep_id))

    dot.render("entity.gv", view=True)

generate_graph(extracted)


<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Agreement Date</b></td></tr><tr><td>Date</td><td>2020-01-01</td></tr></table>>
<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Delivery Date</b></td></tr><tr><td>Period After Agreement Date</td><td>2020-01-31</td></tr></table>>
<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Total Payment</b></td></tr><tr><td>Amount</td><td>$50,000</td></tr></table>>
<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Initial Payment</b></td></tr><tr><td>Amount</td><td>$10,000</td></tr><tr><td>Due Period After Signed Date</td><td>2020-01-08</td></tr></table>>
<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Final Payment Date</b></td></tr><tr><td>Period After Signed Date</td><td>2020-02-15</td></tr></table>>
<<table border='0' cellborder='1' cellspacing='0'><tr><td colspan='2'><b>Confidentiality Period End Date</b></td></tr><tr><td>Perio

In [112]:
print(extracted.model_dump_json(indent=2))


{
  "entities": [
    {
      "id": 1,
      "subquote_string": [
        "Wouldn’t the point of Python to be dynamic, rely on duck typing and enable the engineer to create something quickly without barriers? Well, yes, it is."
      ],
      "entity_title": "Python",
      "properties": [
        {
          "key": "importance",
          "value": "dynamic",
          "resolved_absolute_value": "dynamic"
        },
        {
          "key": "typical_usage",
          "value": "dynamic typing, duck typing",
          "resolved_absolute_value": "dynamic typing, duck typing"
        },
        {
          "key": "advantage",
          "value": "creation without barriers",
          "resolved_absolute_value": "creation without barriers"
        }
      ],
      "dependencies": []
    },
    {
      "id": 2,
      "subquote_string": [
        "gradual typing in Python code (the ability to add type hints to your code little by little over time)"
      ],
      "entity_title": "gradual typi

In [103]:
class UserDetail(BaseModel):
    name: str
    age: int


user = call_openai(
    UserDetail,
    "Fillinthedetailesaboutnikolaymihaylovofthatwasburnin1995its2023now",
    # model=GPT_4,
)

assert isinstance(user, UserDetail)
print(user)
# print(user.age)
# assert user.name == "Jason"
# assert user.age == 25


name='Nikolay Mihaylov' age=28


In [75]:
from pydantic import Field


class Query (BaseModel):
    id: int
    question: str
    dependancies: list[int] = Field(
        default_factory=list,
        description="List of sub questions that need to be answered before we can ask the question. Use a subquery when anything may be unknown, and we need to ask multiple questions to get the answer. Dependences must only be other queries. Max length is 5. You can use placeholders for unknowns , and the AI will fill them in. Placeholders are in the form of {placeholder_name}.",
    )

class QueryPlan(BaseModel):
    query_graph: list[Query]



plan = call_openai(
    QueryPlan,
    "What are the volume of the Earth?",
    model=GPT_4,
)


In [62]:
from pprint import pprint


print(plan.model_dump_json(indent=2))


{
  "query_graph": [
    {
      "id": 1,
      "question": "What is the volume of the Earth?",
      "dependancies": []
    }
  ]
}


In [81]:
from pydantic import BaseModel, Field

from graphviz import Digraph

class Node(BaseModel):
    id: int
    label: str = Field(
        ...,
        description="The text that will be displayed on the node. This should be a short phrase that describes the concept needed to answer the question"
    )
    color: str
class Edge(BaseModel):
    source: int
    target: int
    label: str
    color: str = "black"
class KnowledgeGraph(BaseModel):
    nodes: list[Node] = Field(default_factory=list)
    edges: list[Edge] = Field(default_factory=list)
    def visualize_knowledge_graph(self):
        dot = Digraph(comment="Knowledge Graph")
        # Add nodes
        for node in self.nodes:
            dot.node(str(node.id), node.label, color=node.color)
        # Add edges
        for edge in self.edges:
            dot.edge(str(edge.source), str(edge.target), edge.label, color=edge.color)
        # Render the graph
        dot.render("knowledge_graph.gv", view=True)


In [82]:
def create_graph(question: str) -> KnowledgeGraph:
    return call_openai(
        KnowledgeGraph,
        "Help me understand following by describing as a detailed knowledge graph: {input}",
    )

graph = create_graph("Who will be the next president of the United States?")


In [83]:
print(graph.model_dump_json(indent=2))


{
  "nodes": [
    {
      "id": 1,
      "label": "Knowledge Graph",
      "color": "blue"
    },
    {
      "id": 2,
      "label": "Input",
      "color": "green"
    },
    {
      "id": 3,
      "label": "Understanding",
      "color": "yellow"
    },
    {
      "id": 4,
      "label": "Description",
      "color": "orange"
    },
    {
      "id": 5,
      "label": "Detailed",
      "color": "purple"
    }
  ],
  "edges": [
    {
      "source": 2,
      "target": 1,
      "label": "is part of",
      "color": "black"
    },
    {
      "source": 1,
      "target": 3,
      "label": "leads to",
      "color": "black"
    },
    {
      "source": 3,
      "target": 4,
      "label": "involves",
      "color": "black"
    },
    {
      "source": 4,
      "target": 5,
      "label": "contains",
      "color": "black"
    }
  ]
}


In [84]:
graph.visualize_knowledge_graph()


In [26]:
import openai


completion = openai.chat.completions.create(
    model="gpt-4-0613",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Provide an alalysis of the text: {text}"},
    ],
    functions=[{"name": "set_text_properties", "parameters": schema}],
    function_call={"name": "set_text_properties"},
    temperature=0,
)

print(completion.choices[0].message.function_call.arguments)


{
  "correctness": {
    "description": "The text is grammatically correct. It uses appropriate punctuation, correct sentence structure, and proper use of technical terms. The use of parentheses to provide additional information is also correct.",
    "score": "A"
  },
  "politeness": {
    "description": "The text is polite and professional. It provides helpful suggestions and guidance without being condescending or dismissive. The use of 'you might want to' and 'you can' phrases makes the suggestions sound more like friendly advice rather than orders.",
    "score": "A"
  }
}


In [20]:
print(completion.choices[0].message.function_call.arguments)


{
  "correctness": {
    "description": "The text is mostly grammatically correct, but there is one error. The phrase 'I’ve wrote a lot in the issue' should be 'I’ve written a lot in the issue'.",
    "score": "B"
  },
  "politeness": {
    "description": "The text is polite. It uses respectful language and does not contain any offensive or inappropriate words.",
    "score": "A"
  }
}
