In [1]:
import os
from dotenv import load_dotenv

load_dotenv()


True

In [2]:
openai_key = os.getenv("OPENAI_API_KEY")

In [3]:
import instructor

from openai import OpenAI
from typing import List
from pydantic import BaseModel, Field

client = instructor.patch(OpenAI())

In [4]:
from datetime import date


class DateRange(BaseModel):
    start: date
    end: date


class Query(BaseModel):
    rewritten_query: str
    published_daterange: DateRange

In [5]:
def expand_query(q) -> Query:
    return client.chat.completions.create(
        model="gpt-3.5-turbo",
        response_model=Query,
        messages=[
            {
                "role": "system",
                "content": f"You are a ",
            },
            {"role": "user", "content": f"query: {q}"},
        ],
    )


query = expand_query("What are some recent developments in AI?")
query

Query(rewritten_query='Recent developments in artificial intelligence', published_daterange=DateRange(start=datetime.date(2022, 1, 1), end=datetime.date(2022, 10, 10)))

In [6]:
class DateRange(BaseModel):
    chain_of_thought: str = Field(
        description="Think step by step to plan what is the best time range to search in"
    )
    start: date
    end: date

class Tools(BaseModel):
    tool: str = Field(description="Tool to use for the search")
    description: str = Field(description="Description of the tool")
    link: str = Field(description="Link to the tool")

class Query(BaseModel):
    rewritten_query: str = Field(
        description="Rewrite the query to make it more specific"
    )
    published_daterange: DateRange = Field(
        description="Effective date range to search in"
    )


def expand_query(q) -> Query:
    return client.chat.completions.create(
        model="gpt-4-1106-preview",
        response_model=Query,
        messages=[
            {
                "role": "system",
                "content": f"You're a query understanding system for the Metafor Systems search engine. Here are some tips: ...",
            },
            {"role": "user", "content": f"query: {q}"},
        ],
    )


expand_query("What are some recent developments in AI?")

Query(rewritten_query='latest advancements in artificial intelligence', published_daterange=DateRange(chain_of_thought='Since the prompt asks for recent developments, the date range should start from about two years ago to the present to ensure coverage of ongoing advancements. Choosing a two-year range allows for the inclusion of both very recent updates and slightly older ones that may still be relevant.', start=datetime.date(2021, 4, 1), end=datetime.date(2023, 4, 20)))

In [7]:
querystring="What are some recent developments in AI?"

retrival=client.chat.completions.create(
        model="gpt-4-1106-preview",
        response_model=Query,
        messages=[
            {
                "role": "system",
                "content": f"You're a query understanding system for the Metafor Systems search engine. Today is {date.today()}. Here are some tips: ...",
            },
            {"role": "user", "content": f"query: {querystring}"},
        ],
    )
print(retrival.model_dump_json(indent=4))

{
    "rewritten_query": "Latest advancements in artificial intelligence",
    "published_daterange": {
        "chain_of_thought": "The most recent developments would logically be within the last year, so a search from a year before today to the present will capture the latest in AI.",
        "start": "2023-05-12",
        "end": "2024-05-12"
    }
}


In [15]:
import openai
import instructor

class Endpoints(BaseModel):
    id: int = Field(..., description="A unique identifier for the question")
    query: str = Field(..., description="The question decomposited as much as possible")
    endpoints: List[str] = Field(..., description="The graphql endpoint that is required to fetch data from the ai to answer this specific question")
    arguments: List[str] = Field(..., description="The parameters that are required to send in the query request to the endpoint")
    subquestions: List[int] = Field(
        default_factory=list,
        description="The other endpoints it relies on to get the data from to answer the question. Check each of the arguments of the endpoint to check if it is available if not add a subquestion to get the data from the other point that it can be retrived from",
    )


class QueryPlan(BaseModel):
    root_question: str = Field(..., description="The root question that the user asked")
    plan: List[Endpoints] = Field(
        ..., description="""The plan to answer the root question by querying different endpoints available in 
        the graphql api.
        Make sure every information is present and to answer the question and decompose the question properly         
        into each of it's respective endpoints.
        """
    )

client = instructor.from_openai(OpenAI(api_key=openai_key))
retrival = client.chat.completions.create_partial(
    model="gpt-4-turbo",
    response_model=QueryPlan,
    messages=[
        {
            "role": "system",
            "content": '''You are a query understanding system capable of decomposing a question into subparamaters required to answer the question.
            
            The arguments which are present in different parts of the graphql endpoint i am about to use are as follows: 
           These are the endpoints that are available in graphql
          type Query {
  searchVesselsByIdentifier(identifier: String!, limit: PositiveInt = 20): [SimplifiedVessel]

  """Query - returns information on a single vessel behaviour"""
  vessel(id: ObjectId!): VesselIntelligence

  """Query - returns information on a single vessel by its IMO"""
  vesselByIMO(imo: String!): VesselIntelligence

  """Query - returns information on a multiple vessels by their IMO"""
  vesselsByIMOs(imos: [String!]!): [VesselIntelligence]
  vesselsByMMSI(mmsi: String!): [VesselIntelligence!]!
  portExpectedArrivals(input: PortExpectedArrivalsInput!): PortExpectedArrivalsConnection!
  vesselsInPort(input: VesselsInPortInput!): VesselsInPortConnection!
  departedFromPortVessels(input: DepartedFromPortVesselsInput!): DepartedFromPortVesselsConnection!
  vesselPropertyChanges(input: VesselPropertyChangesInput!): VesselPropertyChangesConnection!

  """
  The API requires selection of a user defined area and selected time range 
  and returns a list of vessels that transmitted in that selected location and selected time
  """
  vesselsInArea(input: VesselsInAreaInput!): VesselsInAreaConnection!
  vesselsCurrentlyInArea(input: VesselsCurrentlyInAreaInput!): VesselsCurrentlyInAreaConnection!
  riskyVesselsInArea(input: RiskyVesselsInAreaInput!): RiskyVesselsInAreaConnection!
  getActivitiesByDatesAndPolygon(type: ActivityTypes!, timeRange: DateTimeRange!, polygonId: ObjectId!): [Activity]
  activitiesInPolygon(input: ActivitiesInPolygonInput!): ActivitiesInPolygonConnection!
  vesselTimeline(input: VesselTimelineInput!): VesselTimelineConnection!
  advancedVesselsSearch(input: AdvancedVesselSearchInput!): SearchResultsOutput
  areas(filter: AreaFilterInput!): [FeatureObject]
  searchCompaniesByTerm(searchTerm: String!): [Company!]!
  projectVOIs: [VOI]
  voiAuditLogs(input: VOIAuditLogsInput!): VOIAuditLogsConnection!
  complianceServiceReport(imos: [String], timeRange: DateTimeRange, programs: [ComplianceProgram], risks: [Int]): ComplianceServiceReport
  complianceRiskBy(input: ComplianceRiskByInput!): ComplianceRiskByConnection!
  complianceVesselBuildingBlocksBy(input: ComplianceVesselBuildingBlocksByInput!): ComplianceVesselBuildingBlocksByConnection!

  """Query - retrieve a vessel’s profile by IMO"""
  readOnlyVesselProfileLink(vesselId: ObjectId, imo: String, startDate: DateTime, endDate: DateTime, sections: [VesselProfileSectionName!]): ReadOnlyVesselProfileLinkResponse

  """Query - retrieve a link to static company profile"""
  readOnlyCompanyProfileLink(companyId: ObjectId!, startDate: DateTime, endDate: DateTime): ReadOnlyCompanyProfileLinkResponse!

  """
  This service requires area coordinates as an input and generates a link with a snapshot of the area and displays the vessels in it
  """
  readOnlyGeoPresenceLink(
    """
    Sample
    
    {
      "type": "Polygon"
      "coordinates": [
        [
          [2.176516056060791, 41.38813490002815],
          [2.1565604209899902, 41.35480230882416],
          [2.1931135654449463, 41.34268079363133],
          [2.222381830215454, 41.388883479485],
          [2.176516056060791, 41.38813490002815]
        ]
      ]
    }
    """
    area: GeoJSONPolygonGeometryScalar!

    """
    Optional text input to appear above the input area snapshot, the default value is “GEOGRAPHICAL PRESENCE REPORT”
    """
    caption: String
  ): ReadOnlyGeoPresenceLinkResponse

  """Query - returns all GeoJSON map elements by shipment."""
  shipmentGeoJSON(id: ObjectId!): GeoJSONFeatureCollectionScalar

  """Query - returns specific shipments by their ID"""
  trackedShipmentsByIds(ids: [ObjectId!]!, readFromPrimary: Boolean): [TrackedShipment]!

  """Query - returns tracked shipments"""
  trackedShipments(
    skip: NonNegativeInt = 0
    offset: NonNegativeInt! = 0

    """Maximum value is 1000 """
    limit: PositiveInt! = 100
    sort: [SortInput!]
    orderBy: [OrderByInput!]
    filterBy: [FilterByInput!]
    groupBy: GroupByInput
    searchTerm: String
    searchTerms: [String]
    useNewExceptions: Boolean
  ): TrackedShipments!

  """Query - returns all supported carriers"""
  carriers(aliases: [SCAC!]): [Carrier!]!
  businessDataMapping(includeDefaultKeys: Boolean): JSONObject
  sharedShipmentLink(trackedShipmentId: ObjectId!): String
  sharedShipmentsLinks(trackedShipmentIds: [ObjectId!]!): [ShareableLink]
  searchVesselsAPI(input: VesselSearchAPIInput!): VesselSearchAPIConnection!
  publicAPIQuota: [PublicAPIQuota!]
}
            ''',

        },
        {
            "role": "user",
            "content": "what are the ships surrounding india and what of them has imo=9987654",
        },
    ],
)

# print(retrival.model_dump_json(indent=4))
for plan in retrival:
    print(plan)

root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question=None plan=None
root_question='what are the ships surrounding india and what of them has imo=9987654' plan=None
root_question='what are the ships surrounding india and what of them has imo=9987654' plan=None
root_question='what are the ships surrounding india and what of them has imo=9987654' plan=[PartialEndpoints(id=None, query=None, endpoints=None, arguments=None, subquest

In [7]:
import openai
import instructor

class Endpoints(BaseModel):
    id: int = Field(..., description="A unique identifier for the question")
    query: str = Field(..., description="The question decomposited as much as possible")
    endpoints: List[str] = Field(..., description="The graphql endpoint that is required to fetch data from the ai to answer this specific question")
    arguments: List[str] = Field(..., description="The parameters that are required to send in the query request to the endpoint")
    subquestions: List[int] = Field(
        default_factory=list,
        description="The other endpoints it relies on to get the data from to answer the question. Check each of the arguments of the endpoint to check if it is available if not add a subquestion to get the data from the other point that it can be retrived from",
    )


class QueryPlan(BaseModel):
    root_question: str = Field(..., description="The root question that the user asked")
    plan: List[Endpoints] = Field(
        ..., description="""The plan to answer the root question by querying different endpoints available in 
        the graphql api.
        Make sure every information is present and to answer the question and decompose the question properly         
        into each of it's respective endpoints.
        """
    )

client = instructor.patch(OpenAI(api_key=openai_key))
retrival = client.chat.completions.create(
    model="gpt-4-turbo",
    response_model=QueryPlan,
    messages=[
        {
            "role": "system",
            "content": '''You are a query understanding system capable of decomposing a question into subparamaters required to answer the question.
            
            The arguments which are present in different parts of the graphql endpoint i am about to use are as follows: 
           These are the endpoints that are available in graphql
          type Query {
  searchVesselsByIdentifier(identifier: String!, limit: PositiveInt = 20): [SimplifiedVessel]

  """Query - returns information on a single vessel behaviour"""
  vessel(id: ObjectId!): VesselIntelligence

  """Query - returns information on a single vessel by its IMO"""
  vesselByIMO(imo: String!): VesselIntelligence

  """Query - returns information on a multiple vessels by their IMO"""
  vesselsByIMOs(imos: [String!]!): [VesselIntelligence]
  vesselsByMMSI(mmsi: String!): [VesselIntelligence!]!
  portExpectedArrivals(input: PortExpectedArrivalsInput!): PortExpectedArrivalsConnection!
  vesselsInPort(input: VesselsInPortInput!): VesselsInPortConnection!
  departedFromPortVessels(input: DepartedFromPortVesselsInput!): DepartedFromPortVesselsConnection!
  vesselPropertyChanges(input: VesselPropertyChangesInput!): VesselPropertyChangesConnection!

  """
  The API requires selection of a user defined area and selected time range 
  and returns a list of vessels that transmitted in that selected location and selected time
  """
  vesselsInArea(input: VesselsInAreaInput!): VesselsInAreaConnection!
  vesselsCurrentlyInArea(input: VesselsCurrentlyInAreaInput!): VesselsCurrentlyInAreaConnection!
  riskyVesselsInArea(input: RiskyVesselsInAreaInput!): RiskyVesselsInAreaConnection!
  getActivitiesByDatesAndPolygon(type: ActivityTypes!, timeRange: DateTimeRange!, polygonId: ObjectId!): [Activity]
  activitiesInPolygon(input: ActivitiesInPolygonInput!): ActivitiesInPolygonConnection!
  vesselTimeline(input: VesselTimelineInput!): VesselTimelineConnection!
  advancedVesselsSearch(input: AdvancedVesselSearchInput!): SearchResultsOutput
  areas(filter: AreaFilterInput!): [FeatureObject]
  searchCompaniesByTerm(searchTerm: String!): [Company!]!
  projectVOIs: [VOI]
  voiAuditLogs(input: VOIAuditLogsInput!): VOIAuditLogsConnection!
  complianceServiceReport(imos: [String], timeRange: DateTimeRange, programs: [ComplianceProgram], risks: [Int]): ComplianceServiceReport
  complianceRiskBy(input: ComplianceRiskByInput!): ComplianceRiskByConnection!
  complianceVesselBuildingBlocksBy(input: ComplianceVesselBuildingBlocksByInput!): ComplianceVesselBuildingBlocksByConnection!

  """Query - retrieve a vessel's profile by IMO"""
  readOnlyVesselProfileLink(vesselId: ObjectId, imo: String, startDate: DateTime, endDate: DateTime, sections: [VesselProfileSectionName!]): ReadOnlyVesselProfileLinkResponse

  """Query - retrieve a link to static company profile"""
  readOnlyCompanyProfileLink(companyId: ObjectId!, startDate: DateTime, endDate: DateTime): ReadOnlyCompanyProfileLinkResponse!

  """
  This service requires area coordinates as an input and generates a link with a snapshot of the area and displays the vessels in it
  """
  readOnlyGeoPresenceLink(
    """
    Sample
    
    {
      "type": "Polygon"
      "coordinates": [
        [
          [2.176516056060791, 41.38813490002815],
          [2.1565604209899902, 41.35480230882416],
          [2.1931135654449463, 41.34268079363133],
          [2.222381830215454, 41.388883479485],
          [2.176516056060791, 41.38813490002815]
        ]
      ]
    }
    """
    area: GeoJSONPolygonGeometryScalar!

    """
    Optional text input to appear above the input area snapshot, the default value is “GEOGRAPHICAL PRESENCE REPORT”
    """
    caption: String
  ): ReadOnlyGeoPresenceLinkResponse

  """Query - returns all GeoJSON map elements by shipment."""
  shipmentGeoJSON(id: ObjectId!): GeoJSONFeatureCollectionScalar

  """Query - returns specific shipments by their ID"""
  trackedShipmentsByIds(ids: [ObjectId!]!, readFromPrimary: Boolean): [TrackedShipment]!

  """Query - returns tracked shipments"""
  trackedShipments(
    skip: NonNegativeInt = 0
    offset: NonNegativeInt! = 0

    """Maximum value is 1000 """
    limit: PositiveInt! = 100
    sort: [SortInput!]
    orderBy: [OrderByInput!]
    filterBy: [FilterByInput!]
    groupBy: GroupByInput
    searchTerm: String
    searchTerms: [String]
    useNewExceptions: Boolean
  ): TrackedShipments!

  """Query - returns all supported carriers"""
  carriers(aliases: [SCAC!]): [Carrier!]!
  businessDataMapping(includeDefaultKeys: Boolean): JSONObject
  sharedShipmentLink(trackedShipmentId: ObjectId!): String
  sharedShipmentsLinks(trackedShipmentIds: [ObjectId!]!): [ShareableLink]
  searchVesselsAPI(input: VesselSearchAPIInput!): VesselSearchAPIConnection!
  publicAPIQuota: [PublicAPIQuota!]
}
            ''',

        },
        {
            "role": "user",
            "content": "what are the ships in the waters of indian ocean",
        },
    ],
)

print(retrival.model_dump_json(indent=4))


{
    "root_question": "what are the ships in the waters of indian ocean",
    "plan": [
        {
            "id": 1,
            "query": "Get details of the vessels in the Indian Ocean",
            "endpoints": [
                "vesselsInArea"
            ],
            "arguments": [
                "input"
            ],
            "subquestions": [
                2
            ]
        },
        {
            "id": 2,
            "query": "Define the area coordinates for Indian Ocean",
            "endpoints": [
                "areas"
            ],
            "arguments": [
                "filter"
            ],
            "subquestions": []
        }
    ]
}
