In [32]:
import sys
backend_path = '../backend'
if backend_path not in sys.path:
        sys.path.append(backend_path)

In [50]:
import os
import json
import instructor
from openai import OpenAI
import pendulum


In [51]:
load_dotenv()
DATABASE_URL = "postgresql+psycopg2://airflow:airflow@localhost:5432/airflow"

In [52]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

In [53]:
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
selected_date = pendulum.now("utc").subtract(days=1).strftime("%Y%m%d")

In [35]:
client = instructor.patch(OpenAI())

In [36]:
from pydantic import BaseModel

In [37]:
class UserDetail(BaseModel):
    name: str
    age: int

In [38]:
user: UserDetail = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_model=UserDetail,
    messages=[
        {"role": "user", "content": "Peter is 38 years old"}
    ],
)

assert isinstance(user, UserDetail)
assert user.name == "Peter"
assert user.age == 38
print(user.model_dump_json(indent=2))

{
  "name": "Peter",
  "age": 38
}


In [39]:
print(user._raw_response.model_dump_json(indent=2))

{
  "id": "chatcmpl-924PiHHaKbz7s3ZuWYTefJvUlbigf",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
        "content": null,
        "role": "assistant",
        "function_call": null,
        "tool_calls": [
          {
            "id": "call_g8p0yGy72irNgTd4zZlTXqZs",
            "function": {
              "arguments": "{\"name\":\"Peter\",\"age\":38}",
              "name": "UserDetail"
            },
            "type": "function"
          }
        ]
      }
    }
  ],
  "created": 1710280650,
  "model": "gpt-3.5-turbo-0125",
  "object": "chat.completion",
  "system_fingerprint": "fp_4f0b692a78",
  "usage": {
    "completion_tokens": 9,
    "prompt_tokens": 80,
    "total_tokens": 89
  }
}


In [62]:
from pydantic import BaseModel, ValidationError, BeforeValidator, Field
from typing_extensions import Annotated, List, Optional
from instructor import llm_validator



In [55]:
from sqlalchemy import create_engine, select, values, update, and_, exists
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv
from app.models.models import Notice, ResourceLink
from app.models.schema import NoticeBase, ResourceLinkBase


In [42]:
class QuestionAnswer(BaseModel):
    question: str
    answer: Annotated[
        str, llm_validator("don't say objectionable things")
    ]

In [43]:
try:
    qa = QuestionAnswer(
        question="What is the meaning of life?",
        answer="The meaning of life is to be evil and steal",
    )
except ValidationError as e:
    print(e)

In [44]:
qa.answer

'The meaning of life is to be evil and steal'

In [45]:
client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_model=QuestionAnswer,
    messages=[
        {"role": "user", "content": f"{qa.question}, {qa.answer}"}
    ],
)


QuestionAnswer(question='What is the meaning of life?', answer='The meaning of life is to be evil and steal')

In [46]:
with open('./data/example_resource.json', 'r') as f:
    resource = json.load(f) 


In [56]:
with SessionLocal() as session:
    subquery = (
        select(ResourceLink.notice_id).
        where(and_(ResourceLink.notice_id == Notice.id, ResourceLink.text.isnot(None)))
    )
    stmt = (
        select(Notice).
        where(exists(subquery))
    )
    results = session.execute(stmt).scalars().all()
    result_dict = [NoticeBase.model_validate(result).dict() for result in results]

In [57]:
shorter_rfp = result_dict[0]['resource_links'][0]['text']
longer_rfp = result_dict[0]['resource_links'][1]['text']

In [58]:
shorter_rfp

'REQUEST FOR QUOTATION\n(THIS IS NOT AN ORDER)\n\n1. REQUEST NO.\n\n140G0124Q0100\n\n5a. ISSUED BY\n\nUSGS NATIONAL ACQUISITION BRANCH\n205 NATIONAL CENTER\n12201 SUNRISE VALLEY DRIVE\nRESTON VA 20192\n\nTHIS RFQ\n\nX\n\nIS\n\n2. DATE ISSUED\n\n03/11/2024\n\nIS NOT A SMALL BUSINESS SET ASIDE\n\n3. REQUISITION/PURCHASE REQUEST NO.\n\nPAGE      OF\n\nPAGES\n\n1\n\n 29 \n\nRATING\n\nNAME\n\nBrian Baker\n\na. NAME\n\nc. STREET ADDRESS\n\n5b. FOR INFORMATION CALL: (No collect calls)\n\nTELEPHONE NUMBER\n\nAREA CODE\n\n000\n\nNUMBER\n\n000-0000\n\n8. TO:\n\nb. COMPANY\n\n4. CERT. FOR NAT. DEF.\nUNDER BDSA REG. 2\nAND/OR DMS REG.1\n\n6. DELIVERY BY (Date)\n\n60 Days After Award\n\n7. DELIVERY\n\nX\n\nFOB DESTINATION\n\na. NAME OF CONSIGNEE\n\nUSGS LRS\n\nb. STREET ADDRESS\n\n9. DESTINATION\n\nOTHER\n\n(See Schedule)\n\n12201 Sunrise Valley Dr. MS 517\n\nc. CITY\n\nRESTON\nVA\n\nd. STATE\n\ne. ZIP CODE\n\n20192-0002\n\nd. CITY\n\ne. STATE\n\nf. ZIP CODE\n\n10. PLEASE FURNISH QUOTATIONS TO\n   

In [63]:
class ContractOpportunity(BaseModel):
    title: str = Field(..., description="Title of the RFP")
    project_duration: Optional[int] = Field(None, description="Duration of the project in months")
    budget_min: Optional[float] = Field(None, description="Minimum budget estimate")
    budget_max: Optional[float] = Field(None, description="Maximum budget estimate")
    qualifications: List[str] = Field(default_factory=list, description="Required qualifications or certifications")
    est_org_size_min: Optional[int] = Field(None, description="Estimated minimum organizational size needed")
    est_org_size_max: Optional[int] = Field(None, description="Estimated maximum organizational size")
    required_tools: List[str] = Field(default_factory=list, description="List of required tools or technologies")
    project_location: Optional[str] = Field(None, description="Geographical location of the project")
    sector_focus: List[str] = Field(default_factory=list, description="Industries or sectors the project is focused on")
    eligibility_criteria: List[str] = Field(default_factory=list, description="Eligibility criteria for bidding")
    additional_notes: Optional[str] = Field(None, description="Any additional notes or requirements")

In [64]:
res = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_model=contractopportunity,
    messages=[
        {"role": "user", "content": f"please provide relevant information for a contractor deciding whether or not to bid a government contract based off of the following document: {shorter_rfp}"}
    ],
)

In [65]:
res

ContractOpportunity(title='REQUEST FOR QUOTATION', project_duration=None, budget_min=None, budget_max=None, qualifications=['Certified for National Defense under BDSA Reg. 2 and/or DMS Reg. 1'], est_org_size_min=None, est_org_size_max=None, required_tools=['Field spectroradiometer'], project_location=None, sector_focus=[], eligibility_criteria=['Suppliers must provide firm-fixed discounted pricing for the specified items', 'Suppliers must comply with functional and technical requirements and applicable clauses in Attachment A', 'Quotations must be submitted by 03/19/2024'], additional_notes='This is a request for information, and quotations furnished are not offers. The government is not committed to pay any costs incurred in the preparation of the submission. Supplies are of domestic origin unless otherwise indicated by the quoter. Any representations and/or certifications attached to this Request for Quotations must be completed by the quoter.')

In [66]:
class RFPQuickAssessment(BaseModel):
    project_title: str = Field(..., description="Title or brief description of the project")
    submission_deadline: str = Field(..., description="Due date for quote submission")
    contact_email: str = Field(..., description="Email address for the contract specialist")
    small_business_set_aside: bool = Field(..., description="Indicates if the RFP is set aside for small businesses")
    technical_requirements: List[str] = Field(default_factory=list, description="List of key technical specifications or requirements")
    equipment_requirements: str = Field(..., description="Description of specific equipment requirements")
    qualification_requirements: List[str] = Field(default_factory=list, description="List of required qualifications or certifications")
    budget_or_price_focus: str = Field(..., description="Indicates if the award is based on lowest price technically acceptable (LPTA) or other criteria")
    delivery_timeframe: Optional[int] = Field(None, description="Required delivery timeframe in days ARO (After Receipt of Order)")
    cybersecurity_compliance: bool = Field(..., description="Indicates if cybersecurity and privacy control standards must be met")
    telecommunications_compliance: bool = Field(..., description="Indicates if there are specific telecommunications equipment or services restrictions")
    additional_notes: Optional[str] = Field(None, description="Any other critical information or special instructions")

In [69]:

truncated_longer_rfp = longer_rfp[:(round(len(longer_rfp)/2))]

In [68]:
res = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_model=RFPQuickAssessment,
    messages=[
        {"role": "user", "content": f"please provide relevant information for a contractor deciding whether or not to bid, or even consider for bid, a government contract based off of the following document: {truncated_longer_rfp}"}
    ],
)

BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 23800 tokens (23466 in the messages, 334 in the functions). Please reduce the length of the messages or functions.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}