In [1]:
# Load the `dotenv` IPython extension, then run its `%dotenv` line magic.
# NOTE(review): presumably this reads a local .env file that supplies the API
# credentials used by the OpenAI client created later — confirm.
%load_ext dotenv
%dotenv

In [2]:
import os
from pydantic import BaseModel, Field
from openai import OpenAI
from typing import Optional, List

# Module-level OpenAI client; `extract` uses it to issue the API request.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment — confirm.
client = OpenAI()

In [3]:
class Location(BaseModel):
    """
    Represents a physical location including address, city, state, and country.
    """

    # All four parts are required strings.
    # NOTE(review): the docstring and the descriptions below are presumably
    # surfaced to the model through the generated JSON schema — treat any
    # wording change as a prompt change.
    address: str = Field(
        description="The street address of the location.",
    )
    city: str = Field(
        description="The city of the location.",
    )
    state: str = Field(
        description="The state or region of the location.",
    )
    country: str = Field(
        description="The country of the location.",
    )

class Organization(BaseModel):
    """
    Represents an organization, including its name and location.
    """

    # Both fields are required; `location` nests a full `Location` model.
    # NOTE(review): the docstring and descriptions are presumably part of the
    # schema shown to the model — keep their wording stable.
    name: str = Field(
        description="The name of the organization.",
    )
    location: Location = Field(
        description="The primary location of the organization.",
    )

class Contract(BaseModel):
    """
    Represents the key details of the licensing and web hosting agreement.
    """

    # NOTE(review): the docstring and every description below are presumably
    # surfaced to the model through the JSON schema built from this class —
    # treat any wording change as a prompt change.

    # --- Parties and dates -------------------------------------------------
    effective_date: str = Field(
        description="The date when the contract becomes effective. Use yyyy-MM-dd format.",
    )
    client: Organization = Field(
        description="The client organization entering the contract.",
    )
    service_provider: Organization = Field(
        description="The service provider organization (TrueLink) entering the contract.",
    )

    # --- Licence scope and services ----------------------------------------
    grant_scope: str = Field(
        description="Description of the rights granted under the license, including scope and limitations.",
    )
    term: str = Field(
        description="The term of the agreement, including any provisions for renewal or termination.",
    )
    hosting_services: List[str] = Field(
        description="List of hosting services provided under the agreement.",
    )
    # The only field with a default: it falls back to None when not supplied.
    support_services_duration: Optional[int] = Field(
        default=None,
        description="Duration in months that the service provider commits to providing support services.",
    )

    # --- Commercial and legal clauses --------------------------------------
    compensation_details: str = Field(
        description="Details of the payment terms and compensation structure for hosting and support services.",
    )
    intellectual_property_rights: str = Field(
        description="Provisions regarding the ownership and rights to the intellectual property of both parties.",
    )
    confidentiality_obligations: str = Field(
        description="Details regarding the confidentiality obligations between the client and service provider.",
    )
    dispute_resolution: str = Field(
        description="Provisions for how disputes will be resolved, including jurisdiction and venue.",
    )
    termination_conditions: str = Field(
        description="Conditions under which the contract may be terminated by either party.",
    )
    indemnification_provisions: str = Field(
        description="Details of indemnification obligations for both parties.",
    )


In [4]:
# System prompt sent as the first message of every `extract` request.
# The literal starts with a newline (the text begins on the line after the
# opening quotes); keep the bytes stable, since edits change the prompt.
system_message = """
You are an expert in extracting structured information from legal documents and contracts.
Identify key details such as parties involved, dates, terms, obligations, and legal definitions.
Present the extracted information in a clear, structured format. Be concise, focusing on essential
legal content and ignoring unnecessary boilerplate language."""

In [5]:
def extract(document, model="gpt-4o-2024-08-06", temperature=0):
    """Extract the `Contract` fields from a document and return them as JSON text.

    Sends `system_message` followed by the document to the chat-completions
    `parse` helper, with `Contract` as the response format.

    Args:
        document: Text of the agreement; sent verbatim as the user message.
        model: Model name forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The content of the first choice's message, as a string. Callers decode
        it themselves (the notebook uses `json.loads`).

    Raises:
        ValueError: If the model refused the request or the message carries
            no content, so callers get a clear error instead of a `None`
            that fails later during JSON decoding.
    """
    response = client.beta.chat.completions.parse(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": document},
        ],
        response_format=Contract,
    )
    message = response.choices[0].message

    # A refusal arrives in a separate field and leaves `content` empty;
    # `getattr` tolerates message objects that do not expose `refusal`.
    refusal = getattr(message, "refusal", None)
    if refusal:
        raise ValueError(f"Model refused to extract the contract: {refusal}")
    if message.content is None:
        raise ValueError("Model returned no content for the contract extraction.")

    return message.content

In [6]:
# Read the agreement text that is later passed to `extract`.
# The encoding is explicit so decoding does not depend on the platform's
# locale default. NOTE(review): assumes the file is UTF-8 (or plain ASCII) — confirm.
with open('../data/license_agreement.txt', 'r', encoding='utf-8') as file:
    contents = file.read()

In [8]:
# Run the extraction on the loaded agreement text and print what comes back
# (the string returned by `extract`).
data = extract(document=contents)
print(data)

{"effective_date":"1999-02-26","client":{"name":"Mortgage Logic.com, Inc.","location":{"address":"Two Venture Plaza, 2 Venture","city":"Irvine","state":"California","country":"USA"}},"service_provider":{"name":"TrueLink, Inc.","location":{"address":"3026 South Higuera","city":"San Luis Obispo","state":"California","country":"USA"}},"grant_scope":"TrueLink grants a non-exclusive license to Client to use the Interface for origination, underwriting, processing, and funding of consumer finance receivables. Client is not obligated to use the License and may use other systems as long as they do not infringe on TrueLink's Intellectual Property.","term":"The agreement is effective for 1 year from the Effective Date and renews automatically for successive one-year periods unless terminated as per Section 12.","hosting_services":["Storage of Web Site and data files","Reasonable response times for Web Site access","Bandwidth provision","24/7 Web Site availability","Access to Interface usage stati

In [10]:
import json

# Decode the JSON string returned by `extract` into a plain dict.
# The guard keeps this cell idempotent: `data` is rebound from str to dict
# here, so an unguarded second run would call `json.loads` on a dict and
# raise TypeError.
if isinstance(data, str):
    data = json.loads(data)

In [11]:
# Display the decoded contract (a dict after the `json.loads` step) as the cell output.
data

{'effective_date': '1999-02-26',
 'client': {'name': 'Mortgage Logic.com, Inc.',
  'location': {'address': 'Two Venture Plaza, 2 Venture',
   'city': 'Irvine',
   'state': 'California',
   'country': 'USA'}},
 'service_provider': {'name': 'TrueLink, Inc.',
  'location': {'address': '3026 South Higuera',
   'city': 'San Luis Obispo',
   'state': 'California',
   'country': 'USA'}},
 'grant_scope': "TrueLink grants a non-exclusive license to Client to use the Interface for origination, underwriting, processing, and funding of consumer finance receivables. Client is not obligated to use the License and may use other systems as long as they do not infringe on TrueLink's Intellectual Property.",
 'term': 'The agreement is effective for 1 year from the Effective Date and renews automatically for successive one-year periods unless terminated as per Section 12.',
 'hosting_services': ['Storage of Web Site and data files',
  'Reasonable response times for Web Site access',
  'Bandwidth provisio