In [1]:
import openai
import os

openai.api_key = os.getenv("OPENAI_API_KEY")

In [50]:
# Function to generate text
import time
import random

def generate_text(user_prompt,system_prompt,  max_retries=5, model="gpt-3.5-turbo"):
    if system_prompt:
        messages = [{"role": "system", "content": system_prompt},{"role": "user", "content": user_prompt}]
    else:
        messages = [{"role": "user", "content": user_prompt}]
    for i in range(max_retries):
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=0.0,
                top_p=1,
            )
            return response
        except Exception as e:
            wait_time = (2 ** i) + (random.randint(0, 1000) / 1000)
            print(f"Exception occurred: {e}, retrying in {wait_time} seconds...")
            time.sleep(wait_time)
    raise Exception(f"Failed to generate text after {max_retries} retries.")

In [52]:
# Function to generate text concurrently
import concurrent.futures
from typing import List, Tuple, Union

def concurrent_generate_text(prompts: Union[List[str], List[Tuple[str, str]]]) -> List[str]:
    responses = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        if isinstance(prompts[0], tuple):
            futures = {executor.submit(generate_text, user_prompt, system_prompt) for system_prompt, user_prompt in prompts}
        else:
            futures = {executor.submit(generate_text, user_prompt) for user_prompt in prompts}
        for future in concurrent.futures.as_completed(futures):
            try:
                response = future.result()
                responses.append(response)
            except Exception as exc:
                print(f'Generated an exception: {exc}')
    return responses

In [4]:
# Function to read file to string -> string
def read_file(file_path: str) -> str:
    """Reads file & returns contents as string"""
    with open(file_path, 'r') as f:
        return f.read()

In [59]:
# Function to write string to file -> None
def write_file(file_path: str, contents: str) -> None:
    """Writes string to file"""
    with open(file_path, 'w') as f:
        f.write(contents)

In [5]:
# Function to calculate number of tokens -> int
import tiktoken
def get_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    """Calculates number of tokens in text"""
    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(text)
    num_tokens = len(tokens)
    return num_tokens

In [6]:
# Function to calculate cost -> none
from typing import List
def calculate_cost(texts: List[str], model: "gpt-3.5-turbo") -> None:
    """Calculates cost of text"""

    models = ['gpt-3.5-turbo', 'text-embedding-ada-002', 'gpt-4', 'davinci']
    if model not in models:
        raise Exception(f"Model {model} not found. Available models: {models}")
    
    if model == 'gpt-3.5-turbo':
        dollar_per_1k_tokens = 0.002
    elif model == 'text-embedding-ada-002':
        dollar_per_1k_tokens = 0.0004
    elif model == 'gpt-4':
        dollar_per_1k_tokens = 0.03 
    elif model == 'davinci':
        dollar_per_1k_tokens = 0.02

    num_tokens = 0
    cost = 0
    for t in texts:
        num_tokens += get_tokens(t, model)
        cost += num_tokens * dollar_per_1k_tokens / 1000
    print(f"Number of tokens: {num_tokens}")
    print(f"Cost: US${cost:.4f}")
    print(f"Cost: ~AU${cost * 1.5:.4f}")
    print("Warning: This cost does not account for completion costs, ONLY prompt cost.")

In [7]:
# Function to split text into chunks -> list
import re
def split_text_into_chunks(text: str, max_tokens=2048) -> List[str]:
    """Splits text into chunks of max_tokens length."""
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
    tokens = encoding.encode(text)
    chunks = []
    i = 1
    while len(tokens) > max_tokens:
        chunk = encoding.decode(tokens[:max_tokens])
        chunks.append(f"Part {i} of <<PARTS>>\n\n" + chunk)
        tokens = tokens[max_tokens:]
        i += 1
    chunk = encoding.decode(tokens)
    chunks.append(f"Part {i} of <<PARTS>>\n\n" + chunk)
    chunks = [re.sub(r"<<PARTS>>", str(len(chunks)), chunk) for chunk in chunks]
    return chunks

In [55]:
# Functions to create system & user prompts

USER_PROMPT_TEMPLATE = read_file("prompts/summary_user.txt")
def generate_user_summary_prompt(text: str) -> str:
    """Generates summary prompt from text"""
    return re.sub(r"<<EXCERPT>>", text, USER_PROMPT_TEMPLATE)

SYSTEM_PROMPT_TEMPLATE = read_file("prompts/summary_system.txt")
def generate_system_summary_prompt(document_name: str) -> str:
    """Generates summary prompt from text"""
    return re.sub(r"<<DOCUMENT_NAME>>", document_name, SYSTEM_PROMPT_TEMPLATE)

In [56]:
# Function to get text output from response -> str
def get_text_from_response(response) -> str:
    """Gets text from response"""
    return response['choices'][0]['message']['content']

In [44]:
texts = read_file(file_path="2023_Australian_Constitution.txt")
texts = split_text_into_chunks(texts)
print(f"Number of chunks: { len(texts) }")
calculate_cost(texts, model="gpt-3.5-turbo")

Number of chunks: 14
Number of tokens: 26842
Cost: US$0.4277
Cost: ~AU$0.6415


In [53]:
user_prompts = [generate_user_summary_prompt(text) for text in texts]
system_prompts = [generate_system_summary_prompt("the legal document below") for text in texts]
prompts = list(zip(system_prompts, user_prompts))
prompts[0]

('Extract all key details in the excerpt from the legal document below below. Provide as much detail as possible.',
 'Excerpt:\nPart 1 of 14\n\nAUSTRALIA’S CONSTITUTION\n\nWith Overview and Notes by the Australian Government Solicitor\n\nProduced by the Parliamentary Education Office and Australian Government Solicitor, Canberra\n\nTable of contents\n\nOverview The Constitution\n\niii 1\n\nContents Covering clauses Chapter I—The Parliament\n\n3\n\n6\n\n8\n\nPart I—General\n\nPart II – The Senate\n\nPart III – The House of Representatives\n\nPart IV – Both Houses of the Parliament\n\nPart V – Powers of the Parliament Chapter II—The Executive Government Chapter III—The Judicature Chapter IV—Finance and Trade Chapter V—The States Chapter VI—New States Chapter VII—Miscellaneous Chapter VIII—Alteration of the Constitution\n\n8\n\n8\n\n11\n\n13\n\n15\n\n19\n\n21\n\n24\n\n29\n\n31\n\n32\n\n33\n\nNotes\n\n35\n\nAUSTRALIA’S CONSTITUTION\n\nii\n\nOverview\n\nby the Australian Government Solicito

In [9]:
# base case
response = generate_text(texts[0])
response

<OpenAIObject chat.completion id=chatcmpl-7GzUaHiSrEORcr5lkzQWwtzKLrOk1 at 0x10d6ec530> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "Act of Parliament, it must be passed by both Houses of Parliament and receive Royal Assent (section 58). The Parliament has the power to make laws with respect to a wide range of matters, including taxation, defence, trade and commerce, and immigration (Chapter I, Part V).\n\nExecutive Government\n\nThe Constitution vests the executive power of the Commonwealth in the Queen and provides for the Governor-General to exercise that power on behalf of the Queen (section 61). The Governor-General is appointed by the Queen on the advice of the Prime Minister (section 2). The Governor-General appoints Ministers to form the Executive Government, and those Ministers are responsible to the Parliament (section 64).\n\nJudicature\n\nThe Constitution establishes a federal judicature consisting of a

In [57]:
responses = concurrent_generate_text(prompts=prompts)
print(len(responses))
responses_text = [get_text_from_response(response) for response in responses]
print( len(responses_text) )
responses_text[0]

14
14


'Part 12 of 14\nSection 51 (xxxvii): This section allows the Parliament to make laws for the people of any race, other than the aboriginal race in any State, for whom it is deemed necessary to make special laws. The excerpt also lists the Acts passed by the Parliaments of the States to refer matters to the Parliament or adopt a Commonwealth law under section 51 (xxxvii), including the short title, number, and how they are affected.'

In [64]:
def merge_responses(responses_text: List[str]) -> str:
    summary = ''
    for response in responses_text:
        summary += response
        summary += '\n\n'
    return summary

In [60]:
write_file("Constitution summary.txt", summary)

In [68]:
summary = read_file("Constitution summary.txt")

In [69]:
calculate_cost([summary], model="gpt-3.5-turbo")

Number of tokens: 7125
Cost: US$0.0143
Cost: ~AU$0.0214


In [70]:
chunked_summary = split_text_into_chunks(summary, max_tokens=3000)
print(f"Number of chunks: { len(chunked_summary) }")

Number of chunks: 3


In [71]:
user_prompts = [generate_user_summary_prompt(text) for text in chunked_summary]
system_prompts = [generate_system_summary_prompt("the legal document below") for text in chunked_summary]
prompts = list(zip(system_prompts, user_prompts))

In [72]:
responses = concurrent_generate_text(prompts=prompts)
print(len(responses))
responses_text = [get_text_from_response(response) for response in responses]
print( len(responses_text) )
summary = merge_responses(responses_text)
write_file("Constitution summary p2.txt", summary)

3
3


Selenium is a TOOL.
GPT 3.5 turbo is a tool.
Sell it as a tool.
Use it as a tool to make enhancements to the user experience

**SUGGEST NEXT TEXT SEGMENT FOR WRITING LAW ARTICLES, CASES, REPORTS, etc whatever you can find that lawers use**


In [73]:
text = read_file("Constitution summary p2.txt")
calculate_cost([text], model="gpt-3.5-turbo")

Number of tokens: 2333
Cost: US$0.0047
Cost: ~AU$0.0070


In [74]:
import re

def sort_sections(text):
    lines = text.split('\n')
    section_lines = [line for line in lines if not line.startswith('Part ')]

    # Extract section numbers from the lines
    section_dict = {}
    for line in section_lines:
        match = re.match(r'Section (\d+)', line)
        if match:
            section_number = int(match.group(1))
            if section_number in section_dict:
                section_dict[section_number].append(line)
            else:
                section_dict[section_number] = [line]

    # Sort the sections by section number and join them back into a string
    sorted_text = '\n'.join('\n'.join(section_dict[section]) for section in sorted(section_dict.keys()))
    return sorted_text

In [75]:
text = sort_sections(text)
write_file("Constitution summary p2.txt", text)

In [78]:
text = read_file("Constitution summary p2.txt")
system_prompt = ("Write a cohesive, comprehensive, detailed, summary of the document provided.")
response = generate_text(user_prompt=text, system_prompt=system_prompt)
summary = get_text_from_response(response)
write_file("Constitution summary p3.txt", summary)