In [1]:
from openai import AsyncOpenAI
import asyncio
import json
from chatgpt_models.assistants.punctuation import Comma
from chatgpt_models.assistants.punctuation import Semicolon
from chatgpt_models.assistants.punctuation import Colon
from chatgpt_models.assistants.punctuation import DashHyphen
from chatgpt_models.assistants.punctuation import QuotationMark
from chatgpt_models.assistants.punctuation import Apostrophe
from chatgpt_models.assistants.punctuation import Parentheses
from chatgpt_models.assistants.punctuation import Ellipsis
from chatgpt_models.assistants.punctuation import ExclamationMark
from chatgpt_models.assistants.punctuation import QuestionMark
from text_handlers.parsers import CompositeTextCleaner
from text_handlers.parsers import WhitespaceNormalizer
from text_handlers.parsers import NonUTF8Remover
from text_handlers.parsers import ChapterLoader
from text_handlers.parsers import ParagraphClassifier
from text_handlers.parsers import AnnotationRemover
from text_handlers.parsers import SentenceSplitter
from text_handlers.parsers import ParagraphReconstructor
from text_handlers.parsers import ParagraphCombiner
import os
from datetime import datetime
from loguru import logger
# nltk.download('punkt')
# nltk.download('punkt_tab')

In [None]:
""" AGENT DEFINITIONS
### 1. **Sentence Structure and Clarity**
   - **Subject-Verb Agreement**: Ensure that the subject and verb agree in number (singular/plural) consistently throughout the manuscript.
   - **Parallelism**: Maintain parallel structure in sentences and lists to ensure clarity and consistency.
   - **Active vs. Passive Voice**: Active voice is generally preferred for clarity and directness, though passive voice may be used when the focus is on the action rather than the actor.

### 2. **Verb Tense Consistency**
   - **Present Tense**: Use for general statements, ongoing actions, and when discussing findings or conclusions within the manuscript.
   - **Past Tense**: Use for describing completed research, experiments, and historical events.
   - **Future Tense**: Use when discussing work that is yet to be undertaken or potential outcomes.

### 3. **Punctuation Rules**
   - **Commas**:
     - **Serial Comma**: Taylor & Francis style generally uses the serial (Oxford) comma before the final item in a list (e.g., apples, oranges, and bananas).
     - **Introductory Elements**: Use a comma after introductory phrases, clauses, or words that come before the main clause.
     - **Nonrestrictive Clauses**: Use commas to set off nonrestrictive clauses that add non-essential information to a sentence.
     - **Restrictive Clauses**: Do not use commas for restrictive clauses that are essential to the sentence’s meaning.
     - **Conjunctions**: Use a comma before coordinating conjunctions (and, but, or, nor, for, so, yet) when they join two independent clauses.
     - **Parenthetical Elements**: Set off non-essential elements in a sentence with commas.

   - **Semicolons**:
     - **Independent Clauses**: Use semicolons to link closely related independent clauses that are not joined by a conjunction.
     - **Complex Lists**: Use semicolons to separate items in a list when the items themselves contain commas for clarity.

   - **Colons**:
     - **Introducing Lists or Explanations**: Use a colon after a complete sentence to introduce a list, quote, or explanation.
     - **Time**: Use colons to separate hours and minutes in time expressions (e.g., 10:30 a.m.).
     - **Subtitles**: Use colons to separate a title from its subtitle (e.g., *Title: Subtitle*).

   - **Dashes and Hyphens**:
     - **Hyphen**: Use hyphens to form compound adjectives and to avoid ambiguity (e.g., well-known, re-cover vs. recover).
     - **En Dash**: Use en dashes for number ranges (e.g., 2010–2020) and in compound adjectives involving open compounds (e.g., New York–London flight).
     - **Em Dash**: Use em dashes sparingly to set off parenthetical statements or for emphasis, without spaces on either side.

   - **Quotation Marks**:
     - **Double Quotation Marks**: Use double quotation marks for direct quotes and titles of shorter works (e.g., articles, poems).
     - **Single Quotation Marks**: Use single quotation marks for quotes within quotes.
     - **Punctuation Inside Quotation Marks**: Periods and commas are placed inside quotation marks. Colons and semicolons are placed outside. Question marks and exclamation points are placed inside if part of the quoted material and outside if they apply to the entire sentence.

   - **Apostrophes**:
     - **Possessives**: Form the possessive of singular nouns with 's (e.g., the author’s study), and for plural nouns ending in s with just an apostrophe (e.g., the authors’ findings).
     - **Contractions**: Use apostrophes in contractions to indicate omitted letters (e.g., don’t, it’s).
     - **Plural Forms**: Avoid using apostrophes for plural forms of abbreviations or numbers (e.g., 1990s, MPs).

   - **Parentheses**:
     - **Non-Essential Information**: Use parentheses to enclose supplementary or non-essential information.
     - **Punctuation Placement**: Punctuation marks are placed outside the parentheses unless the entire sentence is within the parentheses.

   - **Ellipses**:
     - **Omission of Words**: Use ellipses to indicate omissions within a quotation. Three dots should be used for omissions within a sentence, and four dots for omissions between sentences.
     - **End of Sentence**: If an ellipsis appears at the end of a sentence, add a period before the ellipsis (making four dots in total).

   - **Exclamation Points**:
     - **Use Sparingly**: Exclamation points should be used sparingly and only for strong emphasis.

   - **Question Marks**:
     - **Direct Questions**: Place a question mark at the end of a direct question.
     - **Indirect Questions**: Do not use a question mark for indirect questions (e.g., She wondered why the experiment failed).

### 4. **Conjunctions and Connectives**
   - **Coordinating Conjunctions**: Ensure proper use of commas before coordinating conjunctions when joining two independent clauses.
   - **Subordinating Conjunctions**: Use subordinating conjunctions (although, because, since) with a comma when the subordinate clause precedes the main clause.

### 5. **Modifiers**
   - **Misplaced Modifiers**: Ensure modifiers are placed next to the word or phrase they modify to avoid ambiguity.
   - **Dangling Modifiers**: Avoid dangling modifiers by ensuring the introductory phrase clearly relates to the main clause subject.

### 6. **Pronoun-Antecedent Agreement**
   - **Agreement**: Pronouns must agree with their antecedents in number and gender.
   - **Clarity**: Ensure the antecedent is clear to avoid ambiguity in pronoun usage.

### 7. **Common Usage and Style Preferences**
   - **That vs. Which**: Use "that" for restrictive clauses and "which" for nonrestrictive clauses, with "which" clauses set off by commas.
   - **Who vs. Whom**: Use "who" as the subject of a verb and "whom" as the object of a verb or preposition.
   - **Further vs. Farther**: Use "farther" for physical distances and "further" for figurative or abstract distances.
   - **Since vs. Because**: Use "since" when referring to time and "because" when indicating causality.
   - **While vs. Although**: Use "while" to indicate time or simultaneous events and "although" for contrasts.

### 8. **Capitalization**
   - **Proper Nouns**: Capitalize proper nouns, including names of people, organizations, and specific locations.
   - **Titles**: Capitalize major words in titles (nouns, pronouns, verbs, adjectives, adverbs), and lowercase articles, conjunctions, and prepositions unless they are the first or last word in the title.
   - **Headings**: Ensure consistent capitalization in headings and subheadings.

### 9. **Italics**
   - **Emphasis**: Use italics sparingly for emphasis.
   - **Foreign Words**: Italicize foreign words and phrases that are not commonly used in English.
   - **Titles**: Italicize the titles of books, journals, and other standalone works.

### 10. **Lists**
   - **Punctuation in Lists**: Use commas to separate items in a simple list. Use semicolons if the list items contain internal punctuation.
   - **Parallel Structure**: Ensure all list items follow the same grammatical structure.

### 11. **Direct Address**
   - **Comma Usage**: Use commas to set off names or terms of endearment used in direct address (e.g., “Yes, Dr. Smith, the experiment is complete”).

### 12. **Miscellaneous**
   - **Split Infinitives**: Avoid split infinitives unless necessary for clarity.
   - **Ending Sentences with Prepositions**: Avoid ending sentences with prepositions unless it sounds awkward to do otherwise.
   - **Double Negatives**: Avoid using double negatives to prevent confusion.

### 13. **Formatting and Layout**
   - **Spacing**: Use single spacing after periods and other punctuation marks.
   - **Paragraph Indentation**: Indent the first line of each paragraph consistently.
   - **Alignment**: Use left-aligned text, with a ragged right edge, unless otherwise specified.

### Implementation in the GPT Agent
This codified list of rules will be used to direct the Grammar and Punctuation Agent when editing manuscripts according to Taylor & Francis guidelines. The agent will be trained to recognize and apply these specific rules consistently, ensuring that the text aligns with the style and standards set by Taylor & Francis.
"""

In [4]:
# Read the API key from the environment instead of keeping it in the notebook
# source, where it could be committed or shared together with the outputs.
secret = os.environ.get('OPENAI_API_KEY', '')
if not secret:
    # Keep going so cells that do not hit the API still run, but make the
    # missing credential visible right away.
    logger.warning('OPENAI_API_KEY is not set; the OpenAI client was created with an empty API key.')

# Async OpenAI client shared by every agent created later in the notebook.
client = AsyncOpenAI(
    api_key=secret,
)

In [None]:

def clean_text(text: str) -> str:
    """Clean ``text`` with a two-step ``CompositeTextCleaner``.

    The composite is built from a ``WhitespaceNormalizer`` followed by a
    ``NonUTF8Remover``; what each step does is defined by those classes in
    ``text_handlers.parsers``.

    Args:
        text: The raw text to clean.

    Returns:
        The value returned by ``CompositeTextCleaner.clean`` for ``text``.
    """
    cleaning_steps = [WhitespaceNormalizer(), NonUTF8Remover()]
    return CompositeTextCleaner(cleaning_steps).clean(text)

# def extract_response(gpt_obj):
#     return json.loads(gpt_obj.data[-1].content[0].text.value)

In [5]:
# Load the chapter text through the project's ChapterLoader.
chapter_loader = ChapterLoader('raja')
chapter12_content = chapter_loader.load('stuffthings.txt')

# Classify the loaded content into uncertain blocks and paragraphs.
uncertain_blocks, classified_paragraphs = ParagraphClassifier().classify(chapter12_content)

# Skip the first classified paragraph (presumably a title/heading — TODO confirm).
paragraphs = classified_paragraphs[1:]

In [None]:
# Show the classified paragraphs (with the first entry already dropped) for a visual check.
paragraphs

In [None]:
"""
### 1. **Sentence Structure and Clarity**
   - **Subject-Verb Agreement**: Ensure that the subject and verb agree in number (singular/plural) consistently throughout the manuscript.
   - **Parallelism**: Maintain parallel structure in sentences and lists to ensure clarity and consistency.
   - **Active vs. Passive Voice**: Active voice is generally preferred for clarity and directness, though passive voice may be used when the focus is on the action rather than the actor.
"""

# Rule prompts for the "Sentence Structure and Clarity" section of the style notes.
rule1 = 'Check Subject-Verb Agreement; Ensure that the subject and verb agree in number (singular/plural).'
rule2 = 'Check parallelism; Maintain parallel structure in sentences and lists to ensure clarity and consistency.'
rule3 = 'Check Active vs Passive voice; Makes sure the appropriate voice is used. Active voice is generally preferred for clarity and directness, though passive voice may be used when the focus is on the action rather than the actor.'

# Only rule1 is passed on; rule2 and rule3 are defined but not used in this cell.
# NOTE(review): `process_paragraphs` is not defined or imported in any visible cell,
# so this call raises NameError on a fresh kernel — confirm where it should come from.
edited_paragraphs = await process_paragraphs(paragraphs, 'sentence_structure_and_clarity_agent', rule1)

In [None]:
"""
### 2. **Verb Tense Consistency**
   - **Present Tense**: Use for general statements, ongoing actions, and when discussing findings or conclusions within the manuscript.
   - **Past Tense**: Use for describing completed research, experiments, and historical events.
   - **Future Tense**: Use when discussing work that is yet to be undertaken or potential outcomes.
"""

# Rule prompt for the "Verb Tense Consistency" section; rebinds the `rule1` name used by the previous cell.
rule1 = 'Check Verb Tense Consistency; Present Tense is used for general statements, ongoing actions, and when discussing findings or conclusions within the manuscript. Past tense is used for describing completed research, experiments, and historical events. And future tense is used when discussing work that is yet to be undertaken or potential outcomes.'
# NOTE(review): `process_paragraphs` is not defined or imported in any visible cell (NameError on a fresh kernel).
# NOTE(review): the agent identifier is still 'sentence_structure_and_clarity_agent' although the rule is about verb tense — confirm this is intended.
edited_paragraphs = await process_paragraphs(paragraphs, 'sentence_structure_and_clarity_agent', rule1)


In [None]:
"""
### 3. **Punctuation Rules**
   - **Commas**:
     - **Serial Comma**: Taylor & Francis style generally uses the serial (Oxford) comma before the final item in a list (e.g., apples, oranges, and bananas).
     - **Introductory Elements**: Use a comma after introductory phrases, clauses, or words that come before the main clause.
     - **Nonrestrictive Clauses**: Use commas to set off nonrestrictive clauses that add non-essential information to a sentence.
     - **Restrictive Clauses**: Do not use commas for restrictive clauses that are essential to the sentence’s meaning.
     - **Conjunctions**: Use a comma before coordinating conjunctions (and, but, or, nor, for, so, yet) when they join two independent clauses.
     - **Parenthetical Elements**: Set off non-essential elements in a sentence with commas.

   - **Semicolons**:
     - **Independent Clauses**: Use semicolons to link closely related independent clauses that are not joined by a conjunction.
     - **Complex Lists**: Use semicolons to separate items in a list when the items themselves contain commas for clarity.

   - **Colons**:
     - **Introducing Lists or Explanations**: Use a colon after a complete sentence to introduce a list, quote, or explanation.
     - **Time**: Use colons to separate hours and minutes in time expressions (e.g., 10:30 a.m.).
     - **Subtitles**: Use colons to separate a title from its subtitle (e.g., *Title: Subtitle*).

   - **Dashes and Hyphens**:
     - **Hyphen**: Use hyphens to form compound adjectives and to avoid ambiguity (e.g., well-known, re-cover vs. recover).
     - **En Dash**: Use en dashes for number ranges (e.g., 2010–2020) and in compound adjectives involving open compounds (e.g., New York–London flight).
     - **Em Dash**: Use em dashes sparingly to set off parenthetical statements or for emphasis, without spaces on either side.

   - **Quotation Marks**:
     - **Double Quotation Marks**: Use double quotation marks for direct quotes and titles of shorter works (e.g., articles, poems).
     - **Single Quotation Marks**: Use single quotation marks for quotes within quotes.
     - **Punctuation Inside Quotation Marks**: Periods and commas are placed inside quotation marks. Colons and semicolons are placed outside. Question marks and exclamation points are placed inside if part of the quoted material and outside if they apply to the entire sentence.

   - **Apostrophes**:
     - **Possessives**: Form the possessive of singular nouns with 's (e.g., the author’s study), and for plural nouns ending in s with just an apostrophe (e.g., the authors’ findings).
     - **Contractions**: Use apostrophes in contractions to indicate omitted letters (e.g., don’t, it’s).
     - **Plural Forms**: Avoid using apostrophes for plural forms of abbreviations or numbers (e.g., 1990s, MPs).

   - **Parentheses**:
     - **Non-Essential Information**: Use parentheses to enclose supplementary or non-essential information.
     - **Punctuation Placement**: Punctuation marks are placed outside the parentheses unless the entire sentence is within the parentheses.

   - **Ellipses**:
     - **Omission of Words**: Use ellipses to indicate omissions within a quotation. Three dots should be used for omissions within a sentence, and four dots for omissions between sentences.
     - **End of Sentence**: If an ellipsis appears at the end of a sentence, add a period before the ellipsis (making four dots in total).

   - **Exclamation Points**:
     - **Use Sparingly**: Exclamation points should be used sparingly and only for strong emphasis.

   - **Question Marks**:
     - **Direct Questions**: Place a question mark at the end of a direct question.
     - **Indirect Questions**: Do not use a question mark for indirect questions (e.g., She wondered why the experiment failed).
"""

# Run the paragraphs through the comma agent identifier; no rule text is passed here.
# NOTE(review): `process_paragraphs` is not defined or imported in any visible cell (NameError on a fresh kernel);
# the agent-network cell calls `agent.process_paragraphs(paragraphs)` on a `Comma` instance instead — confirm which is current.
edited_paragraphs = await process_paragraphs(paragraphs, 'punctuation_rules_comma_agent')

In [None]:
agent_network = [
    Comma(client=client),
    Semicolon(client=client),
    Colon(client=client),
    DashHyphen(client=client),
    QuotationMark(client=client),
    Apostrophe(client=client),
    Parentheses(client=client),
    Ellipsis(client=client),
    ExclamationMark(client=client),
    QuestionMark(client=client)
]

for agent in agent_network:
    logger.debug(f'Processing with {agent.agent_ident}')
    try:
        edited_paragraphs = await agent.process_paragraphs(paragraphs)
    except Exception as e:
        logger.error(f'Error processing with {agent.agent_ident}: {e}... ending agent network operation and stashing results so far')
        break
    else:
        full = []
        for p in edited_paragraphs:
            sentences = [agent.extract_response(s) for s in p]
            para = ParagraphReconstructor().reconstruct(sentences)
            full.append(para)
        paragraphs = ParagraphCombiner().combine(full)

# Timestamp (second resolution) used to give each run its own output file.
current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

filename = f"paragraphs_{current_time}.txt"

# Write explicitly as UTF-8 so the result does not depend on the platform's
# default encoding; edited manuscripts can contain typographic characters
# (curly quotes, en/em dashes) that some default encodings cannot represent.
# NOTE(review): `file.write` needs a str. If the first agent in the loop above
# failed, `paragraphs` is still the value produced by the classifier cell —
# confirm that it is writable as-is in that case.
with open(filename, "w", encoding="utf-8") as file:
    file.write(paragraphs)

print(f"Paragraphs have been saved to {filename}")


In [None]:
agent = Semicolon(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = Colon(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = DashHyphen(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = QuotationMark(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = Apostrophe(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = Parentheses(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = Ellipsis(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = ExclamationMark(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
agent = QuestionMark(client=client)
prompt = await agent.create_prompt()
print(agent.agent_ident)
print(prompt)

In [None]:
# Rebuild the edited text from the most recent `edited_paragraphs` and print it.
#
# The free function `extract_response` exists only as commented-out code in
# this notebook, so calling it raised NameError on a fresh kernel. The agent's
# own `extract_response` method is used instead, matching the agent-network cell.
# NOTE(review): this relies on the kernel-global `agent` assigned by an earlier
# cell — confirm it is compatible with the agent that produced `edited_paragraphs`.
full = []
for p in edited_paragraphs:
    # One extracted response per sentence-level result in this paragraph.
    sentences = [agent.extract_response(s) for s in p]
    para = ParagraphReconstructor().reconstruct(sentences)
    full.append(para)

content = ParagraphCombiner().combine(full)
print(content)

In [None]:
# NOTE(review): rebinds `content` (assigned from the combined paragraphs in the previous cell)
# to the one-character placeholder "T" — looks like scratch input; confirm whether this cell is still needed.
content = """T"""

In [None]:
# Load up assistant

# Fetch examples from GPT for each rule and append to the rule on the assistant

# Read content from plaintext file

# Parse plaintext file into paragraphs

# Parse paragraphs into sentences

# Build the JSON structure for the Assistant. Use a class to keep the structure uniform

# Run assistant
