In [None]:
import os
import re
import requests
from bs4 import BeautifulSoup

def download_book(url, output_file, timeout=30):
    """Download the text served at ``url`` and store it in ``output_file``.

    :param url: Address of the plain-text book to fetch.
    :param output_file: Path of the file to create (or overwrite), written as UTF-8.
    :param timeout: Seconds to wait for the server. ``requests`` applies no
        timeout by default, so without this a stalled connection would block
        the notebook indefinitely.
    :raises requests.HTTPError: If the server answers with an error status.
    :raises requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as the "book".
    response.raise_for_status()
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(response.text)

def split_into_chapters(input_file):
    """Split a Project Gutenberg text file into a list of chapter strings.

    The Gutenberg header and footer are removed first. Both marker wordings
    are recognised ("START OF THIS PROJECT GUTENBERG EBOOK" and
    "START OF THE PROJECT GUTENBERG EBOOK"), and the remainder of the start
    marker line (title and closing asterisks) is dropped with it. If a marker
    is missing, that side of the text is simply left untrimmed.

    :param input_file: Path of the UTF-8 text file to read.
    :return: One string per chapter, each starting with its
        "CHAPTER <roman numeral>" heading and the line that follows it.
        Anything before the first heading is discarded.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # Trim each side independently so a missing footer does not prevent the
    # header from being removed (and vice versa).
    start_match = re.search(
        r'\*\*\* ?START OF (?:THE|THIS) PROJECT GUTENBERG EBOOK[^\r\n]*', content
    )
    if start_match:
        content = content[start_match.end():]
    end_match = re.search(
        r'\*\*\* ?END OF (?:THE|THIS) PROJECT GUTENBERG EBOOK', content
    )
    if end_match:
        content = content[:end_match.start()]

    chapter_pattern = r'(CHAPTER [IVXLCDM]+[\r\n]+.+?[\r\n]+)'
    # With one capture group, re.split returns
    # [preamble, heading1, body1, heading2, body2, ...].
    pieces = re.split(chapter_pattern, content)

    return [
        f"{heading.strip()}\n{body.strip()}"
        for heading, body in zip(pieces[1::2], pieces[2::2])
    ]

def save_chapters(chapters, output_dir):
    """Write each chapter to ``<output_dir>/Chapter_<n>.txt`` (numbered from 1).

    :param chapters: Iterable of chapter strings; each is stripped before writing.
    :param output_dir: Directory to write into; created (with parents) if missing.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

    for number, chapter in enumerate(chapters, start=1):
        chapter_file = os.path.join(output_dir, f'Chapter_{number}.txt')
        with open(chapter_file, 'w', encoding='utf-8') as file:
            file.write(chapter.strip())
        print(f'Saved: {chapter_file}')

def main():
    """Download the book, split it into chapters and save one file per chapter."""
    source_url = 'https://www.gutenberg.org/cache/epub/74210/pg74210.txt'
    raw_book_path = 'downloaded_book.txt'
    chapters_dir = 'chapters'

    # Step 1: fetch the raw text.
    print('Downloading book...')
    download_book(source_url, raw_book_path)
    print('Book downloaded.')

    # Step 2: cut it into chapters.
    print('Splitting book into chapters...')
    book_chapters = split_into_chapters(raw_book_path)
    print(f'Total chapters found: {len(book_chapters)}')

    # Step 3: persist each chapter.
    print('Saving chapters...')
    save_chapters(book_chapters, chapters_dir)
    print('All chapters saved.')


# Only run the pipeline when executed as a script / notebook cell.
if __name__ == '__main__':
    main()


### TYPICAL EDITORIAL WORKFLOW


1. Initial Assessment (Manuscript Evaluation)

- Objective: Understand the overall structure, themes, and quality of the manuscript.
- Activities:
    - Read through the entire manuscript to get a sense of the story, argument, or content.
    - Identify major strengths and weaknesses.
    - Provide a broad overview of what works well and what needs significant changes.

2. Developmental Editing (Substantive Editing)

- Objective: Address the big-picture elements such as structure, plot, character development, and overall coherence.
- Activities:
    - Work with the author to reorganize content, if necessary.
    - Suggest additions, deletions, or modifications to improve flow and clarity.
    - Ensure the manuscript has a logical structure and that the content is well-developed.
    - Focus on pacing, plot holes, character arcs, and thematic consistency.

3. Content Editing (Line Editing)

- Objective: Refine the writing style, enhance clarity, and improve readability.
- Activities:
    - Edit for sentence structure, word choice, and tone.
    - Ensure consistency in style and voice throughout the manuscript.
    - Clarify ambiguous or confusing passages.
    - Improve the overall flow of the text.

4. Copyediting

- Objective: Correct grammar, punctuation, spelling, and syntax errors.
- Activities:
    - Check for grammatical errors and correct them.
    - Fix punctuation and spelling mistakes.
    - Ensure consistency in usage (e.g., American vs. British English).
    - Verify proper use of capitalization, hyphenation, and abbreviations.
    - Fact-check and verify the accuracy of information (if necessary).

5. Proofreading

- Objective: Catch any remaining errors and ensure the final text is polished.
- Activities:
    - Perform a final review to catch any overlooked grammatical, punctuation, or spelling errors.
    - Ensure formatting consistency (headings, fonts, spacing).
    - Verify that all corrections from previous editing stages have been implemented.
    - Check for consistency in layout and design elements (if applicable).

6. Final Review

- Objective: Ensure the manuscript is ready for publication or submission.
- Activities:
    - Perform a last read-through to ensure everything is in order.
    - Address any final concerns or questions from the author.
    - Prepare the manuscript for the final format (print, digital, etc.).

### Detailed Step-by-Step Process

1. Initial Assessment (Manuscript Evaluation)

- Conduct a thorough read-through of the manuscript.
- Provide a comprehensive editorial letter summarizing initial thoughts and suggestions.
- Discuss the evaluation with the author and agree on the scope of developmental editing.

2. Developmental Editing

- Break down the manuscript into sections/chapters and evaluate each part.
- Work on restructuring and reorganizing content for better flow and coherence.
- Provide detailed feedback and suggestions for improving plot, character development, and pacing.
- Collaborate with the author on revisions and ensure alignment with the overall vision.

3. Content Editing (Line Editing)

- Edit each sentence for clarity, conciseness, and readability.
- Ensure consistency in tone, style, and voice throughout the manuscript.
- Focus on improving dialogue, descriptions, and narrative flow.
- Make suggestions for enhancing the writing style and eliminating redundancies.

4. Copyediting

- Review the manuscript line-by-line for grammatical, punctuation, and spelling errors.
- Ensure adherence to a specific style guide (e.g., Chicago Manual of Style, APA).
- Verify factual information and correct inconsistencies.
- Implement standardized formatting and usage conventions.

5. Proofreading

- Conduct a meticulous review to catch any remaining errors or typos.
- Verify the consistency of formatting elements (e.g., chapter headings, page numbers).
- Ensure all previous corrections have been accurately implemented.
- Prepare a final proof for the author’s approval.

6. Final Review

- Perform a last comprehensive read-through.
- Address any remaining concerns or final touches requested by the author.
- Ensure the manuscript is formatted correctly for its intended publication medium.
- Prepare and submit the final version for publication or submission.

### PROJECT PLAN: PHASE 1 - INITIAL ASSESSMENT

1. Text Summarization Models
These models can generate concise summaries of larger texts, maintaining essential information and context.

Models and Techniques:

- BART (Bidirectional and Auto-Regressive Transformers): Effective for abstractive summarization, generating summaries that are concise yet informative.
- T5 (Text-to-Text Transfer Transformer): Versatile model that can be fine-tuned for summarization tasks, capable of producing high-quality summaries.
- PEGASUS (Pre-training with Extracted Gap-sentences for Abstractive Summarization): Pre-trained specifically for summarization tasks, PEGASUS excels in generating coherent and contextually rich summaries.
- SummaRuNNer: A neural network-based model that focuses on extractive summarization, selecting the most important sentences to include in the summary.

2. Topic Modeling
These models help identify and extract key themes and topics from the text, providing a structured way to summarize content.

Models and Techniques:

- LDA (Latent Dirichlet Allocation): A generative statistical model that helps identify topics within a text corpus, summarizing content by key themes.
- BERTopic: Utilizes BERT embeddings for topic modeling, capturing context and generating coherent topics.
- NMF (Non-Negative Matrix Factorization): Another approach for extracting topics from the text, useful for understanding the main themes.

3. Key Phrase Extraction
These models identify and extract the most important phrases and concepts from a text, providing a condensed version of the content.

Models and Techniques:

- RAKE (Rapid Automatic Keyword Extraction): A straightforward and effective method for extracting key phrases from text.
- YAKE (Yet Another Keyword Extractor): Focuses on extracting keywords from individual documents without requiring a corpus for training.
- KeyBERT: Uses BERT embeddings to find keywords and key phrases that are contextually relevant.

4. Information Retrieval Models
These models can help identify the most relevant sections of the text, focusing the analysis on high-value content.

Models and Techniques:

- BM25 (Best Matching 25): A ranking function used in search engines to estimate the relevance of documents based on query terms.
- TF-IDF (Term Frequency-Inverse Document Frequency): A statistical measure to evaluate the importance of a word in a document relative to a corpus, useful for identifying key sections.

5. Hierarchical Attention Networks (HAN)
HAN models can help summarize documents by focusing on the hierarchical structure of the text (e.g., sentences within paragraphs, paragraphs within chapters).

Models and Techniques:

Hierarchical Attention Networks: These models apply attention mechanisms at different levels (words, sentences) to capture the hierarchical structure of the text, summarizing while preserving context.

### Integrating Models into the Workflow

To integrate these models effectively, you can design a multi-step pipeline:

**Initial Text Processing:**

- Use Key Phrase Extraction (e.g., KeyBERT) to identify the most important phrases and concepts.
- Apply Topic Modeling (e.g., BERTopic) to understand the main themes.

**Summarization:**

- Use Text Summarization Models (e.g., BART, PEGASUS) to generate concise summaries of individual sections or chapters.
- Combine these summaries to form a comprehensive overview of the manuscript.

**Contextual Refinement:**

- Apply Hierarchical Attention Networks (HAN) to maintain the hierarchical context while further condensing the text.

**Information Retrieval:**

- Use models like BM25 to rank and select the most relevant sections of the text for detailed analysis by the primary editor AI.

### Example Workflow
- Chunk the Manuscript: Divide the manuscript into chapters or sections.
- Extract Key Phrases: Use KeyBERT to identify important phrases in each section.
- Topic Modeling: Apply BERTopic to understand and summarize the main themes.
- Summarize: Use BART to generate summaries of each section.
- Hierarchical Refinement: Apply HAN to combine and refine these summaries.
- Select Relevant Sections: Use BM25 to rank the most important sections for detailed analysis.
- Primary Editing: Feed the condensed, contextually rich text into the primary editor AI for in-depth analysis and feedback.

In [None]:
import os
from keybert import KeyBERT

def extract_key_phrases(text, model, num_phrases=10):
    """Return the top key phrases of ``text`` as plain strings.

    ``model`` must expose ``extract_keywords``; it is asked for the
    ``num_phrases`` best one- and two-word phrases with English stop words
    removed, and the scores it returns are dropped.
    """
    scored_phrases = model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words='english',
        top_n=num_phrases,
    )
    phrases = []
    for phrase, _score in scored_phrases:
        phrases.append(phrase)
    return phrases

def process_chapters(input_dir, output_file, num_phrases=10):
    """Extract key phrases for every ``.txt`` chapter and write one report file.

    :param input_dir: Directory containing the chapter ``.txt`` files.
    :param output_file: Report path; each chapter name is followed by its
        phrases as an indented bullet list.
    :param num_phrases: Number of phrases requested per chapter.
    """
    def chapter_order(name):
        # os.listdir() returns names in arbitrary order, and a plain sort puts
        # 'Chapter_10.txt' before 'Chapter_2.txt'; compare the embedded
        # number first and fall back to the name itself.
        digits = ''.join(ch for ch in name if ch.isdigit())
        return (int(digits) if digits else 0, name)

    model = KeyBERT('distilbert-base-nli-mean-tokens')

    chapters_key_phrases = {}

    chapter_files = sorted(
        (name for name in os.listdir(input_dir) if name.endswith('.txt')),
        key=chapter_order,
    )
    for chapter_file in chapter_files:
        chapter_path = os.path.join(input_dir, chapter_file)
        with open(chapter_path, 'r', encoding='utf-8') as file:
            text = file.read()

        chapters_key_phrases[chapter_file] = extract_key_phrases(text, model, num_phrases)
        print(f'Extracted key phrases for {chapter_file}')

    with open(output_file, 'w', encoding='utf-8') as file:
        for chapter, phrases in chapters_key_phrases.items():
            file.write(f'{chapter}:\n')
            for phrase in phrases:
                file.write(f'  - {phrase}\n')
            file.write('\n')

    print(f'Key phrases saved to {output_file}')

def main():
    """Run key-phrase extraction over the 'chapters' directory."""
    process_chapters(
        input_dir='chapters',
        output_file='key_phrases.txt',
        num_phrases=10,
    )


# Notebook cell: run immediately.
main()

In [None]:
import os
from keybert import KeyBERT
from flair.embeddings import TransformerDocumentEmbeddings
from flair.data import Sentence
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer
from collections import Counter
import spacy

# Load spaCy's "en_core_web_sm" pipeline once at module level; extract_named_entities() below reuses it.
nlp = spacy.load("en_core_web_sm")

def extract_named_entities(text):
    """Return the surface text of every entity the module-level ``nlp`` pipeline finds in ``text``."""
    return [entity.text for entity in nlp(text).ents]

def extract_key_phrases(text, model, num_phrases=10):
    """Return the unique key phrases and named entities found in ``text``.

    :param text: Chapter text to analyse.
    :param model: Object exposing ``extract_keywords`` (a KeyBERT model here).
    :param num_phrases: Number of key phrases requested from ``model``.
    :return: List without duplicates: the model's phrases first, in the order
        the model returned them, followed by named entities in order of
        first appearance.
    """
    ranked = model.extract_keywords(
        text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=num_phrases
    )
    candidates = [phrase for phrase, _score in ranked]
    candidates.extend(extract_named_entities(text))
    # dict.fromkeys de-duplicates while keeping first-seen order; the previous
    # set union threw the ranking away and produced a different order per run.
    return list(dict.fromkeys(candidates))

def extract_topics(texts):
    """Fit a BERTopic model on ``texts`` and return its topic-info table.

    Topics are described with English-stop-word-filtered unigrams and bigrams.
    """
    bigram_vectorizer = CountVectorizer(stop_words='english', ngram_range=(1, 2))
    model = BERTopic(vectorizer_model=bigram_vectorizer)
    # Only the fitted model is needed afterwards; the per-document
    # assignments returned here are not used.
    _assignments, _probabilities = model.fit_transform(texts)
    return model.get_topic_info()

def process_chapters(input_dir, output_file, num_phrases=10):
    """Extract key phrases, named entities and topics for all ``.txt`` chapters.

    :param input_dir: Directory containing the chapter ``.txt`` files.
    :param output_file: Report path; per-chapter phrase lists are followed by
        a "Topics:" section holding the string form of the topic table.
    :param num_phrases: Number of key phrases requested per chapter.
    """
    def chapter_order(name):
        # os.listdir() returns names in arbitrary order, and a plain sort puts
        # 'Chapter_10.txt' before 'Chapter_2.txt'; compare the embedded
        # number first and fall back to the name itself.
        digits = ''.join(ch for ch in name if ch.isdigit())
        return (int(digits) if digits else 0, name)

    # model = KeyBERT(TransformerDocumentEmbeddings('distilbert-base-nli-mean-tokens'))
    model = KeyBERT('all-MiniLM-L6-v2')

    chapters_key_phrases = {}
    chapters_texts = []

    chapter_files = sorted(
        (name for name in os.listdir(input_dir) if name.endswith('.txt')),
        key=chapter_order,
    )
    for chapter_file in chapter_files:
        chapter_path = os.path.join(input_dir, chapter_file)
        with open(chapter_path, 'r', encoding='utf-8') as file:
            text = file.read()

        chapters_key_phrases[chapter_file] = extract_key_phrases(text, model, num_phrases)
        chapters_texts.append(text)
        print(f'Extracted key phrases and entities for {chapter_file}')

    # Topics are modelled across all chapters at once, one document per chapter.
    topic_info = extract_topics(chapters_texts)

    with open(output_file, 'w', encoding='utf-8') as file:
        for chapter, phrases in chapters_key_phrases.items():
            file.write(f'{chapter}:\n')
            for phrase in phrases:
                file.write(f'  - {phrase}\n')
            file.write('\n')
        file.write('Topics:\n')
        file.write(str(topic_info))

    print(f'Key phrases and topics saved to {output_file}')

def main():
    """Run phrase, entity and topic extraction over the 'chapters' directory."""
    process_chapters(
        input_dir='chapters',
        output_file='key_phrases_and_topics.txt',
        num_phrases=10,
    )


# Notebook cell: run immediately.
main()

In [None]:
import os
from transformers import pipeline
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer

# Module-level models shared by the helpers in this cell: `nlp` is used by
# extract_named_entities(), `summarizer` by summarize_with_context().
nlp = spacy.load("en_core_web_sm")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def extract_named_entities(text):
    """Run the module-level ``nlp`` pipeline on ``text`` and list each entity's text."""
    parsed = nlp(text)
    names = []
    for entity in parsed.ents:
        names.append(entity.text)
    return names

def extract_key_phrases_tfidf(text, num_phrases=10):
    """Return up to ``num_phrases`` terms of ``text`` ranked by TF-IDF weight.

    The vectorizer is fitted on ``text`` alone (a one-document corpus) with
    English stop words removed and its vocabulary capped at ``num_phrases``.
    """
    tfidf = TfidfVectorizer(stop_words='english', max_features=num_phrases)
    weights = tfidf.fit_transform([text]).toarray().flatten()
    terms = tfidf.get_feature_names_out()

    # Highest weight first; ties keep the vectorizer's feature order.
    ranked = sorted(zip(terms, weights), key=lambda pair: pair[1], reverse=True)
    return [term for term, _weight in ranked[:num_phrases]]

def summarize_with_context(text, key_phrases):
    """Produce one short summary per key phrase.

    For each phrase, an instruction sentence naming the phrase is prepended
    to ``text`` and the result is passed to the module-level ``summarizer``.

    :param text: Chapter text used as context.
    :param key_phrases: Iterable of phrases to summarise around.
    :return: Dict mapping each phrase to the generated summary text.
    """
    context_summaries = {}
    for phrase in key_phrases:
        prompt = f"Summarize the context and importance of the following key phrase: {phrase}\n\n{text}"
        # truncation=True clips the input to the model's maximum length; a
        # whole chapter otherwise exceeds it and the call fails.
        # NOTE(review): the summarizer is loaded as a plain summarization
        # model, so the instruction sentence is presumably just summarised
        # along with the chapter. Confirm the outputs are phrase-specific.
        outputs = summarizer(
            prompt, max_length=50, min_length=25, do_sample=False, truncation=True
        )
        context_summaries[phrase] = outputs[0]['summary_text']
    return context_summaries

def load_chapter(chapter_file, input_directory):
    """Return the UTF-8 contents of ``chapter_file``, or None if it is not a ``.txt`` file."""
    if not chapter_file.endswith('.txt'):
        return None
    with open(os.path.join(input_directory, chapter_file), 'r', encoding='utf-8') as handle:
        return handle.read()

def clean_combined_phrases(combined_phrases):
    """Return the phrases with newlines turned into spaces and outer whitespace trimmed."""
    return [phrase.replace('\n', ' ').strip() for phrase in combined_phrases]

def process_chapter(chapter_content, num_phrases=10):
    """Summarise a chapter around its TF-IDF terms and named entities.

    :param chapter_content: Full text of the chapter.
    :param num_phrases: Number of TF-IDF terms to request.
    :return: Dict mapping each phrase to its contextual summary.
    """
    tfidf_phrases = extract_key_phrases_tfidf(chapter_content, num_phrases)
    named_entities = extract_named_entities(chapter_content)

    # Clean BEFORE de-duplicating: "New\nYork" and "New York" collapse to the
    # same phrase only after cleaning, and every surviving phrase costs one
    # summarizer call. Empty leftovers are dropped for the same reason.
    cleaned = clean_combined_phrases(list(tfidf_phrases) + list(named_entities))
    # dict.fromkeys keeps first-seen order, so runs are reproducible.
    combined_phrases = [phrase for phrase in dict.fromkeys(cleaned) if phrase]

    print(f"combined phrases: {combined_phrases}")

    return summarize_with_context(chapter_content, combined_phrases)

def save_summary(output_dir, chapter_file, contextual_summaries):
    """Write ``phrase: summary`` lines to ``<chapter stem>_key_phrases_with_context.txt`` in ``output_dir``."""
    stem = os.path.splitext(chapter_file)[0]
    target = os.path.join(output_dir, f'{stem}_key_phrases_with_context.txt')
    with open(target, 'w', encoding='utf-8') as handle:
        handle.writelines(
            f'{phrase}: {context}\n'
            for phrase, context in contextual_summaries.items()
        )

def main():
    """Summarise the first chapter file found in 'chapters' into 'chapter_outputs'."""
    input_directory = 'chapters'
    output_dir      = 'chapter_outputs'
    num_phrases     = 10

    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that "the first chapter" is the same file on every run.
    for chapter_file in sorted(os.listdir(input_directory)):
        chapter_content = load_chapter(chapter_file, input_directory)
        if chapter_content is None:
            # load_chapter() returns None for anything that is not a .txt
            # file; passing that on would crash the extractors.
            continue
        print(f"loaded {chapter_file}...")
        summary = process_chapter(chapter_content, num_phrases)
        print(f"summarized {chapter_file}...")
        save_summary(output_dir, chapter_file, summary)
        print(f"saved summary for {chapter_file}")
        # Only one chapter is processed per run.
        break

# main()

In [None]:
import os
from transformers import pipeline

# Summarization pipeline created once at module level and reused by summarize_chapter().
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def load_chapter(chapter_file, input_directory):
    """Read a ``.txt`` chapter from ``input_directory``; other file types yield None."""
    content = None
    if chapter_file.endswith('.txt'):
        full_path = os.path.join(input_directory, chapter_file)
        with open(full_path, 'r', encoding='utf-8') as source:
            content = source.read()
    return content

def summarize_chapter(chapter_content):
    """Return a 100-200 token summary of ``chapter_content``.

    :param chapter_content: Chapter text. ``None`` or text shorter than 100
        characters is reported as too short instead of being summarised.
    :return: The summary, or a fixed placeholder string when the content is
        too short or the summarizer fails.
    """
    # `not chapter_content` also covers None, which load_chapter() returns
    # for non-.txt files and which would make len() raise TypeError.
    if not chapter_content or len(chapter_content) < 100:
        return "Content too short to summarize."

    try:
        # truncation=True clips the input to the model's maximum length; a
        # whole chapter otherwise exceeds it and the call fails.
        summary = summarizer(
            chapter_content,
            max_length=200,
            min_length=100,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']
        return summary
    except Exception as e:
        # Deliberate best-effort: one failing chapter is reported, not fatal.
        print(f"Error during summarization: {e}")
        return "Error generating summary."

def save_summary(output_dir, chapter_file, summary):
    """Store ``summary`` as ``<chapter stem>_summary.txt`` inside ``output_dir``."""
    stem, _extension = os.path.splitext(chapter_file)
    destination = os.path.join(output_dir, f'{stem}_summary.txt')
    with open(destination, 'w', encoding='utf-8') as handle:
        handle.write(summary)

def main():
    """Summarise the first chapter file found in 'chapters' into 'chapter_outputs'."""
    input_directory = 'chapters'
    output_dir      = 'chapter_outputs'

    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that "the first chapter" is the same file on every run.
    for chapter_file in sorted(os.listdir(input_directory)):
        chapter_content = load_chapter(chapter_file, input_directory)
        if chapter_content is None:
            # load_chapter() returns None for anything that is not a .txt
            # file; skip it rather than summarising nothing.
            continue
        print(f"Loaded {chapter_file}...")
        summary = summarize_chapter(chapter_content)
        print(f"Summarized {chapter_file}...")
        save_summary(output_dir, chapter_file, summary)
        print(f"Saved summary for {chapter_file}")
        # Only one chapter is processed per run.
        break

main()


In [3]:
import os
import re
import nltk
# Fetch the Punkt sentence-tokenizer data used by nltk.sent_tokenize() in split_into_sentences().
# NOTE(review): newer NLTK releases reportedly look for a 'punkt_tab' resource instead; confirm against the installed version.
nltk.download('punkt')

def clean_text(text):
    """Collapse every whitespace run to one space and drop characters that cannot be UTF-8 encoded."""
    import re
    collapsed = re.compile(r'\s+').sub(' ', text)
    # Round-trip through UTF-8, silently discarding anything unencodable.
    utf8_bytes = collapsed.encode('utf-8', 'ignore')
    return utf8_bytes.decode('utf-8')

def load_chapter(chapter_file, input_directory):
    """Return the text of a ``.txt`` chapter file, or None for any other file name."""
    is_text_file = chapter_file.endswith('.txt')
    if is_text_file:
        with open(os.path.join(input_directory, chapter_file), 'r', encoding='utf-8') as chapter:
            text = chapter.read()
        return text
    return None

def split_and_classify_paragraphs(text):
    """Split ``text`` on blank lines and sort the blocks into two lists.

    Within each block only lines longer than 40 characters, or ending in
    '.', '!' or '?', are kept and joined with single spaces. A joined block
    containing at most one '.' is "uncertain"; one with more is a "definite"
    paragraph. Blocks with no surviving lines are dropped.

    :return: Tuple ``(uncertain_blocks, definite_paragraphs)``.
    """
    sentence_endings = ('.', '!', '?')
    uncertain_blocks = []
    definite_paragraphs = []

    for block in re.split(r'\n\s*\n', text.strip()):
        kept_lines = []
        for raw_line in block.split('\n'):
            line = raw_line.strip()
            if len(line) > 40 or line.endswith(sentence_endings):
                kept_lines.append(line)

        merged = " ".join(kept_lines)
        if not merged:
            continue

        if merged.count('.') <= 1:
            uncertain_blocks.append(merged)
        else:
            definite_paragraphs.append(merged)

    return uncertain_blocks, definite_paragraphs

def remove_annotations(text):
    """Strip bracketed and parenthesised annotations from ``text``.

    ``[...]`` and ``(...)`` spans are deleted, stray unmatched square
    brackets are removed, and only the commas left *dangling* by those
    deletions are cleaned up (a space before a comma, doubled commas, a
    comma directly before other punctuation, a comma at either end).
    Ordinary commas inside the sentence are preserved.

    :param text: Paragraph text, possibly containing editorial notes.
    :return: The cleaned text with whitespace collapsed and trimmed.
    """
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'\(.*?\)', '', text)
    # Nested notes such as "[a [b] c]" leave an unmatched bracket behind.
    text = re.sub(r'[\[\]]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()

    text = re.sub(r'\s+,', ',', text)               # "word , word" -> "word, word"
    text = re.sub(r',(?:\s*,)+', ',', text)         # ",," or ", ," -> ","
    text = re.sub(r',\s*([.;:!?])', r'\1', text)    # "end, ." -> "end."
    text = re.sub(r'^[,\s]+|[,\s]+$', '', text)     # comma at either end of the text
    return text

def split_into_sentences(paragraph):
    """
    Break a paragraph into its sentences using NLTK's sentence tokenizer.

    :param paragraph: The paragraph text to tokenize.
    :return: The sentences as a list, in their original order.
    """
    return nltk.sent_tokenize(paragraph)

# Load one chapter, classify its paragraphs, then clean and sentence-split
# the second "definite" paragraph (index 1).
chapter12_content = load_chapter('chapter_name_here.txt', 'stuff')
uncertain_blocks, paragraphs = split_and_classify_paragraphs(chapter12_content)
cleaned_paragraph = remove_annotations(paragraphs[1])
sentences = split_into_sentences(cleaned_paragraph)


In [4]:

# chapter12_content = clean_text(chapter12_content)  # Clean the text

In [None]:
# Necessary imports
import torch
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer

# Build the question-answering pipeline used by extract_fiction_elements().
# The print('0')..print('3') calls are progress markers for this setup cell.
# NOTE(review): qa_model_name is only referenced by the commented-out lines
# below; the active pipeline() call passes no model, so the library chooses
# its own default checkpoint. Confirm which model is intended.
qa_model_name = "distilbert-base-uncased-distilled-squad"
print('0')
# qa_model = AutoModelForQuestionAnswering.from_pretrained(qa_model_name)
print('1')
# qa_tokenizer = AutoTokenizer.from_pretrained(qa_model_name)
print('2')
# qa_pipeline = pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
qa_pipeline = pipeline("question-answering")
print('3')
def extract_fiction_elements(chapter_text):
    """
    Ask the module-level QA pipeline one fixed question per story element.

    Args:
    - chapter_text (str): The text of the chapter, used as the QA context.

    Returns:
    - dict: Maps each element name (plot, characters, setting, themes,
      conflict, resolution) to the answer text the pipeline returned.
    """
    element_questions = (
        ("plot", "What are the main events?"),
        ("characters", "Who are the key characters?"),
        ("setting", "Where does the story take place?"),
        ("themes", "What are the central themes?"),
        ("conflict", "What is the main conflict in this chapter?"),
        ("resolution", "How is the conflict resolved?"),
    )

    answers = {}
    for step, (element, question) in enumerate(element_questions):
        # Progress marker: one line per question asked.
        print(f'extract_fiction_elements {step}')
        answers[element] = qa_pipeline(question=question, context=chapter_text)['answer']

    return answers


# Short sample passage used to smoke-test extract_fiction_elements().
chapter_text = """
The wind howled through the trees as John trudged through the snow. He knew he was running out of time.
The sun was setting, and with it, any hope of finding shelter. He could feel the weight of his journey 
bearing down on him. Suddenly, in the distance, he saw a faint light flickering. Summoning the last of 
his strength, he pushed forward, hoping it wasn't just a mirage...
"""

# Run the extraction on the sample and show the resulting dict.
elements = extract_fiction_elements(chapter_text)
print(elements)


In [None]:
from openai import OpenAI
import os

# Read the API key from the environment instead of pasting it into the
# notebook, so it never ends up in version control or a shared copy.
# If OPENAI_API_KEY is unset this is None (previously an empty string).
secret = os.environ.get('OPENAI_API_KEY')
client = OpenAI(
    api_key=secret,
)


In [None]:
# Send the module-level `prompt` to the chat model under a fiction-editor persona.
chat_completion = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {
            "role": "system",
            "content": "You are a content editor with a specialty for fiction literature.",
        },
        {
            "role": "user",
            "content": prompt,
        },
    ],
)

In [None]:
# Draft system prompt for a style-guide "Sentence Structure and Clarity" assistant.
# This is a bare expression: the string is not assigned to any name, so the
# cell only displays it. It contains no {placeholders} despite the f prefix.
# NOTE(review): the Output section names an 'edited' key but never says what
# its value should be - confirm the intended schema before using this prompt.
f"""
You are part of an content editing team. This team is a fleet of specially trained GPT assistants. Each member on the team is designed to perform a specific function that in aggregate, would provide a comprehensive editorial service to an author of a full manuscript.

Your sub-team focuses on implementing the codified rules of a given style guide. In this case, you are adhering to the Taylor & Francis style guide.

Here are the specifics of your role:
Title: Sentence Structure and Clarity

Input:
- you will be provided a json object with two keys: 'sentence' and 'rule'
- the value of the 'sentence' key will be the sentence you are to check
- the value of the 'rule' key will be the specific style rule you are to check the given sentence for

Expectation(s):
- check that the sentence obeys the given rule to check
- if the rule is not obeyed by the sentence, rewrite the sentence so that it does, otherwise leave it unchanged

Output:
- json object with two keys: 'sentence' and 'edited'
- the value of the 'sentence' key should be the given sentence. if it was not edited by you because it obeyed the rule, provide the same sentence back unaltered. if required edits because it did not obey the rule, provided the edited version so that it now obeys the given rule
- no other content or data should be provided back besides the designated json output
"""

In [None]:
# User prompt asking for a JSON "Consistency" critique of one chapter.
# NOTE(review): {chapter_content} is not assigned at module level anywhere in
# the visible cells (the loaded text is named chapter12_content), so this
# cell presumably raises NameError unless it is defined elsewhere - confirm.
# NOTE(review): the text asks for a "summary_score" key but later refers to
# a "consistency_score"; confirm which name the consumer expects.
prompt = f"""
Analyse the following chapter in order to provide a critique of the 5 pillars for a well structured story. Your focus for this pass is on "Consistency". The guiding question here is "Does this chapeter Maintain a consistent tone and pacing throughout."

Your response will be used by another system, so the structure and format of your respone is important to follow this framework:

- Structure of the response should be a JSON object.
- There should be a key in the JSON object called "summary_score" and it should give a score out of 10, judging as objectively as possible how much of the content is "Consistent" to the overall tone and pacing.
- There should be one key per section in the content that needs to be improved to help bring the work to a higher "consistency_score". The key format should be the paragraph number being referenced.
- The value for the above "paragraph key" should be a concise summary of what needs to be improved in this section to help bring up the consistency score. It should start with a single word prescription from these three options: Delete, Alter, Extend. Followed by a short and concise description of why this section should either be deleted, altered, or extended in order to improve overall consistency.
- No additional content should be included in your response outside of the JSON object described in the instructions above

Chapter content below:

{chapter_content}
"""

In [None]:
# Pre-written summary of chapter 12; passed as the `summary` argument to
# craft_assistant2_prompt() in the per-paragraph loop further down.
ch12_summary = """
### Summary of Chapter 12: The New Economy and a Vision for India's Future Development

**Core Thematic Elements:**
1. **Vision for India’s Future Development**: The chapter introduces a vision for India's development, grounded in Social Market Economy principles, aiming to address challenges brought by digital technology and new economic developments. It emphasizes the complexity of implementing such a model in India's diverse and multifaceted socioeconomic landscape.

2. **Global Technological Developments**: The chapter highlights the significant impact of recent technological advancements (e.g., AI, IoT, renewable energy) on socioeconomic changes. It discusses the opportunities and challenges posed by these technologies, including the need for an integrated approach to their adoption.

3. **Strategic Futures**: A call for a transformation in institutions and mindsets is made to address the evolving challenges of the digital economy. The need for new development strategies, particularly in education, workforce training, and policy adaptation, is emphasized.

4. **Comparative Perspectives**: The chapter compares India's development trajectory with that of other regions (Asia, Europe, the Americas), emphasizing the limitations of replicating models from other countries. It notes the importance of learning from global experiences while adapting them to India's unique context.

5. **A Vision for India's Future Development**: The chapter outlines a vision for India's development that balances technological advancement with social equity, emphasizing the need for a cohesive and inclusive approach involving government, private sector, and civil society.

6. **Lessons from Global Experiences**: It draws lessons from the social market economies of Germany, Nordic countries, and East Asia, suggesting that while these models offer valuable insights, they cannot be directly applied to India due to different historical and socioeconomic contexts.

**General Style and Tone:**
- The writing is analytical and forward-looking, combining a detailed examination of historical development models with a speculative discussion on the future.
- The tone is scholarly, with a focus on policy implications and strategic recommendations.
- The chapter balances optimism about technological progress with caution regarding its potential downsides.

**Key Points:**
- The implementation of a Social Market Economy in India faces challenges due to its diversity and political complexity.
- Recent technological advancements have a transformative impact on socioeconomic development, requiring an integrated and multidimensional approach.
- India’s development trajectory should consider global lessons but must be tailored to its unique challenges and opportunities.
- A cohesive and inclusive development model, involving all societal stakeholders, is essential for India’s future.
- Learning from the experiences of other regions is crucial, but India must adapt these lessons to its specific context.

This summary provides the essential components required to understand the chapter's main arguments, thematic elements, and the overall approach, while also maintaining a focus on the broader implications for India's future development.
"""

# Raw first paragraph of chapter 12, still carrying the author's bracketed
# and parenthesised working notes.
ch12_paragraph1 = """[Note: [Words???], [/consolance], (Mention cultural and other diversity, traits of selfish focus on individual and  family and limited willingness to sacrifice for the common good, barring many exceptions making it hard to generalise– cultural impediment – make social market economy paradigm unrealistic expect for certain pockets or type of development.)]This chapter presents a new economy a vision for India's future development vision for India grounded in Social Market Economy principles and challenges relating to digital technology and other new economic developments. The vision points to an ambitious, aspirational goal , though realising the complexity and diversity, political economy, and other issues in India's socioeconomic fabric makes it hard to build consensus on a new social market economy "model" in line  with the requirement of a new economy and even more to implement such a development model/paradigm."""

In [None]:
# Shared preamble interpolated at the start of every assistant system prompt below.
assistant_setup = "You are part of a team of specially tuned AI Agents whose function is to edit content/literature. Your team specializes in academic literature review. Your specific role on this team is as follows:"
# assistant1: system prompt for the summarising agent.
assistant1 = f"""
{assistant_setup} summarize whole sections of a work and reduce it to its core components. The reason you perform this function is because you are prepping the content/section for the next member in your team of AI editors, reducing the content length to only the critical components required for understanding what a section is about. Your focus should be highlighting the core thematic elements, the general style and tone of writing, and the key points being made in the chapter/section."""
# assistant2: system prompt for the per-paragraph consistency checker; it is
# sent as the system message in the per-paragraph loop further down and asks
# for a JSON object with "score" and "comments" keys.
assistant2 = f"""
{assistant_setup} you take a summary of a section that is prepared by a member of your team and you analyze a given paragraph from this section to check its "Consistency". The guiding questions here are "Does this paragraph maintain a consistent tone and pacing throughout? Does it fit in to the chapter given our understanding of the chapter's contents and messaging"

Your response will be used by another system, so the structure and format of your response is important to follow this framework:

- Structure of the response should be a JSON object.
- There should be a key in the JSON object called "score" and it should give a score out of 10, judging as objectively as possible how much of the content is "Consistent" to the overall tone, pacing, and message.
- There should be a key in the JSON object called "comments". The value for the "comments" key should be a concise summary of what needs to be improved in this section to help bring up the consistency "score". It should start with a single word prescription from these three options: Delete, Alter, Extend. Followed by a short and concise description of why this section should either be deleted, altered, or extended in order to improve overall consistency.
- No additional content should be included in your response outside of the JSON object described in the instructions above
"""

In [50]:
def craft_assistant2_prompt(summary, paragraph):
    """Build the user message pairing a section summary with one paragraph.

    The returned text starts with a newline, carries a four-space indent on
    each labelled line, and ends with a newline followed by four spaces.

    Args:
        summary: Summary of the section; formatted into the text as-is.
        paragraph: Paragraph to be analyzed; formatted into the text as-is.

    Returns:
        The assembled prompt string.
    """
    pad = "    "
    pieces = [
        "",  # the prompt opens with a line break
        f"{pad}Summary of content below:",
        f"{pad}{summary}",
        "",
        f"{pad}Paragraph to be analyzed below:",
        f"{pad}{paragraph}",
        pad,  # trailing indent after the final line break
    ]
    return "\n".join(pieces)

In [None]:
import re
import nltk
# Side effect: asks NLTK to download its 'punkt' resource when this cell runs.
# Presumably needed for tokenization elsewhere in the notebook — the usage is
# not visible in this section.
nltk.download('punkt')

In [None]:
# Spot-check: print the first element after dropping the leading one. For a
# plain list this is the item at index 1 (the second paragraph).
# NOTE(review): `paragraphs` is defined outside this section — confirm its type.
print(paragraphs[1:][0])

In [51]:
# Run the consistency review over every paragraph: one chat-completion request
# per paragraph, with `assistant2` as the system message and the summary plus
# paragraph (via craft_assistant2_prompt) as the user message.
# `client`, `paragraphs` and `ch12_summary` are defined outside this section.
results = {}
for i, paragraph in enumerate(paragraphs):
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": assistant2},
            {"role": "user", "content": craft_assistant2_prompt(ch12_summary, paragraph)}
        ],
        model="gpt-3.5-turbo",
    )
    # Keys are 1-based ('paragraph_1', 'paragraph_2', ...). The value is the
    # message content of the first choice, stored as returned: it is not
    # parsed or validated as JSON here.
    results[f'paragraph_{i+1}'] = chat_completion.choices[0].message.content

In [None]:
# Bare expression: as the last line of a notebook cell this displays the dict
# of per-paragraph responses; it has no effect when run as a plain script.
results

In [None]:
# Hard-coded review notes for a first editing pass.
# - "consistency_score" holds a single overall integer score.
# - Every other key is a paragraph number written as a string; the value is a
#   comment that begins with one of the prescriptions Delete / Alter / Extend.
# - There is no entry for key "2".
# NOTE(review): how this dict was derived (e.g. collated from `results` or
# written by hand) is not visible in this section — confirm before relying on it.
pass1 = {
    "consistency_score": 6,
    "1": "Alter. The introduction is clear but slightly verbose, which affects the pacing. Simplifying the language and cutting unnecessary phrases can help maintain a more consistent tone.",
    "3": "Alter. The paragraph starts strong but introduces several ideas that deviate slightly from the established tone, creating a sense of rushed pacing. Simplifying the language and focusing on one or two main points can improve consistency.",
    "4": "Extend. The transition between discussing global ICT changes and India's specific context is abrupt. A few additional sentences to bridge these ideas would improve the flow.",
    "5": "Extend. The section on India's recent achievements and vision could be expanded slightly to provide smoother transitions and maintain the pacing established in earlier paragraphs.",
    "6": "Alter. The paragraph introduces too many new concepts without adequate connection to earlier content. Revising the structure to focus on fewer points would enhance consistency.",
    "7": "Delete. This paragraph repeats information already mentioned earlier in the chapter. Removing it will streamline the flow and reinforce the chapter's consistency.",
    "8": "Extend. The mention of India’s socioeconomic challenges lacks depth, creating a disjointed feel. Expanding on this idea would help maintain the chapter's pacing.",
    "9": "Alter. The discussion on creative and disruptive technologies starts well but then shifts abruptly to a broader analysis. A more gradual transition or a split into two paragraphs might maintain the pacing better.",
    "10": "Delete. The paragraph revisits themes already discussed, which disrupts the flow. Removing it will keep the chapter focused and consistent.",
    "11": "Alter. The paragraph shifts to a more technical discussion, which can be jarring. Rewriting to align with the chapter's general tone will help maintain consistency.",
    "12": "Extend. The mention of demographic changes is too brief and feels disconnected. Expanding on this idea and linking it to earlier content would help with consistency.",
    "13": "Extend. The brief mention of India's economic standing lacks detail and disrupts the flow. Expanding on this idea or linking it more clearly to the previous content would enhance consistency.",
    "14": "Alter. The tone becomes overly formal, which contrasts with the previous sections. Simplifying the language will help maintain a consistent tone throughout.",
    "15": "Delete. The paragraph introduces redundant information already covered earlier, which affects pacing. Removing it will improve consistency.",
    "16": "Alter. The discussion of international comparisons introduces a new analytical tone that doesn’t align with earlier content. Revising the language to match the rest of the chapter would help.",
    "17": "Alter. This paragraph introduces complex concepts that shift the tone to a more academic and theoretical style. Simplifying the language or breaking it into two parts could help maintain a consistent tone.",
    "18": "Extend. The mention of ICT's impact on different sectors is brief and feels disconnected. Adding more detail would enhance the flow and consistency.",
    "19": "Alter. The paragraph introduces complex concepts that disrupt the pacing. Simplifying the language or breaking it into two parts could help maintain a consistent tone.",
    "20": "Delete. The paragraph is redundant with earlier content. Removing it will help streamline the chapter and maintain consistency.",
    "21": "Alter. The tone shifts to a more theoretical discussion, which contrasts with earlier content. Rewriting to align with the chapter’s overall tone will improve consistency.",
    "22": "Delete. The analysis of India's potential future development scenarios becomes repetitive and diverges from the earlier tone. Removing redundant statements will keep the chapter focused and consistent.",
    "23": "Alter. The conclusion introduces new ideas abruptly, which affects the flow. Revising the structure to summarize rather than introduce new concepts would enhance consistency."
}