In [1]:
import pathlib
import textwrap
import os
import google.generativeai as genai
from IPython.display import display, Markdown, HTML
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import json
import re
import time
import tiktoken

# Load environment variables
load_dotenv(find_dotenv())
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')

# Configure the API client with the API key
genai.configure(api_key=GOOGLE_API_KEY)

model = genai.GenerativeModel('gemini-1.5-pro')


In [2]:
def to_markdown(text):
    text = text.replace('•', '  *')
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

def upload_rubric(file_path):
    with open(file_path, 'r') as file:
        content = file.read()
    return content


# Documentation for Grading Functions


## Function: format_prompt

### Description
The `format_prompt` function constructs a prompt string that includes the grading rubric and the student's essay. This prompt is used to validate the rubric and grade the essay if the rubric is appropriate.

### Parameters
- **rubric_content** (`str`): The content of the grading rubric in string format.
- **essay_text** (`str`): The text of the essay to be graded.

### Returns
- **prompt** (`str`): A formatted string that includes the grading rubric and the student's essay, with specific instructions for validation and grading.



## Function: analyze_essay_with_rubric

### Description
The `analyze_essay_with_rubric` function takes a rubric content, an essay text, and a model to generate a grading feedback for the essay based on the rubric. The function constructs a prompt using the rubric content and essay text, and then uses the provided AI model to generate the grading feedback. If the API request fails due to resource exhaustion, the function retries the request up to a specified number of times, with exponential backoff between attempts.

### Parameters
- **model** (`GenerativeModel`): The AI model used to generate the content.
- **rubric_content** (`str`): The content of the grading rubric in string format.
- **essay_text** (`str`): The text of the essay to be graded.
- **retries** (`int`, optional): The number of times to retry the API request in case of resource exhaustion. Default is 3.

### Returns
- **response.text** (`str`): The grading feedback generated by the model. If the API request fails after the specified number of retries, the function returns `None`.


In [3]:
def format_prompt(rubric_content, essay_text):
    return f"""
    Validate the following grading rubric for formatting and appropriateness:

    Grading Rubric:
    {rubric_content}

    Is the rubric formatted correctly and appropriate for grading the essay given there's chance that the rubric might be for a different subject? (yes or no) Please explain why for either case. 
    If no, notify the user that the rubric is not correponding to the student essay; do not grade the essay or come up with a correct rubric. 
    If yes, grade the essay with the rubric.

    Student Response:
    {essay_text}

    Please provide the criterion from the rubric, a score, and an explaination.
    
    Please do not say criterion and the criterion number next to the actual criterion. Always provide an explanation.
    
    Please make the final score a weighted average of all the scores.
    """

def analyze_essay_with_rubric(model, rubric_content, essay_text, retries=3):
    prompt = format_prompt(rubric_content, essay_text)
    for attempt in range(retries):
        try:
            response = model.generate_content(prompt)
            return response.text
        except google.api_core.exceptions.ResourceExhausted as e:
            print(f"Resource exhausted: {e}. Retrying in {2 ** attempt} seconds...")
            time.sleep(2 ** attempt)
    return None


# Functions for Essay Grading and Improvement Suggestions
 


## Function: format_suggestions_and_rewrite_prompt

### Description
The `format_suggestions_and_rewrite_prompt` function constructs a prompt string that includes the grading feedback and the student's essay. This prompt is used to generate suggestions for improvement and to rewrite the essay using the suggested improvements.

### Parameters
- **grading_feedback** (`str`): The feedback received from grading the essay, which includes the criterion, score, and explanation.
- **essay_text** (`str`): The text of the essay to be improved and rewritten.

### Returns
- **prompt** (`str`): A formatted string that includes the grading feedback and the student's essay, with specific instructions for generating suggestions and rewriting the essay. The instructions ask for the smallest chunk of the original text where each improvement was applied, the criterion from the rubric, and the reason for the suggested improvement, all formatted in JSON.


## Function: generate_suggestions_and_rewrite_essay

### Description
The `generate_suggestions_and_rewrite_essay` function takes grading feedback, an essay text, and a model to generate suggestions for improvement and rewrite the essay using those suggestions. The function constructs a prompt using the `format_suggestions_and_rewrite_prompt` function, which combines the grading feedback and essay text into a single prompt. It then uses the provided AI model to generate the suggestions and rewritten essay. If the API request fails due to resource exhaustion, the function retries the request up to a specified number of times, with exponential backoff between attempts.

### Parameters
- **model** (`GenerativeModel`): The AI model used to generate the content.
- **grading_feedback** (`str`): The feedback received from grading the essay, which includes the criterion, score, and explanation.
- **essay_text** (`str`): The text of the essay to be improved and rewritten.
- **retries** (`int`, optional): The number of times to retry the API request in case of resource exhaustion. Default is 3.

### Returns
- **response.text** (`str`): The suggestions and rewritten essay generated by the model. If the API request fails after the specified number of retries, the function returns `None`.

### How it Works
1. **Construct Prompt**: The function calls the `format_suggestions_and_rewrite_prompt` function, passing in the grading feedback and essay text. This function returns a formatted prompt string.
2. **Generate Content**: The function uses the AI model to generate content based on the constructed prompt. It attempts to call the model's `generate_content` method.
3. **Retry Mechanism**: If the API request fails due to resource exhaustion, the function catches the `ResourceExhausted` exception and retries the request up to the specified number of times, with exponential backoff between attempts.
4. **Return Response**: If the model successfully generates a response, the function returns the generated text. If the request fails after all retries, the function returns `None`.


In [4]:
def format_suggestions_and_rewrite_prompt(grading_feedback, essay_text):
    return f"""
    Based on the following grading feedback, 
    
    1. Provide suggestions for improvement
    2. Rewrite the essay using the suggested improvements

    Grading Feedback:
    {grading_feedback}

    Essay:
    {essay_text}
    
    Take each improvement provided and identify,
    a. the smallest chunk of the original text where it was applied
    b. the criterion from the rubric 
    c. the reason for the suggested improvement
    
    Provide steps a, b, and c in the following JSON format: 
    [
        {{"improvement": "improvement_1", "criterion_from_rubric": "criterion_from_rubric_1", "reason_for_suggestion": "reason_for_suggestion_1", "original_text": "original_text_1", "revised_text": "revised_text_1"}},
        ...
        {{"improvement": "improvement_n", "criterion_from_rubric": "criterion_from_rubric_n", "reason_for_suggestion": "reason_for_suggestion_n", "original_text": "original_text_n", "revised_text": "original_text_n"}}
    ]
    """

def generate_suggestions_and_rewrite_essay(model, grading_feedback, essay_text, retries=3):
    prompt = format_suggestions_and_rewrite_prompt(grading_feedback, essay_text)
    for attempt in range(retries):
        try:
            response = model.generate_content(prompt)
            if response and response.text:
                print("Full response from model:\n", response.text)  # Debugging statement
                return response.text
            else:
                print("Error: No response from model")
                return None
        except google.api_core.exceptions.ResourceExhausted as e:
            print(f"Resource exhausted: {e}. Retrying in {2 ** attempt} seconds...")
            time.sleep(2 ** attempt)
    return None


# Functions to Clean the JSON Output

In [5]:
def clean_json_string(json_string):
    # Remove Markdown formatting and trailing commas
    json_string = json_string.replace('```json', '').replace('```', '').strip()
    json_string = re.sub(r',\s*([}\]])', r'\1', json_string)  # Remove trailing commas before closing braces
    # Ensure all objects are closed properly
    json_string = re.sub(r'\s*}\s*{', '},{', json_string)
    # Add missing closing brackets if necessary
    if json_string.count('[') > json_string.count(']'):
        json_string += ']'
    if json_string.count('{') > json_string.count('}'):
        json_string += '}'
    return json_string

def extract_json_improvements(response):
    if response is None:
        print("Error: No response provided for JSON extraction")
        return {'improvements': [], 'response_without_json': ''}
    try:
        json_match = re.search(r'\[\s*\{.*\}\s*\]', response, re.DOTALL)
        if not json_match:
            raise ValueError("No valid JSON found in response.")
        
        cleaned_json_string = clean_json_string(json_match.group(0))
        print("Cleaned JSON string:", cleaned_json_string)  # Debugging statement

        improvements = json.loads(cleaned_json_string)
        response_without_json = response.replace(json_match.group(0), '')
        return {'improvements': improvements, 'response_without_json': response_without_json}
    except (ValueError, json.JSONDecodeError) as e:
        print("Error extracting JSON from response:", e)
        print(f"Failed to decode JSON string: {response}")  # Print the entire response for debugging
        return {'improvements': [], 'response_without_json': response}


# Functions to Estimate Token Counts and Calculate Costs

## Function: estimate_tokens

### Description
The `estimate_tokens` function calculates the number of tokens in a given text string using the encoder for the "davinci" model. This is useful for estimating the token usage for an API request.

### Parameters
- **text** (`str`): The text string for which the token count is to be estimated.

### Returns
- **tokens** (`int`): The number of tokens in the provided text string.

### How it Works
1. **Initialize Encoder**: Uses the `tiktoken.encoding_for_model` method to get the encoder for the "davinci" model.
2. **Encode Text**: Encodes the provided text string into tokens.
3. **Count Tokens**: Returns the length of the token list.

## Function: calculate_costs

### Description
The `calculate_costs` function estimates the costs associated with making grading and suggestion requests based on the number of tokens used. It calculates the total tokens per request and provides an estimated cost per request and an overall cost based on the number of users and requests.

### Parameters
- **grading_prompt_tokens** (`int`): The number of tokens in the grading prompt.
- **suggestions_prompt_tokens** (`int`): The number of tokens in the suggestions prompt.
- **grading_response_tokens** (`int`, optional): The estimated number of tokens in the grading response. Default is 100.
- **suggestions_response_tokens** (`int`, optional): The estimated number of tokens in the suggestions response. Default is 200.

### Returns
- **None**: This function prints the estimated costs and does not return any value.

### How it Works
1. **Calculate Total Tokens**: Adds the prompt tokens and response tokens to get the total tokens per request for both grading and suggestions.
2. **Print Token Counts**: Prints the number of tokens for grading prompt, suggestions prompt, and the total tokens for each request.
3. **Calculate Costs**: 
   - Uses a hypothetical cost of $0.0004 per 1,000 tokens.
   - Calculates the cost per grading request and per suggestions request.
   - Prints the estimated cost for each type of request.
4. **Estimate Overall Cost**: 
   - Assumes 100 users, each making 10 requests (5 grading and 5 suggestions).
   - Calculates the total cost for all users and prints the estimated total cost.


In [6]:
def estimate_tokens(text):
    encoder = tiktoken.encoding_for_model("davinci")
    tokens = encoder.encode(text)
    return len(tokens)

def calculate_costs(grading_prompt_tokens, suggestions_prompt_tokens, grading_response_tokens=100, suggestions_response_tokens=200):
    # Total tokens per request
    total_grading_tokens = grading_prompt_tokens + grading_response_tokens
    total_suggestions_tokens = suggestions_prompt_tokens + suggestions_response_tokens

    print(f"Grading prompt tokens: {grading_prompt_tokens}")
    print(f"Suggestions prompt tokens: {suggestions_prompt_tokens}")
    print(f"Total tokens for grading request: {total_grading_tokens}")
    print(f"Total tokens for suggestions request: {total_suggestions_tokens}")

    # Example cost calculation
    # Assuming $0.0004 per 1K tokens (for a hypothetical pricing model)
    cost_per_1k_tokens = 0.0004
    grading_cost = (total_grading_tokens / 1000) * cost_per_1k_tokens
    suggestions_cost = (total_suggestions_tokens / 1000) * cost_per_1k_tokens

    print(f"Estimated cost per grading request: ${grading_cost:.4f}")
    print(f"Estimated cost per suggestions request: ${suggestions_cost:.4f}")

    # Example overall cost calculation
    # Assuming 100 users with 10 requests each (5 grading + 5 suggestions)
    num_users = 100
    requests_per_user = 10
    grading_requests_per_user = 5
    suggestions_requests_per_user = 5

    total_cost = num_users * (
        grading_requests_per_user * grading_cost +
        suggestions_requests_per_user * suggestions_cost
    )

    print(f"Estimated total cost for {num_users} users with {requests_per_user} requests each: ${total_cost:.2f}")


In [7]:
def display_improvements(improvements, original_text):
    if not improvements:
        print("No improvements to display.")
        return

    css = """
    <style>
    .improvement-tooltip {
        position: relative;
        display: inline-block;
        cursor: pointer;
        background-color: yellow;
        margin-bottom: 10px;
        padding: 5px;
        border: 1px solid black;
        border-radius: 4px;
    }
    .improvement-tooltip .tooltiptext {
        visibility: hidden;
        width: 300px;
        background-color: white;
        color: black;
        text-align: left;
        border: 1px solid #ddd;
        padding: 10px;
        border-radius: 6px;
        box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        position: absolute;
        z-index: 1;
        top: 100%;
        left: 50%;
        margin-left: -150px;
    }
    .improvement-tooltip:hover .tooltiptext {
        visibility: visible;
    }
    </style>
    """
    
    html_content = original_text
    
    for improvement in improvements:
        original = improvement.get('original_text', '')
        revised = improvement.get('revised_text', '')
        improvement_text = improvement['improvement']
        reason_for_suggestion = improvement['reason_for_suggestion']
        criterion = improvement['criterion_from_rubric']
        
        tooltip_html = f"""
        <div class="improvement-tooltip">
            {revised}
            <span class="tooltiptext">
                <strong>Criterion:</strong> {criterion}<br>
                <strong>Improvement:</strong> {improvement_text}<br>
                <strong>Reason:</strong> {reason_for_suggestion}
            </span>
        </div>
        """
        
        if original:
            html_content = html_content.replace(original, tooltip_html)
        else:
            html_content = html_content + tooltip_html
    
    display(HTML(css + html_content))


In [8]:
# Load the rubric
rubric_file_path = '/Users/samuelgartenstein/Desktop/grading_assistant/external_data/output_and_suggestion_functionality/nys_common_core_derived/writing_social_sciences_11_12-Copy1.md'
rubric_content = upload_rubric(rubric_file_path)
display(to_markdown(rubric_content))


> 
> # New York State Common Core Writing Standards Rubric for Social Sciences (Grades 11-12)
> 
> | Criterion                  | 1 | 2 | 3 | 4 | 5 |
> |----------------------------|---|---|---|---|---|
> | **1. Argument Development** | Introduces claims with little or no clarity; minimal distinction between claims and counterclaims; lacks logical organization | Introduces claims with some clarity; some distinction between claims and counterclaims; basic organization but lacks depth | Introduces clear claims; distinguishes claims from counterclaims; logical sequence but may have minor gaps | Introduces precise and knowledgeable claims; clearly distinguishes claims from counterclaims; logically sequences with minor issues | Introduces precise, knowledgeable claims; clearly distinguishes and logically sequences claims, counterclaims, reasons, and evidence |
> | **2. Evidence and Analysis** | Provides minimal or irrelevant evidence; weak development of claims and counterclaims; lacks consideration of audience | Provides some relevant evidence; basic development of claims and counterclaims; limited audience consideration | Provides relevant evidence and analysis; adequately develops claims and counterclaims; considers audience to some extent | Provides strong evidence and analysis; thoroughly develops claims and counterclaims; anticipates audience’s knowledge and biases | Provides the most relevant evidence and analysis; thoroughly develops claims and counterclaims with strong audience consideration |
> | **3. Cohesion and Clarity** | Minimal use of transitions and varied syntax; weak cohesion and unclear relationships between ideas | Uses some transitions and varied syntax; some cohesion but relationships between ideas may be unclear | Uses appropriate transitions and varied syntax; clear relationships between most ideas | Uses varied syntax and transitions effectively; clear and cohesive relationships between ideas | Uses varied syntax and transitions skillfully; creates strong cohesion and clear relationships between all ideas |
> | **4. Style and Tone** | Inconsistent or inappropriate style and tone; minimal adherence to discipline norms | Somewhat consistent style and tone; some adherence to discipline norms | Consistent style and tone; adheres to discipline norms but may have minor lapses | Maintains formal style and objective tone; adheres to discipline norms consistently | Establishes and maintains formal style and objective tone; fully adheres to discipline norms |
> | **5. Conclusion** | Provides a weak or irrelevant conclusion; does not support the argument | Provides a basic conclusion; somewhat supports the argument | Provides a clear conclusion; supports the argument but may lack depth | Provides a strong conclusion; effectively supports the argument | Provides a compelling conclusion; thoroughly supports the argument and adds insight |
> 


In [9]:
# The essay text
essay_text =  """
The American Revolution, which occurred from 1775 to 1783, was a pivotal event in history that not only led to the birth of a new nation but also had profound and lasting impacts on modern American society. This essay explores the significance of the American Revolution, examining its influence on contemporary political systems, social structures, and cultural values in the United States.

Firstly, the American Revolution laid the foundation for the modern American political system. The revolutionaries' fight for independence was rooted in the desire for self-governance and the rejection of tyrannical rule. This struggle culminated in the drafting of the Declaration of Independence in 1776, which articulated the fundamental principles of liberty, equality, and democracy. These principles became the cornerstone of the United States Constitution, adopted in 1787, which established a federal republic with a system of checks and balances designed to prevent the concentration of power. Today, these democratic ideals continue to shape American politics, ensuring that power is derived from the consent of the governed and that individual rights are protected.

Moreover, the American Revolution had a profound impact on the social fabric of the nation. The revolution challenged existing social hierarchies and promoted the idea of equality. While the promise of equality was not immediately realized for all groups, the rhetoric of the revolution inspired subsequent movements for social justice. The abolitionist movement, which sought to end slavery, drew upon the revolutionary principles of liberty and equality. Similarly, the women's suffrage movement, which fought for women's right to vote, was influenced by the revolution's emphasis on individual rights. These movements have played a crucial role in shaping a more inclusive and equitable society, reflecting the enduring legacy of the American Revolution.

Culturally, the American Revolution fostered a sense of national identity and unity. The shared struggle for independence created a collective memory and a sense of common purpose among the American people. This sense of unity was further reinforced by the creation of national symbols, such as the flag and the national anthem, which continue to evoke patriotic sentiments. Additionally, the revolution gave rise to a distinct American culture that valued individualism, self-reliance, and innovation. These cultural values have profoundly influenced various aspects of American life, including the economy, education, and popular culture, contributing to the country's dynamic and entrepreneurial spirit.

In conclusion, the American Revolution was a transformative event that has had a lasting impact on modern American society. It established the foundational principles of American democracy, inspired social movements for equality, and fostered a unique national identity and culture. The legacy of the American Revolution continues to resonate in contemporary America, shaping its political systems, social structures, and cultural values. As Americans reflect on their history, the revolutionary ideals of liberty, equality, and democracy remain guiding principles for the nation's ongoing pursuit of a more just and equitable society.
"""


In [10]:
# Estimate tokens for the essay and rubric
essay_tokens = estimate_tokens(essay_text)
rubric_tokens = estimate_tokens(rubric_content)


In [11]:
# Validate and grade the essay
grading_feedback = analyze_essay_with_rubric(model, rubric_content, essay_text)
print("Grading Feedback:")
print(grading_feedback)
display(to_markdown(grading_feedback))


Grading Feedback:
Yes, the rubric is formatted correctly and is appropriate for grading the essay. 

**Argument Development:** The essay introduces a clear and knowledgeable claim about the significance of the American Revolution on modern American society. It logically sequences its points, examining the Revolution's impact on political systems, social structures, and cultural values. The essay does not delve deeply into counterclaims, which prevents it from achieving the highest score.
**Score: 4** 

**Evidence and Analysis:** The essay provides relevant historical details and examples to support its claims. It connects the American Revolution to the development of American democracy, social justice movements, and cultural values. However, the analysis could be further strengthened by exploring a wider range of evidence and delving deeper into the complexities of the historical context. 
**Score: 4**

**Cohesion and Clarity:** The essay demonstrates a clear and logical flow of ideas.

> Yes, the rubric is formatted correctly and is appropriate for grading the essay. 
> 
> **Argument Development:** The essay introduces a clear and knowledgeable claim about the significance of the American Revolution on modern American society. It logically sequences its points, examining the Revolution's impact on political systems, social structures, and cultural values. The essay does not delve deeply into counterclaims, which prevents it from achieving the highest score.
> **Score: 4** 
> 
> **Evidence and Analysis:** The essay provides relevant historical details and examples to support its claims. It connects the American Revolution to the development of American democracy, social justice movements, and cultural values. However, the analysis could be further strengthened by exploring a wider range of evidence and delving deeper into the complexities of the historical context. 
> **Score: 4**
> 
> **Cohesion and Clarity:** The essay demonstrates a clear and logical flow of ideas. It utilizes transitions effectively to connect paragraphs and sentences, ensuring smooth transitions between different aspects of the argument. The writing is clear and easy to follow, enhancing the overall coherence of the essay.
> **Score: 5**
> 
> **Style and Tone:**  The essay maintains a formal and objective tone appropriate for an academic essay. The language is clear, concise, and avoids overly colloquial expressions. It adheres well to the conventions of standard written English, contributing to the essay's credibility and professionalism.
> **Score: 5**
> 
> **Conclusion:** The conclusion effectively summarizes the main points of the essay, reiterating the enduring legacy of the American Revolution on modern American society. It reinforces the thesis statement and leaves the reader with a sense of the significance of the topic. However, it could be strengthened by offering a more insightful concluding thought.  
> **Score: 4**
> 
> **Final Score:** To calculate the weighted average, we sum the scores for each criterion and divide by the total number of criteria. In this case: (4 + 4 + 5 + 5 + 4) / 5 = **4.4** 
> 
> **Overall, the essay demonstrates a strong understanding of the American Revolution's impact on modern American society. It presents a well-structured argument supported by relevant evidence and analysis. The essay is well-written, maintaining a clear and engaging style.  With a slightly more in-depth exploration of counterclaims and a more insightful conclusion, the essay could achieve an even higher level of sophistication.** 


In [12]:
# Generate suggestions and rewrite the essay based on the grading feedback
suggestions_and_rewrite = generate_suggestions_and_rewrite_essay(model, grading_feedback, essay_text)
extracted_data = extract_json_improvements(suggestions_and_rewrite)
print("Extracted Data:", extracted_data)  # Debugging statement


Full response from model:
 ```json
[
  {
    "improvement": "Introduce counterclaims regarding the limited immediate impact of the Revolution on certain groups (e.g., slavery, women's rights).",
    "criterion_from_rubric": "Argument Development",
    "reason_for_suggestion": "The essay needs to address counterarguments to reach the highest level of argument development.",
    "original_text": "Moreover, the American Revolution had a profound impact on the social fabric of the nation. The revolution challenged existing social hierarchies and promoted the idea of equality.",
    "revised_text": "While the American Revolution espoused ideals of equality, its immediate impact on the social fabric of the nation was complex and uneven.  The revolution undeniably challenged existing social hierarchies and promoted the idea of equality, as evidenced by... However, this promise of equality was not immediately realized for all groups.  The institution of slavery persisted, starkly contradicting

In [13]:
# Display the suggestions and revised essay
display(Markdown(f"**Suggestions and Revised Essay:**\n\n{extracted_data['response_without_json']}"))


**Suggestions and Revised Essay:**

```json

```


In [14]:
# Display the improvements in the new essay with hover effect
display_improvements(extracted_data['improvements'], essay_text)


In [15]:
# Calculate token count for the prompt used in grading
grading_prompt = format_prompt(rubric_content, essay_text)
grading_prompt_tokens = estimate_tokens(grading_prompt)


In [16]:
# Calculate token count for the prompt used in generating suggestions and rewriting the essay
suggestions_prompt = format_suggestions_and_rewrite_prompt(grading_feedback, essay_text)
suggestions_prompt_tokens = estimate_tokens(suggestions_prompt)


In [17]:
# Calculate costs
calculate_costs(grading_prompt_tokens, suggestions_prompt_tokens)


Grading prompt tokens: 1338
Suggestions prompt tokens: 1413
Total tokens for grading request: 1438
Total tokens for suggestions request: 1613
Estimated cost per grading request: $0.0006
Estimated cost per suggestions request: $0.0006
Estimated total cost for 100 users with 10 requests each: $0.61
