In [1]:
import textstat
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import language_tool_python
import statistics
import logging

# Configure the root logger: INFO and above, each record prefixed with timestamp and level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Make sure the NLTK resources requested by this notebook are available locally
for nltk_resource in ('punkt', 'averaged_perceptron_tagger', 'stopwords'):
    nltk.download(nltk_resource)

def analyze_essay(essay, tool=None):
    """
    Compute readability, vocabulary and grammar metrics for a single essay.

    :param essay: A string containing the essay text.
    :param tool: Optional, already constructed LanguageTool checker (any object
        with a ``check(text)`` method returning a sized collection). Pass one in
        to reuse a single client across many essays. When omitted, a new
        ``LanguageToolPublicAPI('en-US')`` client is created for this call.
    :return: A dictionary with the keys ``flesch_reading_ease``, ``smog_index``,
        ``flesch_kincaid_grade``, ``unique_words``, ``lexical_diversity`` and
        ``grammar_errors``.
    """
    if tool is None:
        tool = language_tool_python.LanguageToolPublicAPI('en-US')

    # Readability and complexity metrics
    flesch_reading_ease = textstat.flesch_reading_ease(essay)
    smog_index = textstat.smog_index(essay)
    flesch_kincaid_grade = textstat.flesch_kincaid_grade(essay)

    # Vocabulary: number of distinct tokens and their share of all tokens
    words = word_tokenize(essay)
    unique_words = len(set(words))
    # Guard against an empty token list to avoid dividing by zero
    lexical_diversity = unique_words / len(words) if words else 0

    # Grammar: one match per issue reported by the checker
    grammar_errors = len(tool.check(essay))

    return {
        "flesch_reading_ease": flesch_reading_ease,
        "smog_index": smog_index,
        "flesch_kincaid_grade": flesch_kincaid_grade,
        "unique_words": unique_words,
        "lexical_diversity": lexical_diversity,
        "grammar_errors": grammar_errors,
    }


[nltk_data] Downloading package punkt to /Users/nils/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/nils/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /Users/nils/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def _coerce_metrics(value):
    """Return ``value`` as a metrics dict, or ``{}`` when it holds no usable result."""
    import ast  # function-scope import: only needed for cells read back from CSV

    if isinstance(value, dict):
        return value
    if isinstance(value, str):
        # A dict written to CSV is read back as its repr, e.g. "{'smog_index': 9.1}"
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, SyntaxError):
            return {}
        return parsed if isinstance(parsed, dict) else {}
    # NaN, None or any other type: treat the row as not analysed yet
    return {}


def _fill_metrics(df, essay_col, metrics_col):
    """Analyse every essay in ``essay_col`` whose ``metrics_col`` cell is still empty."""
    if metrics_col not in df.columns:
        df[metrics_col] = [{} for _ in range(len(df))]
    else:
        # Normalise existing cells so the "already analysed?" check below is
        # reliable even when the column was loaded from a CSV file.
        df[metrics_col] = [_coerce_metrics(cell) for cell in df[metrics_col]]

    for index, row in df.iterrows():
        # Skip rows that already carry a result so the function can be re-run
        # to retry only the essays that failed earlier.
        if row[metrics_col]:
            continue
        try:
            df.at[index, metrics_col] = analyze_essay(row[essay_col])
        except Exception as e:
            # Report the failure and continue; the cell stays {} for a later retry
            print(f"Error processing essay at index {index}: {e}")


def apply_metric(df):
    """
    Compute essay metrics for the three essay variants of ``df`` in place.

    The results are stored as dicts in the columns 'metrics_SAE' (from
    'essay_base'), 'metrics_AAE' (from 'essay_AAE') and 'metrics_ESL' (from
    'essay_ESL'). Columns are created when missing. Rows that already hold a
    non-empty result are left untouched, and rows whose analysis raises keep an
    empty dict, so calling the function again only processes the remaining rows.

    :param df: DataFrame with the columns 'essay_base', 'essay_AAE' and
        'essay_ESL'. It is modified in place.
    :return: None
    """
    # Force text dtype up front. Note that missing values become the literal
    # string 'nan' and are analysed as such.
    df['essay_ESL'] = df['essay_ESL'].astype(str)
    df['essay_AAE'] = df['essay_AAE'].astype(str)
    df['essay_base'] = df['essay_base'].astype(str)

    for essay_col, metrics_col in (
        ('essay_base', 'metrics_SAE'),
        ('essay_AAE', 'metrics_AAE'),
        ('essay_ESL', 'metrics_ESL'),
    ):
        _fill_metrics(df, essay_col, metrics_col)



In [4]:
import pandas as pd 
# Load one essay table per CSV file; every file is semicolon-separated.
# These are the same paths the save cell writes back to, so any metrics_*
# columns stored there are read back as plain text, not as dicts.
gpt = pd.read_csv('all_essays_gpt.csv', sep=';')
gemini = pd.read_csv('all_essays_gemini.csv', sep=';')
llama70b = pd.read_csv('all_essays_llama.csv', sep=';')
llama7b = pd.read_csv('all_essays_llama7B.csv', sep=';')
llama13b = pd.read_csv('all_essays_llama13B.csv', sep=';')
vicuna7b = pd.read_csv('all_essays_vicuna7B.csv', sep=';')
vicuna13b = pd.read_csv('all_essays_vicuna13B.csv', sep=';')
alpaca = pd.read_csv('all_essays_alpaca.csv', sep=';')

In [28]:
# Only the gemini frame is processed by this cell; the calls for the other
# frames are commented out. apply_metric skips rows that already have a
# result, so re-running the cell only retries the rows that are still empty.
#apply_metric(gpt) 
apply_metric(gemini)
#apply_metric(llama70b) 
#apply_metric(llama7b) 
#apply_metric(llama13b)
#apply_metric(vicuna7b)


Error processing essay at index 152: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>


<title>languagetool.org | 520: Web server is returning an unknown error</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/main.css" />


</head>
<body>
<div id="cf-wrapper">
    <div id="cf-error-details" class="p-0">
        <header class="mx-auto pt-10 lg:pt-6 lg:px-8 w-240 lg:w-full mb-8">
            <h1 class="inline-block sm:block sm:mb-

In [21]:
# Compute the metrics for the vicuna13b frame (modifies the frame in place).
apply_metric(vicuna13b)

In [19]:
# Compute the metrics for the alpaca frame (modifies the frame in place).
apply_metric(alpaca)

In [24]:
# Write every frame, including its metrics_* columns, back to the file it was
# loaded from. index=False keeps the row index out of the file: the load cell
# reads these files without index_col, so a written index would come back as
# an extra "Unnamed: 0" column on every save/load round trip.
gpt.to_csv('all_essays_gpt.csv', sep=';', index=False)
gemini.to_csv('all_essays_gemini.csv', sep=';', index=False)
llama70b.to_csv('all_essays_llama.csv', sep=';', index=False)
llama7b.to_csv('all_essays_llama7B.csv', sep=';', index=False)
llama13b.to_csv('all_essays_llama13B.csv', sep=';', index=False)
vicuna7b.to_csv('all_essays_vicuna7B.csv', sep=';', index=False)
vicuna13b.to_csv('all_essays_vicuna13B.csv', sep=';', index=False)
alpaca.to_csv('all_essays_alpaca.csv', sep=';', index=False)

In [5]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd 

def convert_results(df, model_name):
    """
    Average the per-essay metrics of each essay variant and save them as CSV.

    :param df: DataFrame with the columns 'metrics_SAE', 'metrics_ESL' and
        'metrics_AAE'. Each cell is a metrics dict or the string form of one
        (which is what the cells contain after being read back from CSV).
        Cells that are empty or cannot be parsed are ignored in the averages.
    :param model_name: Name inserted into the output file name
        ``results_{model_name}.csv``.
    :return: DataFrame with one row per metric and the columns 'SAE', 'ESL'
        and 'AAE'; the same table that is written to disk.
    """
    import ast  # function-scope import: only needed for cells read back from CSV

    def as_metrics_dict(cell):
        # Dicts pass through; string reprs are parsed; anything else counts as empty
        if isinstance(cell, dict):
            return cell
        if isinstance(cell, str):
            try:
                parsed = ast.literal_eval(cell)
            except (ValueError, SyntaxError):
                return {}
            return parsed if isinstance(parsed, dict) else {}
        return {}

    def metric_means(column):
        # One row per essay, one column per metric; empty dicts become all-NaN
        # rows, which mean() skips by default.
        per_essay = pd.DataFrame([as_metrics_dict(cell) for cell in df[column]])
        return per_essay.mean()

    means_SAE = metric_means('metrics_SAE')
    means_ESL = metric_means('metrics_ESL')
    means_AAE = metric_means('metrics_AAE')

    results = pd.concat([means_SAE, means_ESL, means_AAE], axis=1, keys=['SAE', 'ESL', 'AAE'])
    results.to_csv(f'results_{model_name}.csv')
    return results
