In [None]:
import pandas as pd
import os
from pathlib import Path
import string
# !pip install pyspellchecker
from spellchecker import SpellChecker



# For C-iii ==================START==================
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')
# For C-iii ===================END=================



# Function to read text from file
def read_text_file(filename):
    filename = str(Path().absolute()) + '/essays_dataset/essays/' + filename
    try:
        with open(filename, 'r') as file:
            content = file.read()
        return content
    except FileNotFoundError:
        return None


# Function to count sentences
def count_sentences(text):
    # Split text into sentences based on "\n", "\t", and "."
    sentences = [sentence.strip() for sentence in text.replace('\n', '\t').split('\t')]
    sentences = [sentence.strip() for s in sentences for sentence in s.split('.')]
    # Remove empty strings resulting from extra "\t", "\n" or "."
    sentences = [sentence for sentence in sentences if sentence]
    sentence_count = 0
    # Count the finite verbs:
    for sentence in sentences:
        count = 0
        # Tokenize the sentence into words
        words = word_tokenize(sentence)

        # Tag the words with their part-of-speech (POS)
        tagged_words = pos_tag(words)

        # Initialize counters
        finite_verb_count = 0
        coordinate_clause = False
        subordinate_clause = False

        # Check for coordinate or subordinate clauses
        for i, (word, pos) in enumerate(tagged_words):
            if pos == 'CC' and i > 0 and i < len(tagged_words) - 1:
                coordinate_clause = True
            elif pos in ['IN', 'DT', 'WDT'] and i > 0 and i < len(tagged_words) - 1:
                subordinate_clause = True

        # Count the number of finite verbs
        for word, pos in tagged_words:
            if pos.startswith('V') and pos != 'VBG':
                finite_verb_count += 1

        if coordinate_clause == False and subordinate_clause == False:
            count = finite_verb_count
        else:
            count = 1

        sentence_count += count


    return sentence_count



# Function to count words
def count_words(text):
    # Split text into words based on whitespace, full stop, newline, or tab
    words = text.replace('\n', ' ').replace('\t', ' ').replace('.', ' ').replace('?', ' ').replace(',', ' ').replace('!', ' ').split()
    # Count the number of words
    return len(words)


## Function to count spelling mistakes
def count_spelling_mistakes(text):
    # Remove punctuation marks
    for punctuation in string.punctuation:
        text = text.replace(punctuation, ' ')
    # Initialize SpellChecker
    spell = SpellChecker()
    # Find misspelled words
    misspelled = spell.unknown(text.split())
    # Count the number of misspelled words
    num_misspelled = len(misspelled)
    # Return both the count and the list of misspelled words
    return num_misspelled, list(misspelled)


# Define a function to scale the values between 0 and 4
def scale_values_0_4(value, min_val, max_val):
    scaled_value = ((value - min_val) / (max_val - min_val)) * 4
    return round(scaled_value, 2)


# Define a function to scale the values between 1 and 5
def scale_values_1_5(value, min_val, max_val):
    scaled_value = ((value - min_val) / (max_val - min_val)) * 4 + 1
    return round(scaled_value, 2)




# For C-iii ==================START==================


# Define a function to scale the values between 1 and 5
def reverse_scale_values_1_5(value, min_val, max_val):
    scaled_value = ((max_val - value) / (max_val - min_val)) * 4 + 1
    return round(scaled_value, 2)


# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    #print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    #print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    #print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        mistake_count += 1


    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        mistake_count += 1

    # Check for plurality agreement of noun with verb
    if not is_plural_singular_agreement_correct(pos_tags):
        mistake_count += 1




#==================


    # Check for missing constituents
    if not is_subject_verb_agreement_correct(pos_tags):
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_pronoun_reference_clear(pos_tags):
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_parallel_structure_correct(sentence):
        mistake_count += 1

    # Check for conjunction usage
    if not contains_sentence_fragment(sentence):
        mistake_count += 1

    # Check for plurality agreement of noun with verb
    if not is_conjunction_usage_correct(pos_tags):
        mistake_count += 1

    # Check for missing constituents
    if not is_consistency_in_voice_and_perspective_correct(pos_tags):
        mistake_count += 1


#==================



    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True



# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in ['VBZ', 'VBP', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or (pos_tags[i-1][1] != 'PRP' and pos_tags[i-1][1] != 'NN'):  # Verb is not preceded by a personal pronoun or noun
                return False
    return True

# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True

# Function to check if there is plural/singular agreement between noun and verb
def is_plural_singular_agreement_correct(pos_tags):
    # Find the index of the first verb in the sentence
    verb_index = next((i for i, (word, tag) in enumerate(pos_tags) if tag.startswith('VB')), None)
    if verb_index is not None:
        # Find the index of the first noun occurring before the verb
        noun_index = next((i for i in range(verb_index) if pos_tags[i][1].startswith('NN')), None)
        if noun_index is not None:
            # Check if the noun and verb agree in plurality
            noun_tag = pos_tags[noun_index][1]
            verb_tag = pos_tags[verb_index][1]
            if noun_tag.endswith('S') and not verb_tag.endswith('S'):  # Noun is plural, but verb is singular
                return False
            elif not noun_tag.endswith('S') and verb_tag.endswith('S'):  # Noun is singular, but verb is plural
                return False
    return True




#==================


# Function to check subject-verb agreement
def is_subject_verb_agreement_correct(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag.startswith('VB') and i > 0:
            prev_word, prev_tag = pos_tags[i-1]
            if prev_tag.startswith('NN') and prev_tag != 'NNP':  # Check if preceding word is a noun
                return False
    return True

# Function to check pronoun reference
def is_pronoun_reference_clear(pos_tags):
    pronouns = {'PRP', 'PRP$', 'WP', 'WP$'}
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in pronouns and i > 0:
            prev_word, prev_tag = pos_tags[i-1]
            if prev_tag.startswith('NN'):  # Check if preceding word is a noun
                return True
    return False

# Function to check parallel structure
def is_parallel_structure_correct(sentence):
    # Example: "She likes swimming, hiking, and to ride horses"
    if ', and' in sentence or ', or' in sentence:
        return False
    return True

# Function to check for sentence fragments
def contains_sentence_fragment(sentence):
    if sentence.endswith(('.', '!', '?')):  # Check if sentence ends with a punctuation mark
        return False
    return True


# Function to check conjunction usage
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if word.lower() in {'but', 'and', 'or', 'yet', 'so', 'nor'} and i > 0:
            prev_word, prev_tag = pos_tags[i-1]
            if prev_tag != '.':
                return False
    return True

# Function to check consistency in voice and perspective
def is_consistency_in_voice_and_perspective_correct(pos_tags):
    perspective_tags = {'PRP', 'PRP$', 'WP', 'WP$'}
    first_person_tags = {'PRP', 'PRP$'}  # First-person perspective tags
    found_perspective_tag = False
    for word, tag in pos_tags:
        if tag in perspective_tags:
            found_perspective_tag = True
            if tag in first_person_tags:
                return False  # Inconsistent use of perspective
    return found_perspective_tag  # No perspective tag found or consistent perspective usage


#==================






# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):

    essay_cleaned = essay.replace('\n', ' ').replace('\t', ' ')

    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay_cleaned)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    return total_mistakes, round(total_mistakes/len(sentences),2)



def calc_pred(x):
    if x > 11.9:
        return 'high'
    else:
        return 'low'



# For C-iii =================END===================


def main():

    curr_dir_path = str(Path().absolute())
    comp_path = curr_dir_path +'/essays_dataset/index.csv'
    #    df = pd.read_csv("/essays_dataset/index.csv", delimiter=";", encoding="utf-8")
    #    df['num_sentences'] = None
    df = pd.read_csv(comp_path, delimiter=";", encoding="utf-8")
    df['content'] = df['filename'].apply(read_text_file)

    # Counting total sentences in essay
    df['num_sentences'] = df['content'].apply(count_sentences)

    # Counting total words in essay
    df['num_words'] = df['content'].apply(count_words)

    # Counting total spelling mistakes in essay
    df['num_spelling_mistakes'], df['misspelled_words_list'] = zip(*df['content'].apply(count_spelling_mistakes))

    # Counting spelling mistakes per word - (if essay is only 1 sentence, then fewer spelling mistakes is not necessarily a good score)
    df['spelling_mistakes_per_word'] = round(df['num_spelling_mistakes'] / df['num_words'],2)

    # Scale the specified columns between scores 1 to 5
    columns_to_scale = ['num_sentences', 'num_words']
    for col in columns_to_scale:
        min_val = df[col].min()
        max_val = df[col].max()
        df[col + '_score'] = df[col].apply(lambda x: scale_values_1_5(x, min_val, max_val))

    # Scale the specified columns between scores 0 to 4
    min_val = df['spelling_mistakes_per_word'].min()
    max_val = df['spelling_mistakes_per_word'].max()
    df['spelling_mistakes' + '_score'] = df['spelling_mistakes_per_word'].apply(lambda x: scale_values_0_4(x, min_val, max_val))

    # Create final column SCORE_a with the average of 'num_sentences' and 'num_words'
    df['SCORE_a'] = (df['num_sentences_score'] + df['num_words_score']) / 2

    # Create final columns SCORE_b
    df['SCORE_b'] = df['spelling_mistakes_score']



    # For C-iii ==================START==================


    # Calculate Syntactic well-formedness (c.iii)
    df[['syntax_mistakes', 'syntax_mistakes_per_sent']] = df['content'].apply(lambda x: pd.Series(count_mistakes_in_essay(x)))

    # Scale the specified columns between scores 1 to 5
    min_val = df['syntax_mistakes_per_sent'].min()
    max_val = df['syntax_mistakes_per_sent'].max()
    df['SCORE_ciii'] = df['syntax_mistakes_per_sent'].apply(lambda x: reverse_scale_values_1_5(x, min_val, max_val))

    df['FINAL_SCORE'] = 2 * df['SCORE_a'] - df['SCORE_b'] + 2 * df['SCORE_ciii']
    df['pred'] = df['FINAL_SCORE'].apply(lambda x: calc_pred(x))

    accuracy = (df['grade'] == df['pred']).mean() * 100

    print("Accuracy: {:.2f}%".format(accuracy))


    # For C-iii ===================END=================

    df_comp = df[['filename','content','num_sentences','num_words','num_spelling_mistakes','syntax_mistakes','syntax_mistakes_per_sent','SCORE_a','SCORE_b','SCORE_ciii','FINAL_SCORE','pred','grade']]
    # # CSV file for viewing analysis fields and scores - saved in current directory
    # df.to_csv(curr_dir_path + "/Final_df.csv", index=False)

    df_comp.head(20)

main()

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


Accuracy: 82.00%


In [None]:
df_final = df[['SCORE_a','SCORE_b','SCORE_ciii','grade']]
df_final

Unnamed: 0,SCORE_a,SCORE_b,SCORE_ciii,grade
0,2.800,0.26,2.43,low
1,2.200,0.77,4.28,low
2,3.410,0.13,3.02,high
3,1.960,2.06,3.68,low
4,1.660,0.77,2.00,low
...,...,...,...,...
95,3.030,0.00,3.80,high
96,2.165,0.77,2.73,low
97,3.305,0.00,2.45,high
98,3.260,0.26,4.24,high


In [None]:
# For ML Algorithms

import pandas as pd
from sklearn.model_selection import train_test_split


# Split the data into features (X) and target variable (y)
x = df_final.drop('grade', axis = 1)  # Assuming 'target_column' is the name of the target variable
y = df_final['grade']

# Split the data into train and test sets with stratification
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, stratify = y, random_state = 42)


In [None]:
x_train

Unnamed: 0,SCORE_a,SCORE_b,SCORE_ciii
39,3.905,0.13,3.59
3,1.960,2.06,3.68
13,1.690,0.90,2.95
27,1.565,0.65,2.12
30,2.110,0.39,3.43
...,...,...,...
42,3.295,0.39,2.74
80,2.915,0.39,4.40
65,2.030,0.65,2.43
52,3.140,0.52,3.58


In [None]:
# Naive Bayes ALgorithm

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize the Naive Bayes classifier
nb_classifier = GaussianNB()

# Train the classifier on the training data
nb_classifier.fit(x_train, y_train)

# Make predictions on the test data
nb_predictions = nb_classifier.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes Accuracy:", accuracy)

# Calculate precision, recall, and F1-score
# Adjust pos_label according to your target variable
precision = precision_score(y_test, nb_predictions, pos_label='high')
recall = recall_score(y_test, nb_predictions, pos_label='high')
f1 = f1_score(y_test, nb_predictions, pos_label='high')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Naive Bayes Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


In [None]:
# Logistic Regression


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Initialize the logistic regression model
logreg_model = LogisticRegression()

# Train the model
logreg_model.fit(x_train, y_train)

# Predict on the test set
y_pred = logreg_model.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Logistic Regression Accuracy:", accuracy)

# Calculate precision, recall, and F1-score
# Adjust pos_label according to your target variable
precision = precision_score(y_test, y_pred, pos_label='high')
recall = recall_score(y_test, y_pred, pos_label='high')
f1 = f1_score(y_test, y_pred, pos_label='high')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Logistic Regression Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0


In [None]:
# MLP Classifier

import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize the MLPClassifier
mlp_classifier = MLPClassifier()

# Train the classifier
mlp_classifier.fit(x_train, y_train)

# Predict on the test set
mlp_pred = mlp_classifier.predict(x_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, mlp_pred)
print("Multilayer Perceptron Accuracy:", accuracy)

# Calculate precision, recall, and F1-score
# Adjust pos_label according to your target variable
precision = precision_score(y_test, mlp_pred, pos_label='high')
recall = recall_score(y_test, mlp_pred, pos_label='high')
f1 = f1_score(y_test, mlp_pred, pos_label='high')

print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Multilayer Perceptron Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-score: 1.0




In [None]:
# Define a function to scale the values between 1 and 5
def reverse_scale_values_1_5(value, min_val, max_val):
    scaled_value = ((max_val - value) / (max_val - min_val)) * 4 + 1
    return round(scaled_value, 2)


# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    #print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    #print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    #print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1



    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        print("Error: Verb agreement and tense consistency issue")
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        print("Error: Conjunction usage is incorrect")
        mistake_count += 1

    # Check for plurality agreement of noun with verb
    if not is_plural_singular_agreement_correct(pos_tags):
        print("Error: Plurality agreement of noun with verb is incorrect")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True



# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in ['VBZ', 'VBP', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or (pos_tags[i-1][1] != 'PRP' and pos_tags[i-1][1] != 'NN'):  # Verb is not preceded by a personal pronoun or noun
                return False
    return True

# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True

# Function to check if there is plural/singular agreement between noun and verb
def is_plural_singular_agreement_correct(pos_tags):
    # Find the index of the first verb in the sentence
    verb_index = next((i for i, (word, tag) in enumerate(pos_tags) if tag.startswith('VB')), None)
    if verb_index is not None:
        # Find the index of the first noun occurring before the verb
        noun_index = next((i for i in range(verb_index) if pos_tags[i][1].startswith('NN')), None)
        if noun_index is not None:
            # Check if the noun and verb agree in plurality
            noun_tag = pos_tags[noun_index][1]
            verb_tag = pos_tags[verb_index][1]
            if noun_tag.endswith('S') and not verb_tag.endswith('S'):  # Noun is plural, but verb is singular
                return False
            elif not noun_tag.endswith('S') and verb_tag.endswith('S'):  # Noun is singular, but verb is plural
                return False
    return True

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):

    essay_cleaned = essay.replace('\n', ' ').replace('\t', ' ')

    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay_cleaned)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        print('============================')
        print(sentence)
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total sentences in the essay
    print("Length:", len(sentences))

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

    return total_mistakes


essay = '''This is an important aspect of today time.
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
More people go to this program television to talk about your problem, that is very radicate in my nation.
The modern society rappresented the perfect ambient to influenced the minds of all the person.
In my self is present the reasons of this statement, that is one of the problem of the life.
But not all the people and the time is in accord with this problem, because any time the person is too according with the make products.
Thus I agree with this statement, because this event is present in my life every day, and rappresented the problem with I do fighting.
But to explain all the aspect about this argoment is very inportant to illustre any examples.
The television programs that every day introduce in the minds more argoment, news and other problem, or breaking news, is the first actor in this process.
This opinion rappresented my self in my life, because for me the life of all the people is not possible to influence by the activity of any person.
The society lose the propriety when this problem will rappresent the must argoment of the talk and the life of the people, because as very difficult live at a time with this argoment.
The my request is that the new politics discuss about this problem.
'''

count_mistakes_in_essay(essay)

This is an important aspect of today time.
Error: Constituents are not formed properly
Error: Verb agreement and tense consistency issue
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Error: Constituents are not formed properly
Error: Verb agreement and tense consistency issue
Error: Plurality agreement of noun with verb is incorrect
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
Error: Constituents are not formed properly
Error: Verb agreement and tense consistency issue
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
Error: Constituents are not formed properly
Error: Verb agreement and tense consistency issue
More people go to this program television to talk about your problem, that is 

33

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

def count_finite_verbs(sentence):
    # Tokenize the sentence into words
    words = word_tokenize(sentence)

    # Tag the words with their part-of-speech (POS)
    tagged_words = pos_tag(words)

    # Initialize counters
    finite_verb_count = 0
    coordinate_clause = False
    subordinate_clause = False

    # Check for coordinate or subordinate clauses
    for i, (word, pos) in enumerate(tagged_words):
        if pos == 'CC' and i > 0 and i < len(tagged_words) - 1:
            coordinate_clause = True
        elif pos in ['IN', 'DT', 'WDT'] and i > 0 and i < len(tagged_words) - 1:
            subordinate_clause = True

    # Count the number of finite verbs
    for word, pos in tagged_words:
        if pos.startswith('V') and pos != 'VBG':
            finite_verb_count += 1

    return finite_verb_count, coordinate_clause, subordinate_clause

# Example usage
text = '''This is an important aspect of today time.
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
More people go to this program television to talk about your problem, that is very radicate in my nation.
The modern society rappresented the perfect ambient to influenced the minds of all the person.
In my self is present the reasons of this statement, that is one of the problem of the life.
But not all the people and the time is in accord with this problem, because any time the person is too according with the make products.
Thus I agree with this statement, because this event is present in my life every day, and rappresented the problem with I do fighting.
But to explain all the aspect about this argoment is very inportant to illustre any examples.
The television programs that every day introduce in the minds more argoment, news and other problem, or breaking news, is the first actor in this process.
This opinion rappresented my self in my life, because for me the life of all the people is not possible to influence by the activity of any person.
The society lose the propriety when this problem will rappresent the must argoment of the talk and the life of the people, because as very difficult live at a time with this argoment.
The my request is that the new politics discuss about this problem.'''


def count_sentences(text):
    # Split text into sentences based on "\n", "\t", and "."
    sentences = [sentence.strip() for sentence in text.replace('\n', '\t').split('\t')]
    sentences = [sentence.strip() for s in sentences for sentence in s.split('.')]
    # Remove empty strings resulting from extra "\t", "\n" or "."
    sentences = [sentence for sentence in sentences if sentence]
    sentence_count = 0
    # Count the finite verbs:
    for sentence in sentences:
        count = 0
        # Tokenize the sentence into words
        words = word_tokenize(sentence)

        # Tag the words with their part-of-speech (POS)
        tagged_words = pos_tag(words)

        # Initialize counters
        finite_verb_count = 0
        coordinate_clause = False
        subordinate_clause = False

        # Check for coordinate or subordinate clauses
        for i, (word, pos) in enumerate(tagged_words):
            if pos == 'CC' and i > 0 and i < len(tagged_words) - 1:
                coordinate_clause = True
            elif pos in ['IN', 'DT', 'WDT'] and i > 0 and i < len(tagged_words) - 1:
                subordinate_clause = True

        # Count the number of finite verbs
        for word, pos in tagged_words:
            if pos.startswith('V') and pos != 'VBG':
                finite_verb_count += 1

        if coordinate_clause == False and subordinate_clause == False:
            count = finite_verb_count
        else:
            count = 1

        sentence_count += count


    return sentence_count
print(count_sentences(text))

14


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if parser.root is None or parser.root['rel'] != 'root':
        print("Error: Sentence does not have a proper root")
        mistake_count += 1

    # Check for missing constituents and other criteria
    for node in parser.nodes.values():
        # Check for missing determiners in noun phrases
        if node['tag'] == 'NN' and 'DT' not in node['deps']:
            print("Error: Missing determiner in noun phrase")
            mistake_count += 1

        # Check for subordinating conjunctions without main verbs or gerunds
        if node['tag'] in ['IN', 'RB'] and 'ccomp' not in node['deps'] and 'xcomp' not in node['deps']:
            print("Error: Subordinating conjunction used incorrectly")
            mistake_count += 1

    return mistake_count

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        print(sentence)
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)



The cat sat on the mat.
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Because it was raining.
Error: Sentence does not have a proper root
Error: Subordinating conjunction used incorrectly
I went to the store.
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Dog not want to play.
Error: Sentence does not have a proper root
Error: Subordinating conjunction used incorrectly
Total mistakes in the essay: 10


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print('Tokens---------------')
    print(tokens)
    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print('POS TAGS---------------')
    print(pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])
    print('DEP STR---------------')
    print(dep_str)

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')
    print('PARSER---------------')
    print(parser)
    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if parser.root is None or parser.root['rel'] != 'root':
        print("Error: Sentence does not have a proper root")
        mistake_count += 1

    # Check for missing constituents and other criteria
    for node in parser.nodes.values():
        # Check for missing determiners in noun phrases
        if node['tag'] == 'NN' and 'DT' not in node['deps']:
            print("Error: Missing determiner in noun phrase")
            mistake_count += 1

        # Check for subordinating conjunctions without main verbs or gerunds
        if node['tag'] in ['IN', 'RB'] and 'ccomp' not in node['deps'] and 'xcomp' not in node['deps']:
            print("Error: Subordinating conjunction used incorrectly")
            mistake_count += 1

    return mistake_count

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Tokens---------------
['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
POS TAGS---------------
[('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN'), ('.', '.')]
DEP STR---------------
1	The	DT	0	DT	_	1	_	_	_
2	cat	NN	1	NN	_	2	_	_	_
3	sat	VBD	2	VBD	_	3	_	_	_
4	on	IN	3	IN	_	4	_	_	_
5	the	DT	4	DT	_	5	_	_	_
6	mat	NN	5	NN	_	6	_	_	_
7	.	.	6	.	_	7	_	_	_
PARSER---------------
defaultdict(<function DependencyGraph.__init__.<locals>.<lambda> at 0x1223349a0>,
            {0: {'address': 0,
                 'ctag': 'TOP',
                 'deps': defaultdict(<class 'list'>, {'root': []}),
                 'feats': None,
                 'head': None,
                 'lemma': None,
                 'rel': None,
                 'tag': 'TOP',
                 'word': None},
             1: {'address': 1,
                 'ctag': '0',
                 'deps': defaultdict(<class 'list'>, {'_': [1]}),
                 'feats': '_',


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:


essay = '''This is an important aspect of today time.
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
More people go to this program television to talk about your problem, that is very radicate in my nation.
The modern society rappresented the perfect ambient to influenced the minds of all the person.
In my self is present the reasons of this statement, that is one of the problem of the life.
But not all the people and the time is in accord with this problem, because any time the person is too according with the make products.
Thus I agree with this statement, because this event is present in my life every day, and rappresented the problem with I do fighting.
But to explain all the aspect about this argoment is very inportant to illustre any examples.
The television programs that every day introduce in the minds more argoment, news and other problem, or breaking news, is the first actor in this process.
This opinion rappresented my self in my life, because for me the life of all the people is not possible to influence by the activity of any person.
The society lose the propriety when this problem will rappresent the must argoment of the talk and the life of the people, because as very difficult live at a time with this argoment.
The my request is that the new politics discuss about this problem.'''

print(count_mistakes_in_essay(essay))

Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Missing determiner in noun phrase
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Error: Sentence does not have a proper root
Error: Missing de

In [None]:
!pip install nltk

import nltk
from nltk import Tree

# Sample sentences
sentences = [
    "My dog with a broken leg I not want.",
    "I do not want my dog with a broken leg.",
    "I came because he was sick."
]

# Constituency parsing function
def constituency_parse(sentence):
    return nltk.ChartParser().parse(sentence.split())

# Check well-formedness of sentences
def check_wellformedness(sentences):
    for sentence in sentences:
        try:
            # Parse the sentence
            parse_tree = next(constituency_parse(sentence))
            # Check for common mistakes in parse tree
            # Add your checks here based on identified patterns
            # For example:
            if 'SBAR' in str(parse_tree):
                print("Subordinate clause found:", sentence)
            else:
                print("Sentence is well-formed:", sentence)
        except Exception as e:
            print("Error parsing sentence:", sentence, e)

# Check well-formedness of sentences
check_wellformedness(sentences)


python(69460) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.




[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.
[nltk_data] Downloading package words to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.
[nltk_data] Downloading package treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...


Error parsing sentence: My dog with a broken leg I not want. ChartParser.__init__() missing 1 required positional argument: 'grammar'
Error parsing sentence: I do not want my dog with a broken leg. ChartParser.__init__() missing 1 required positional argument: 'grammar'
Error parsing sentence: I came because he was sick. ChartParser.__init__() missing 1 required positional argument: 'grammar'


[nltk_data]   Unzipping corpora/treebank.zip.


In [None]:
!pip install nltk

import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('treebank')
nltk.download('universal_tagset')

import nltk
from nltk.parse import DependencyGraph

sentence = 'The cat sat on the mat.'

# Tokenize the sentence
tokens = nltk.word_tokenize(sentence)

# Part-of-speech tagging
pos_tags = nltk.pos_tag(tokens)

# Convert POS tags to Universal Dependency tags
pos_tags = [(word, nltk.map_tag('en-ptb', 'universal', tag)) for word, tag in pos_tags]

# Create a string in CoNLL format
conll_format = '\n'.join(['\t'.join((str(i+1), word, '_', pos, pos, '_',
                                    str(head), rel if rel else 'ROOT', '_', '_'))
                                    for i, ((word, pos), (head, rel)) in enumerate(zip(pos_tags, [(0, None)] + [(i+1, 'root') for i in range(len(tokens))]))])

# Parse the CoNLL format string into a DependencyGraph
graph = DependencyGraph(conll_format)

# Print the dependencies
print(graph.tree())


python(70194) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


(The (cat (sat (on (the (mat .))))))


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package treebank is already up-to-date!
[nltk_data] Downloading package universal_tagset to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package universal_tagset is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if parser.root is None or parser.root['rel'] != 'root':
        print("Error: Sentence does not have a proper root")
        mistake_count += 1

    # Check for missing constituents and other criteria
    for node in parser.nodes.values():
        # Check for missing determiners in noun phrases
        if node['tag'] == 'NN' and 'DT' not in node['deps']:
            print("Error: Missing determiner in noun phrase")
            mistake_count += 1

        # Check for subordinating conjunctions without main verbs or gerunds
        if node['tag'] in ['IN', 'RB'] and 'ccomp' not in node['deps'] and 'xcomp' not in node['deps']:
            print("Error: Subordinating conjunction used incorrectly")
            mistake_count += 1

    return mistake_count

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Sentence: Because it was raining.
Error: Sentence does not have a proper root
Error: Subordinating conjunction used incorrectly
Sentence: I went to the store.
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Sentence: Dog not want to play.
Error: Sentence does not have a proper root
Error: Subordinating conjunction used incorrectly
Total mistakes in the essay: 10


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if parser.root is None or parser.root['rel'] != 'root':
        print("Error: Sentence does not have a proper root")
        mistake_count += 1

    # Check for missing constituents and other criteria
    for node in parser.nodes.values():
        # Check for missing determiners in noun phrases
        if node['tag'] == 'NN' and 'DT' not in node['deps']:
            print("Error: Missing determiner in noun phrase")
            mistake_count += 1

        # Check for subordinating conjunctions without main verbs or gerunds
        if node['tag'] in ['IN', 'RB'] and 'ccomp' not in node['deps'] and 'xcomp' not in node['deps']:
            print("Error: Subordinating conjunction used incorrectly")
            mistake_count += 1

    return mistake_count

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Tokens: ['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
POS Tags: [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN'), ('.', '.')]
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Error: Subordinating conjunction used incorrectly
Error: Missing determiner in noun phrase
Sentence: Because it was raining.
Tokens: ['Because', 'it', 'was', 'raining', '.']
POS Tags: [('Because', 'IN'), ('it', 'PRP'), ('was', 'VBD'), ('raining', 'VBG'), ('.', '.')]
Error: Sentence does not have a proper root
Error: Subordinating conjunction used incorrectly
Sentence: I went to the store.
Tokens: ['I', 'went', 'to', 'the', 'store', '.']
POS Tags: [('I', 'PRP'), ('went', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('store', 'NN'), ('.', '.')]
Error: Sentence does not have a proper root
Error: Missing determiner in noun phrase
Sentence: Dog not want to play.
Tokens: ['Dog', 'not', 'want', 'to', 'play', '.']
POS 

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
##################### FINAL #######################

In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1

    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True

# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Tokens: ['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
POS Tags: [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN'), ('.', '.')]
Sentence: Because it was raining.
Tokens: ['Because', 'it', 'was', 'raining', '.']
POS Tags: [('Because', 'IN'), ('it', 'PRP'), ('was', 'VBD'), ('raining', 'VBG'), ('.', '.')]
Error: Subordinating conjunction is used incorrectly
Sentence: I went to the store.
Tokens: ['I', 'went', 'to', 'the', 'store', '.']
POS Tags: [('I', 'PRP'), ('went', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('store', 'NN'), ('.', '.')]
Sentence: Dog not want to play.
Tokens: ['Dog', 'not', 'want', 'to', 'play', '.']
POS Tags: [('Dog', 'NNP'), ('not', 'RB'), ('want', 'VB'), ('to', 'TO'), ('play', 'VB'), ('.', '.')]
Error: Main sentence formation is not proper
Total mistakes in the essay: 2


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1

    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        print("Error: Verb agreement and tense consistency issue")
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        print("Error: Conjunction usage is incorrect")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True

# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] in ['VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or pos_tags[i-1][1] != 'NN':  # Verb is not preceded by a noun
                return False
    return True

# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Tokens: ['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
POS Tags: [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN'), ('.', '.')]
Sentence: Because it was raining.
Tokens: ['Because', 'it', 'was', 'raining', '.']
POS Tags: [('Because', 'IN'), ('it', 'PRP'), ('was', 'VBD'), ('raining', 'VBG'), ('.', '.')]
Error: Subordinating conjunction is used incorrectly
Error: Verb agreement and tense consistency issue
Sentence: I went to the store.
Tokens: ['I', 'went', 'to', 'the', 'store', '.']
POS Tags: [('I', 'PRP'), ('went', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('store', 'NN'), ('.', '.')]
Error: Verb agreement and tense consistency issue
Sentence: Dog not want to play.
Tokens: ['Dog', 'not', 'want', 'to', 'play', '.']
POS Tags: [('Dog', 'NNP'), ('not', 'RB'), ('want', 'VB'), ('to', 'TO'), ('play', 'VB'), ('.', '.')]
Error: Main sentence formation is not proper
Total mistakes in the essay: 4


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1

    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        print("Error: Verb agreement and tense consistency issue")
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        print("Error: Conjunction usage is incorrect")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True

# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in ['VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or pos_tags[i-1][1] != 'PRP':  # Verb is not preceded by a personal pronoun
                return False
    return True


# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
essay = """
The cat sat on the mat.
Because it was raining.
I went to the store.
Dog not want to play.
"""

count_mistakes_in_essay(essay)


Sentence: 
The cat sat on the mat.
Tokens: ['The', 'cat', 'sat', 'on', 'the', 'mat', '.']
POS Tags: [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('on', 'IN'), ('the', 'DT'), ('mat', 'NN'), ('.', '.')]
Error: Verb agreement and tense consistency issue
Sentence: Because it was raining.
Tokens: ['Because', 'it', 'was', 'raining', '.']
POS Tags: [('Because', 'IN'), ('it', 'PRP'), ('was', 'VBD'), ('raining', 'VBG'), ('.', '.')]
Error: Subordinating conjunction is used incorrectly
Error: Verb agreement and tense consistency issue
Sentence: I went to the store.
Tokens: ['I', 'went', 'to', 'the', 'store', '.']
POS Tags: [('I', 'PRP'), ('went', 'VBD'), ('to', 'TO'), ('the', 'DT'), ('store', 'NN'), ('.', '.')]
Sentence: Dog not want to play.
Tokens: ['Dog', 'not', 'want', 'to', 'play', '.']
POS Tags: [('Dog', 'NNP'), ('not', 'RB'), ('want', 'VB'), ('to', 'TO'), ('play', 'VB'), ('.', '.')]
Error: Main sentence formation is not proper
Total mistakes in the essay: 4


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1

    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        print("Error: Verb agreement and tense consistency issue")
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        print("Error: Conjunction usage is incorrect")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True

# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in ['VBZ', 'VBP', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or (pos_tags[i-1][1] != 'PRP' and pos_tags[i-1][1] != 'NN'):  # Verb is not preceded by a personal pronoun or noun
                return False
    return True



# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True



# # Function to check for plurality agreement of nouns and verbs
# def check_plurality_agreement(pos_tags):
#     # Initialize variables to track noun and verb plurality
#     noun_plurality = None
#     verb_plurality = None

#     # Check for plurality agreement of nouns and verbs
#     for word, tag in pos_tags:
#         if tag.startswith('NN'):  # Nouns
#             if noun_plurality is None:
#                 noun_plurality = tag.endswith('S')
#             elif noun_plurality != tag.endswith('S'):
#                 return False
#         elif tag.startswith('VB'):  # Verbs
#             if verb_plurality is None:
#                 verb_plurality = tag.endswith('S')
#             elif verb_plurality != tag.endswith('S'):
#                 return False
#     return True

# Function to check if there is plural/singular agreement between noun and verb
def is_plural_singular_agreement_correct(pos_tags):
    # Find the index of the first verb in the sentence
    verb_index = next((i for i, (word, tag) in enumerate(pos_tags) if tag.startswith('VB')), None)
    if verb_index is not None:
        # Find the index of the first noun occurring before the verb
        noun_index = next((i for i in range(verb_index) if pos_tags[i][1].startswith('NN')), None)
        if noun_index is not None:
            # Check if the noun and verb agree in plurality
            noun_tag = pos_tags[noun_index][1]
            verb_tag = pos_tags[verb_index][1]
            if noun_tag.endswith('S') and not verb_tag.endswith('S'):  # Noun is plural, but verb is singular
                return False
            elif not noun_tag.endswith('S') and verb_tag.endswith('S'):  # Noun is singular, but verb is plural
                return False
    return True



# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        print('=================================================')
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)

# Example usage
# essay = """
# The cat sat on the mat.
# Because it was raining.
# I went to the store.
# Dog not want to play.
# """
essay = ''''This is an important aspect of today time.
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
More people go to this program television to talk about your problem, that is very radicate in my nation.
The modern society rappresented the perfect ambient to influenced the minds of all the person.
In my self is present the reasons of this statement, that is one of the problem of the life.
But not all the people and the time is in accord with this problem, because any time the person is too according with the make products.
Thus I agree with this statement, because this event is present in my life every day, and rappresented the problem with I do fighting.
But to explain all the aspect about this argoment is very inportant to illustre any examples.
The television programs that every day introduce in the minds more argoment, news and other problem, or breaking news, is the first actor in this process.
This opinion rappresented my self in my life, because for me the life of all the people is not possible to influence by the activity of any person.
The society lose the propriety when this problem will rappresent the must argoment of the talk and the life of the people, because as very difficult live at a time with this argoment.
The my request is that the new politics discuss about this problem. '''
count_mistakes_in_essay(essay)


Sentence: 'This is an important aspect of today time.
Tokens: ["'This", 'is', 'an', 'important', 'aspect', 'of', 'today', 'time', '.']
POS Tags: [("'This", 'NN'), ('is', 'VBZ'), ('an', 'DT'), ('important', 'JJ'), ('aspect', 'NN'), ('of', 'IN'), ('today', 'NN'), ('time', 'NN'), ('.', '.')]
Error: Constituents are not formed properly
Sentence: This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Tokens: ['This', 'products', 'rathen', 'are', 'not', 'much', 'better', ',', 'but', 'today', 'is', 'not', 'important', 'the', 'really', 'character', 'of', 'the', 'product', ',', 'but', 'only', 'the', 'money', 'and', 'the', 'client', 'not', 'rappresented', 'the', 'important', 'actor', 'in', 'this', 'process', '.']
POS Tags: [('This', 'DT'), ('products', 'NNS'), ('rathen', 'NN'), ('are', 'VBP'), ('not', 'RB'), ('much', 'JJ'), ('better', 'RBR'), (',', ','), ('bu

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!


In [None]:
import nltk
from nltk.parse import DependencyGraph

# Download the necessary resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('tagsets')
nltk.download('dependency_treebank')

# Function to check syntactic well-formedness of a sentence
def check_syntactic_wellformedness(sentence):
    print("Sentence:", sentence)

    # Tokenize the sentence
    tokens = nltk.word_tokenize(sentence)
    print("Tokens:", tokens)

    # Perform Part-of-Speech tagging
    pos_tags = nltk.pos_tag(tokens)
    print("POS Tags:", pos_tags)

    # Convert POS tags to string format compatible with DependencyGraph
    dep_str = "\n".join([f"{i+1}\t{token}\t{tag}\t{idx}\t{tag}\t_\t{head}\t_\t_\t_"
                         for i, ((token, tag), (idx, head)) in enumerate(zip(pos_tags, enumerate(range(1, len(pos_tags) + 1))))])

    # Perform dependency parsing
    parser = DependencyGraph(dep_str, top_relation_label='root')

    # Counter for mistakes
    mistake_count = 0

    # Check criteria for main sentence formation
    if not is_main_sentence_formed_properly(pos_tags):
        print("Error: Main sentence formation is not proper")
        mistake_count += 1

    # Check for missing constituents
    if not are_constituents_formed_properly(pos_tags):
        print("Error: Constituents are not formed properly")
        mistake_count += 1

    # Check for subordinating conjunctions
    if not is_subordinating_conjunction_correct(pos_tags):
        print("Error: Subordinating conjunction is used incorrectly")
        mistake_count += 1

    # Check for verb agreement and tense consistency
    if not is_verb_agreement_and_tense_consistent(pos_tags):
        print("Error: Verb agreement and tense consistency issue")
        mistake_count += 1

    # Check for conjunction usage
    if not is_conjunction_usage_correct(pos_tags):
        print("Error: Conjunction usage is incorrect")
        mistake_count += 1

    # Check for plurality agreement of noun with verb
    if not is_plural_singular_agreement_correct(pos_tags):
        print("Error: Plurality agreement of noun with verb is incorrect")
        mistake_count += 1

    return mistake_count

# Function to check if main sentence formation is proper
def is_main_sentence_formed_properly(pos_tags):
    # Check if the sentence starts with a valid word
    first_word_tag = pos_tags[0][1]
    if first_word_tag in ['VB', 'VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Verbs
        return False

    # Check for negation adverb before the main verb
    for i in range(1, len(pos_tags)):
        if pos_tags[i][1] == 'VB' and pos_tags[i-1][1] == 'RB' and pos_tags[i-1][0].lower() == 'not':
            return False

    return True

# Function to check if constituents are formed properly
def are_constituents_formed_properly(pos_tags):
    # Check for missing determiners in noun phrases
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'NN' and (i == 0 or pos_tags[i-1][1] != 'DT'):
            return False
    return True

# Function to check if subordinating conjunction is used correctly
def is_subordinating_conjunction_correct(pos_tags):
    subordinating_conjunctions = ['when', 'although', 'if', 'because']
    for word, tag in pos_tags:
        if word.lower() in subordinating_conjunctions:
            # Check if the corresponding clause is finite or includes a gerund
            if tag not in ['VBP', 'VBZ', 'VBG', 'VBD', 'VBN']:  # Finite verbs or gerunds
                return False
    return True

# Function to check if verb agreement and tense consistency are correct
def is_verb_agreement_and_tense_consistent(pos_tags):
    for i in range(len(pos_tags)):
        word, tag = pos_tags[i]
        if tag in ['VBZ', 'VBP', 'VBD', 'VBN']:  # Verbs in non-base form
            if i == 0 or (pos_tags[i-1][1] != 'PRP' and pos_tags[i-1][1] != 'NN'):  # Verb is not preceded by a personal pronoun or noun
                return False
    return True

# Function to check if conjunction usage is correct
def is_conjunction_usage_correct(pos_tags):
    for i in range(len(pos_tags)):
        if pos_tags[i][1] == 'CC':  # Conjunction
            if i == 0 or i == len(pos_tags) - 1:  # Conjunction appears at the beginning or end of the sentence
                return False
    return True

# Function to check if there is plural/singular agreement between noun and verb
def is_plural_singular_agreement_correct(pos_tags):
    # Find the index of the first verb in the sentence
    verb_index = next((i for i, (word, tag) in enumerate(pos_tags) if tag.startswith('VB')), None)
    if verb_index is not None:
        # Find the index of the first noun occurring before the verb
        noun_index = next((i for i in range(verb_index) if pos_tags[i][1].startswith('NN')), None)
        if noun_index is not None:
            # Check if the noun and verb agree in plurality
            noun_tag = pos_tags[noun_index][1]
            verb_tag = pos_tags[verb_index][1]
            if noun_tag.endswith('S') and not verb_tag.endswith('S'):  # Noun is plural, but verb is singular
                return False
            elif not noun_tag.endswith('S') and verb_tag.endswith('S'):  # Noun is singular, but verb is plural
                return False
    return True

# Function to count mistakes in an essay
def count_mistakes_in_essay(essay):
    # Tokenize the essay into sentences
    sentences = nltk.sent_tokenize(essay)

    # Counter for total mistakes in the essay
    total_mistakes = 0

    # Iterate through each sentence
    for sentence in sentences:
        print('=================================================')
        # Check syntactic well-formedness of the sentence
        mistakes_in_sentence = check_syntactic_wellformedness(sentence)

        # Update total mistake count
        total_mistakes += mistakes_in_sentence

    # Print total sentences in the essay
    print("Length:", len(sentences))

    # Print total mistakes in the essay
    print("Total mistakes in the essay:", total_mistakes)


essay = ''''This is an important aspect of today time.
This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Every day any people buy same products that is not rappresented the your necessity, but is only important buy any product.
To explain this argoment in my nation, at the television, there is an program that discuss of the problem rappresented by this.
More people go to this program television to talk about your problem, that is very radicate in my nation.
The modern society rappresented the perfect ambient to influenced the minds of all the person.
In my self is present the reasons of this statement, that is one of the problem of the life.
But not all the people and the time is in accord with this problem, because any time the person is too according with the make products.
Thus I agree with this statement, because this event is present in my life every day, and rappresented the problem with I do fighting.
But to explain all the aspect about this argoment is very inportant to illustre any examples.
The television programs that every day introduce in the minds more argoment, news and other problem, or breaking news, is the first actor in this process.
This opinion rappresented my self in my life, because for me the life of all the people is not possible to influence by the activity of any person.
The society lose the propriety when this problem will rappresent the must argoment of the talk and the life of the people, because as very difficult live at a time with this argoment.
The my request is that the new politics discuss about this problem. '''

count_mistakes_in_essay(essay)


Sentence: 'This is an important aspect of today time.
Tokens: ["'This", 'is', 'an', 'important', 'aspect', 'of', 'today', 'time', '.']
POS Tags: [("'This", 'NN'), ('is', 'VBZ'), ('an', 'DT'), ('important', 'JJ'), ('aspect', 'NN'), ('of', 'IN'), ('today', 'NN'), ('time', 'NN'), ('.', '.')]
Error: Constituents are not formed properly
Sentence: This products rathen are not much better, but today is not important the really character of the product, but only the money and the client not rappresented the important actor in this process.
Tokens: ['This', 'products', 'rathen', 'are', 'not', 'much', 'better', ',', 'but', 'today', 'is', 'not', 'important', 'the', 'really', 'character', 'of', 'the', 'product', ',', 'but', 'only', 'the', 'money', 'and', 'the', 'client', 'not', 'rappresented', 'the', 'important', 'actor', 'in', 'this', 'process', '.']
POS Tags: [('This', 'DT'), ('products', 'NNS'), ('rathen', 'NN'), ('are', 'VBP'), ('not', 'RB'), ('much', 'JJ'), ('better', 'RBR'), (',', ','), ('bu

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package tagsets to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package tagsets is already up-to-date!
[nltk_data] Downloading package dependency_treebank to
[nltk_data]     /Users/shwetaparihar/nltk_data...
[nltk_data]   Package dependency_treebank is already up-to-date!
