# Assessment of Typewise and ChatGPT on their GED Performance

This notebook can be used to assess the grammatical error detecion performance of Typewise and ChatGPT using the [Corpus DYS](https://nakala.fr/10.34847/nkl.ced0370u) of French dyslexic texts [(Bodard et al. 2022)](https://link.springer.com/10.1007/s10579-022-09603-6). It computes Precision/Recall/F1/Accuracy and counts the missed errors per error type for each system.



In [1]:
!pip install -Uqq ipdb
import ipdb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m775.8/775.8 KB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.4/386.4 KB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires ipython~=7.9.0, but you have ipython 8.8.0 which is incompatible.[0m[31m
[0m

In [2]:
!pip install sacremoses

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sacremoses
  Downloading sacremoses-0.0.53.tar.gz (880 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m880.6/880.6 KB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.53-py3-none-any.whl size=895260 sha256=d79cef30f88e7dfb297f252ed0f50659143b55fe457e17c5b33d3174ff416397
  Stored in directory: /root/.cache/pip/wheels/82/ab/9b/c15899bf659ba74f623ac776e861cf2eb8608c1825ddec66a4
Successfully built sacremoses
Installing collected packages: sacremoses
Successfully installed sacremoses-0.0.53


In [3]:
import pandas as pd
import re
import requests
import json
import csv
import ipdb
from sklearn.model_selection import train_test_split
from sacremoses import MosesTokenizer
from collections import Counter

## Data Preparation

### Utility Classes and Functions for Data Preparation

In [4]:
class IndexedToken():

  def __init__(self, token, offset, length, label=None, types=None):
    self.token = token
    self.char_offset = offset
    self.length = length
    self.label = label
    self.types = types

  def __repr__(self):
    return str((self.token, self.char_offset, self.length, self.label))

  def set_label(self, label):
    self.label = label

  def set_types(self, types):
    self.types = types

class TokenizedSegment():

  def __init__(self, tokens):
    self.indexed_tokens = tokens

  def __repr__(self):
    return str(self.indexed_tokens)
  
  def __iter__(self):
    return TokenizedSegmentIter(self)

  def __getitem__(self, index):
    return self.indexed_tokens[index]



class TokenizedSegmentIter():
  def __init__(self, tokenized_segment):
    self._indexed_tokens = tokenized_segment.indexed_tokens
    self._current_index = 0

  def __iter__(self):
    return self

  def __next__(self):
    if self._current_index < len(self._indexed_tokens):
      token = self._indexed_tokens[self._current_index]
      self._current_index += 1
      return token
    raise StopIteration

In [5]:
def tokenize_with_offsets(sentence):
    tokenizer = MosesTokenizer()
    tokens = tokenizer.tokenize(sentence.strip())
    token_offsets = []
    current_offset = 0
    for i, token in enumerate(tokens):
        token_offset = IndexedToken(token, current_offset, len(token))
        token_offsets.append(token_offset)
        current_offset += len(token)
        while (current_offset + 1) < len(sentence) and sentence[current_offset].isspace():
          current_offset += 1
    tokenized_sent = TokenizedSegment(token_offsets)
    return tokenized_sent

### Gold Data

In [6]:
# if an error form contains multiple error types, there are missing values in the csv

def fill_missing_values(infile, outfile):
    rows = []
    # Open the CSV file and read the rows
    with open(infile, 'r', encoding='utf-8') as file:
        reader = csv.reader(file)
        for row in reader:
            rows.append(row)
    # Iterate through the rows and fill in missing values
    for i, row in enumerate(rows):
        # leave incomplete rows because of omissions unchanged
        if not row[1] and row[2]:
          continue
        # leave incomplete rows because of additions unchanged
        elif row[1] and not row[2]:
          continue
        # fill in missing values if multiple types are annotated for an error
        elif not row[1] and not row[2] and not row[3]:
          for j, value in enumerate(row):
            if not value:
              row[j] = rows[i-1][j]
    # Write the updated rows back to the CSV file
    with open(outfile, 'w', encoding='utf-8') as file:
        writer = csv.writer(file)
        for row in rows:
            writer.writerow(row)

In [10]:
def clean_annotated_segment(segment):
  # remove annotated word omissions
  segment = re.sub(r"<err>\[.+\]</err>\s?", "", segment)
  # remove err tags
  segment = segment.replace("<err>", "").replace("</err>", "")

  return segment


def read_gold_data(path):
  df = pd.read_csv(path)

  # Create a new column with the cleaned sentences
  df["Phrase ou syntagme cleaned"] = df["Phrase ou syntagme"].apply(lambda x: clean_annotated_segment(x))

  # Group the dataframe by the cleaned sentences and keep all the original rows
  grouped = df.groupby("Phrase ou syntagme cleaned")

  merged_rows = []

  for name, group in grouped:
      errors = []

      for index, row in group.iterrows():
          # Get the original sentence and the error
          sentence = row["Phrase ou syntagme cleaned"].strip()
          annotated_sentence = row["Phrase ou syntagme"].strip()

          error = row["Forme erronée"] if row["Forme erronée"] else ""        

          # Find the character offset and end indexes of the error in the sentence
          if row["Type de l'erreur"] == "omission mot":
            correction = row["Forme correcte"]
            try:
              start = annotated_sentence.index(f"<err>[{correction.strip()}]</err>")
              end = start
            except AttributeError:
              continue
          else:
            start = sentence.index(f"{error.strip()}")
            end = start + len(f"{error.strip()}")
          
          errors.append({
              "offset": start,
              "length": end - start,
              "type": row["Type de l'erreur"],
              "error_form": error
          })

      merged_rows.append({
          "sentence": sentence,
          "errors": errors
      })
  return merged_rows

In [11]:
#fill_missing_values("annotation_erreurs_corpus_dys.csv", "annotation_erreurs_corpus_dys_filled.csv")

segments = read_gold_data("annotation_erreurs_corpus_dys_filled.csv")

train, test = train_test_split(segments, test_size=0.4, random_state=42)

with open("gold_data_test.jsonl", "w", encoding="utf-8") as outf:
  for seg in test:
    json_line = json.dumps(seg)
    outf.write(f"{json_line}\n")



In [23]:
# prepare gold data for comparison with system outputs

def prepare_gold_data(data):
  gold_data = []
  for segment in data:
    tokenized_sent = tokenize_with_offsets(segment["sentence"])

    error_offsets = [error["offset"] for error in segment["errors"]]

    error_tokens = [error["error_form"] for error in segment["errors"]]

    tokens = [tok.token for tok in tokenized_sent]

    for err_tok in error_tokens:
      if err_tok not in tokens:
        continue
    errors = {}
    for error in segment["errors"]:
      if error["offset"] in errors.keys():
        errors[error["offset"]].append(error["type"])
      else:
        errors[error["offset"]] = [error["type"]] 
    for i, token in enumerate(tokenized_sent):
        if token.char_offset in errors.keys():
          tokenized_sent[i].set_label("err")
          tokenized_sent[i].set_types(errors[token.char_offset])
        else:
          tokenized_sent[i].set_label("corr")
    
    gold_data.append(tokenized_sent)

  return gold_data


### Typewise Data

In [14]:
# Use Typewise API to annotate test data and dump the results to json files

def annotate_with_typewise(data, correction_outfile, grammar_outfile):

  annotated_data_correction = []
  annotated_data_grammar = []


  correction_url = "https://api.typewise.ai/latest/correction/whole_sentence"
  grammar_correction_url = "https://api.typewise.ai/latest/grammar_correction/whole_text_grammar_correction"
  
  for segment in data:

    payload = {"languages": ["fr"],
               "text": segment["sentence"].strip(),
               "remove_low_prob_tokens": False}

    correction_response = requests.post(correction_url, json=payload).json()
    grammar_response = requests.post(grammar_correction_url, json=payload).json()

    annotated_data_correction.append(correction_response)
    annotated_data_grammar.append(grammar_response)
  
  correction = {"responses": annotated_data_correction}
  grammar = {"responses": annotated_data_grammar}


  with open(correction_outfile, 'w', encoding='utf-8') as corr, open(grammar_outfile, 'w', encoding='utf-8') as gram:
    json.dump(correction, corr, indent=2)
    json.dump(grammar, gram, indent=2)





In [15]:
# prepare annotated data for comparison with gold data

def prepare_annotations_typewise(correction_path, grammar_path):
  with open(correction_path, 'r', encoding='utf-8') as correction, open(grammar_path, 'r', encoding='utf-8') as grammar:
    correction_data = json.load(correction)
    grammar_data = json.load(grammar)

  annotated_data = []
  for correction_response, grammar_response in zip(correction_data["responses"], grammar_data["responses"]):

    error_offsets = [match["start_index"] for match in correction_response["tokens"]]
    error_offsets.extend([match["startIndex"] for match in grammar_response["matches"]])

    tokenized_sent = tokenize_with_offsets(correction_response["original_text"].strip())

    # for each IndexedToken, if offset is in error_offsets, set the label to "err"
    for i, token in enumerate(tokenized_sent):
      if token.char_offset in error_offsets:
        tokenized_sent[i].set_label("err")
      else:
        tokenized_sent[i].set_label("corr")
    
    annotated_data.append(tokenized_sent)

  return annotated_data

In [None]:
# only to showcase how it was annotated, data for reproduction are in the /data folder
annotate_with_typewise(test, 'data/typewise_annotated_correction.json', 'data/typewise_annotated_grammar.json')

### ChatGPT Data

In [None]:
# print test data for ChatGPT

i = 0

for segment in test:
  if (i % 2) == 0:
    print("\n\n")
  print(segment["sentence"])
  i += 1





et tous le monde été choqué car je fesé tous se que je voulais
puis au fur et mesure des années a cause de l’exode rural l’industrie tertiaire est devenu très importante ainsi réduisent le primaire a seulement 3 pourcent de la population actives



il s’arrete sur le bas côté
tu rigol toi qui ne veu aps rété avec té grenparen qui n'on que 65 ans tu ne va pas me dir que tu est en retar de troi heur  a ceuse d'une pérsone rencontré dans la rue comme sa



puis j’ai une semaine de vacance et je risque d’aller faire du velo.
Suite a pluseurs  rechercher et remise en quetion ! sur ma situiation.



L'on pouré mettre en cause les professeur qui dé le colége dégoûte beaucoup d'élève ,
dacor dacor alé raconte la moi séte istoire



si microsoft  baise le prix de ses pack xbox 360 ses car la demende en frence de set consol est peu inportente donc le prix baiss
pour prepare un cheval il faut un cur sabot , une brosse dure , une brosse bousse , pine , et equipement du cheval c est un tapi un f

In [16]:
def get_chatGPT_error_offsets(annotated_segment):
  sent = annotated_segment["sentence"]

  current_offset = 0
  error_offsets = []
  

  for token in annotated_segment["tokens"]:
    # check index of token (0 if no whitespace, 1 if whitespace before token)
    # sometimes, chatGPT corrected erronous tokens instead of just labelling, then the substring doesn't match with the token in the source sentence
    if token[0] not in sent:
      error_offsets.append((current_offset + len(sent) - len(sent.lstrip())))
      continue
    index = sent.index(token[0])
    # add index to current_offset
    current_offset += index
    # add offset to error_offsets if the token is labelled as "err"
    if token[1] == "err":
      error_offsets.append(current_offset)
    # add length of token to current_offset
    current_offset += len(token[0])
    # cut token from input sentence
    sent = sent[index:].lstrip().lstrip(token[0])
  
  return error_offsets
    

In [17]:
def prepare_annotations_chatGPT(path):
  prepared_data = []
  annotated_segs = []
  with open(path, 'r', encoding='utf-8') as inf:
    for line in inf:
      annotated_segs.append(json.loads(line))

  # TODO: 
  for seg in annotated_segs:
    # tokenize segment
    tokenized_seg = tokenize_with_offsets(seg["sentence"])

    # TODO: get offsets for errors
    error_offsets = get_chatGPT_error_offsets(seg)
    # TODO: set labels for each token
    for i, token in enumerate(tokenized_seg):
        if token.char_offset in error_offsets:
          tokenized_seg[i].set_label("err")
        else:
          tokenized_seg[i].set_label("corr")
    
    prepared_data.append(tokenized_seg)

  return prepared_data


## Evaluate Typewise and ChatGPT

### Compute Performance in Terms of P/R/F1/ACC

In [18]:
def compute_stats(gold_data, test_data):
  total_n = 0
  tp, fp, tn, fn = 0, 0, 0, 0
  for gold, test in zip(gold_data, test_data):
    for gold_token, test_token in zip(gold, test):
      total_n += 1
      if gold_token.label == "err" and test_token.label == "err":
        tp += 1
      elif gold_token.label == "corr" and test_token.label == "corr":
        tn += 1
      elif gold_token.label == "corr" and test_token.label == "err":
        fp += 1
      elif gold_token.label == "err" and test_token.label == "corr":
        fn += 1

  try:
    precision = tp / (tp + fp)
  except ZeroDivisionError:
    precision = 0
  try:
    recall = tp / (tp + fn)
  except ZeroDivisionError:
    recall = 0
  try:
    f1 = 2 * ((precision * recall) / (precision + recall))
  except ZeroDivisionError:
    f1 = 0
  try:
    acc = (tp + tn) / total_n
  except ZeroDivisionError:
    acc = 0

  return precision, recall, f1, acc



In [19]:
def get_percentages(gold_counts, test_counts):
  percentages = {}
  for key, gold_count in gold_counts.items():
    test_count = test_counts[key]
    perc = test_count / gold_count * 100
    percentages[key] = perc
  return percentages

In [24]:
# load data for evaluation
typewise_data = prepare_annotations_typewise('data/typewise_annotated_correction.json', 'data/typewise_annotated_grammar.json')
chatGPT_data = prepare_annotations_chatGPT("data/annotated_chatGPT_clean.jsonl")
gold_data  = prepare_gold_data(test)

In [25]:
# compute stats for both systems
typewise_p, typewise_r, typewise_f1, typewise_acc = compute_stats(gold_data, typewise_data)
print("Typewise")
print("-----------------------------")
print(f"Precision: {typewise_p}\nRecall: {typewise_r}\nF1: {typewise_f1}\nAccuracy: {typewise_acc}")
print("=============================\n\n")


chatGPT_p, chatGPT_r, chatGPT_f1, chatGPT_acc = compute_stats(gold_data, chatGPT_data)
print("ChatGPT")
print("-----------------------------")
print(f"Precision: {chatGPT_p}\nRecall: {chatGPT_r}\nF1: {chatGPT_f1}\nAccuracy: {chatGPT_acc}")
print("=============================")

Typewise
-----------------------------
Precision: 0.652542372881356
Recall: 0.719626168224299
F1: 0.6844444444444445
Accuracy: 0.8301435406698564


ChatGPT
-----------------------------
Precision: 0.7394270122783083
Recall: 0.7265415549597856
F1: 0.7329276538201487
Accuracy: 0.8645869043537882


### Count Misses and Hits per Error Type

####Error Categorization according to Bodard et al 2022:

Mapping to annotation guide of corpus DYS:

- Wrong grapheme, silent letters and lexical morphograms: lettre muette + phonétisation
- Inflection errors: accord + conjugaison
- Confusion between graphemes phonetically close: approximation
- Homophones: homophones
- Split word or run-on: segmentation + apostrophe + trait d'union
- Capitalization: majuscule
- Omission: omission
- Substitution: substitution
- Word omission or repetition: omission mot + ajout mot + déplacement mot
- Addition: ajout
- Transposition: transposition
- Wrong lexical choice: substitution mots
- Liaison: liaison
- Unrecognized word: mot non reconnu
- Shifting: déplacement



In [26]:
ERROR_TYPES = {
    "lettre muette": "Wrong grapheme, silent letters and lexical morphograms",
    "phonétisation": "Wrong grapheme, silent letters and lexical morphograms",
    "accord": "Inflection errors",
    "conjugaison": "Inflection errors",
    "approximation": "Confusion between graphemes phonetically close",
    "homophones": "Homophones",
    "segmentation": "Split word or run-on",
    "apostrophe": "Split word or run-on",
    "trait d'union": "Split word or run-on",
    "majuscule": "Capitalization",
    "omission": "Omission",
    "substitution": "Substitution",
    "omission mot": "Word omission or repetition",
    "ajout mot": "Word omission or repetition",
    "déplacement mot": "Word omission or repetition",
    "ajout": "Addition",
    "transposition": "Transposition",
    "substitution mots": "Wrong lexical choice",
    "liaison": "Liaison",
    "mot non reconnu": "Unrecognized word",
    "déplacement": "Shifting"
}

In [28]:
from collections import Counter

def get_all_error_types(path):
  df = pd.read_csv(path)

  # Group the dataframe by the cleaned sentences and keep all the original rows
  grouped = df.groupby("Type de l'erreur")
  names = []
  # Iterate over the grouped dataframe
  for name, group in grouped:
    for row in group.iterrows():
      names.append(name)
  
  return Counter(names)

error_types = get_all_error_types("data/annotation_erreurs_corpus_dys.csv")
total_errs = 0
for err, count in error_types.items():
  print(err, count)
  total_errs += count
print(len(error_types))
print(total_errs)

accord 534
ajout 37
ajout mot 10
apostrophe 117
approximation 473
conjugaison 260
déplacement 2
déplacement mot 1
homophones 334
lettre muette 251
liaison 6
majuscule 93
mot non reconnu 5
omission 90
omission mot 27
phonétisation 556
segmentation 47
substitution 47
substitution mots 17
trait d'union 24
transposition 30
21
2961


In [29]:
# Error analysis for typewise & chatGPT
def analyze_errors(gold_data, test_data):
  fn_error_types = []
  tp_error_types = []
  fn, tp = 0, 0

  for gold, test in zip(gold_data, test_data):
    for gold_token, test_token in zip(gold, test):
      # hits
      if gold_token.label == "err" and test_token.label == "err":
        tp += 1
        tp_error_types.extend([ERROR_TYPES[err] for err in gold_token.types])
      # misses
      elif gold_token.label == "err" and test_token.label == "corr":
        fn += 1
        fn_error_types.extend([ERROR_TYPES[err] for err in gold_token.types])

  fn_type_count = Counter(fn_error_types)
  tp_type_count = Counter(tp_error_types)

  return fn_type_count, tp_type_count, fn, tp

In [30]:
def get_error_types_in_prepared_gold_data(data):
  error_types = []
  for tok_sent in data:
    for tok in tok_sent:
      if tok.label == "err":
        error_types.extend([ERROR_TYPES[err] for err in tok.types])
  err_type_count = Counter(error_types)
  return err_type_count


In [31]:
counted_error_types_gold = get_error_types_in_prepared_gold_data(gold_data)
for t, c in counted_error_types_gold.items():
  print(f"{t}: {c}")

Homophones: 103
Confusion between graphemes phonetically close: 183
Inflection errors: 269
Wrong grapheme, silent letters and lexical morphograms: 316
Word omission or repetition: 13
Capitalization: 41
Transposition: 14
Omission: 33
Split word or run-on: 73
Substitution: 25
Addition: 9
Wrong lexical choice: 6
Liaison: 2
Shifting: 1
Unrecognized word: 4


In [32]:
typewise_fn_type_count, typewise_tp_type_count, fn, tp = analyze_errors(gold_data, typewise_data)

print(f"Number of Misses by Typewise per Error Type")
print(f"-------------------------------------------")
for key, count in typewise_fn_type_count.items():
  print(f"{key}: {count}")
print(f"-------------------------------------------")

print(f"FNs: {typewise_fn_type_count}")
print(f"TPs: {typewise_tp_type_count}")
print(f"#FNs: {fn}")
print(f"#TPs: {tp}")

Number of Misses by Typewise per Error Type
-------------------------------------------
Homophones: 45
Word omission or repetition: 5
Inflection errors: 100
Confusion between graphemes phonetically close: 24
Transposition: 4
Substitution: 11
Wrong grapheme, silent letters and lexical morphograms: 41
Split word or run-on: 5
Wrong lexical choice: 5
Capitalization: 2
Addition: 2
Omission: 2
Unrecognized word: 1
-------------------------------------------
FNs: Counter({'Inflection errors': 100, 'Homophones': 45, 'Wrong grapheme, silent letters and lexical morphograms': 41, 'Confusion between graphemes phonetically close': 24, 'Substitution': 11, 'Word omission or repetition': 5, 'Split word or run-on': 5, 'Wrong lexical choice': 5, 'Transposition': 4, 'Capitalization': 2, 'Addition': 2, 'Omission': 2, 'Unrecognized word': 1})
TPs: Counter({'Wrong grapheme, silent letters and lexical morphograms': 275, 'Inflection errors': 169, 'Confusion between graphemes phonetically close': 159, 'Split w

In [33]:
typewise_fn_perc = get_percentages(counted_error_types_gold, typewise_fn_type_count)

ordered = dict(sorted(typewise_fn_perc.items(), key=lambda item: item[1], reverse=True))
for key, perc in ordered.items():
  print(f"{key}: {perc}")

Wrong lexical choice: 83.33333333333334
Substitution: 44.0
Homophones: 43.689320388349515
Word omission or repetition: 38.46153846153847
Inflection errors: 37.174721189591075
Transposition: 28.57142857142857
Unrecognized word: 25.0
Addition: 22.22222222222222
Confusion between graphemes phonetically close: 13.114754098360656
Wrong grapheme, silent letters and lexical morphograms: 12.974683544303797
Split word or run-on: 6.8493150684931505
Omission: 6.0606060606060606
Capitalization: 4.878048780487805
Liaison: 0.0
Shifting: 0.0


In [34]:
typewise_tp_perc = get_percentages(counted_error_types_gold, typewise_tp_type_count)

ordered = dict(sorted(typewise_tp_perc.items(), key=lambda item: item[1], reverse=True))
for key, perc in ordered.items():
  print(f"{key}: {perc}")

Liaison: 100.0
Shifting: 100.0
Capitalization: 95.1219512195122
Omission: 93.93939393939394
Split word or run-on: 93.15068493150685
Wrong grapheme, silent letters and lexical morphograms: 87.0253164556962
Confusion between graphemes phonetically close: 86.88524590163934
Addition: 77.77777777777779
Unrecognized word: 75.0
Transposition: 71.42857142857143
Inflection errors: 62.825278810408925
Word omission or repetition: 61.53846153846154
Homophones: 56.310679611650485
Substitution: 56.00000000000001
Wrong lexical choice: 16.666666666666664


In [35]:
chatGPT_fn_type_count, chatGPT_tp_type_count, fn, tp = analyze_errors(gold_data, chatGPT_data)

print(f"Number of Misses by ChatGPT per Error Type")
print(f"-------------------------------------------")
for key, count in chatGPT_fn_type_count.items():
  print(f"{key}: {count}")
print(f"-------------------------------------------")

print(f"FNs: {chatGPT_fn_type_count}")
print(f"TPs: {chatGPT_tp_type_count}")
print(f"#FNs: {fn}")
print(f"#TPs: {tp}")

Number of Misses by ChatGPT per Error Type
-------------------------------------------
Homophones: 43
Inflection errors: 79
Confusion between graphemes phonetically close: 33
Capitalization: 27
Split word or run-on: 13
Wrong grapheme, silent letters and lexical morphograms: 21
Wrong lexical choice: 5
Word omission or repetition: 3
Substitution: 4
Addition: 1
Transposition: 1
-------------------------------------------
FNs: Counter({'Inflection errors': 79, 'Homophones': 43, 'Confusion between graphemes phonetically close': 33, 'Capitalization': 27, 'Wrong grapheme, silent letters and lexical morphograms': 21, 'Split word or run-on': 13, 'Wrong lexical choice': 5, 'Substitution': 4, 'Word omission or repetition': 3, 'Addition': 1, 'Transposition': 1})
TPs: Counter({'Wrong grapheme, silent letters and lexical morphograms': 293, 'Inflection errors': 189, 'Confusion between graphemes phonetically close': 150, 'Homophones': 60, 'Split word or run-on': 60, 'Omission': 33, 'Substitution': 21,

In [36]:
chatGPT_fn_perc = get_percentages(counted_error_types_gold, chatGPT_fn_type_count)

ordered = dict(sorted(chatGPT_fn_perc.items(), key=lambda item: item[1], reverse=True))
for key, perc in ordered.items():
  print(f"{key}: {perc}")

Wrong lexical choice: 83.33333333333334
Capitalization: 65.85365853658537
Homophones: 41.74757281553398
Inflection errors: 29.36802973977695
Word omission or repetition: 23.076923076923077
Confusion between graphemes phonetically close: 18.0327868852459
Split word or run-on: 17.80821917808219
Substitution: 16.0
Addition: 11.11111111111111
Transposition: 7.142857142857142
Wrong grapheme, silent letters and lexical morphograms: 6.645569620253164
Omission: 0.0
Liaison: 0.0
Shifting: 0.0
Unrecognized word: 0.0


In [37]:
chatGPT_tp_perc = get_percentages(counted_error_types_gold, chatGPT_tp_type_count)

ordered = dict(sorted(chatGPT_tp_perc.items(), key=lambda item: item[1], reverse=True))
for key, perc in ordered.items():
  print(f"{key}: {perc}")

Omission: 100.0
Liaison: 100.0
Shifting: 100.0
Unrecognized word: 100.0
Transposition: 92.85714285714286
Wrong grapheme, silent letters and lexical morphograms: 92.72151898734177
Addition: 88.88888888888889
Substitution: 84.0
Split word or run-on: 82.1917808219178
Confusion between graphemes phonetically close: 81.9672131147541
Word omission or repetition: 76.92307692307693
Inflection errors: 70.26022304832715
Homophones: 58.252427184466015
Capitalization: 34.146341463414636
Wrong lexical choice: 16.666666666666664
