In [70]:
import string
import numpy as np

In [71]:
class calcute_based_on_edit_distance:
  """Token-level edit distance with a breakdown of the edit operations.

  After ``get_score`` has run, the instance exposes ``cost`` (the dynamic
  programming table), ``backtrace`` (the operation chosen for every cell) and
  the counters ``num_del``, ``num_ins``, ``num_sub`` and ``num_same``.
  """

  def get_score(self, ref_tokens, pred_tokens):
    """Return the number of edits divided by the reference length.

    The table has one row per prefix of ``pred_tokens`` and one column per
    prefix of ``ref_tokens``. Walking back from the bottom-right cell counts
    how many tokens were deleted from the prediction ("del"), inserted into
    it ("ins"), substituted ("sub") or left untouched ("same").

    Args:
      ref_tokens: sequence of reference tokens.
      pred_tokens: sequence of predicted tokens.

    Returns:
      float: ``(num_del + num_ins + num_sub) / len(ref_tokens)``. When
      ``ref_tokens`` is empty there is no length to normalise by, so the
      result is 0.0 if no edit was needed and 1.0 otherwise.
    """
    num_rows = len(pred_tokens) + 1
    num_cols = len(ref_tokens) + 1

    self.backtrace = [[None] * num_cols for _ in range(num_rows)]
    self.cost = np.zeros((num_rows, num_cols))

    # First row: an empty prediction can only reach a reference prefix by
    # insertions. Each cell is labelled individually; assigning the string to
    # a list slice would spread its characters over the row instead.
    self.cost[0] = np.arange(num_cols)
    for col in range(1, num_cols):
      self.backtrace[0][col] = "ins"

    # First column: a prediction prefix can only reach an empty reference by
    # deletions. Cell (0, 0) keeps None; the back-trace stops there.
    self.cost[:, 0] = np.arange(num_rows)
    for row in range(1, num_rows):
      self.backtrace[row][0] = "del"

    for row in range(1, num_rows):
      for col in range(1, num_cols):
        if ref_tokens[col - 1] == pred_tokens[row - 1]:
          # Matching tokens cost nothing: carry the diagonal value over.
          self.cost[row][col] = self.cost[row - 1][col - 1]
          self.backtrace[row][col] = "same"
        else:
          substitution = self.cost[row - 1][col - 1]
          insert = self.cost[row][col - 1]
          delete = self.cost[row - 1][col]
          cheapest = min(insert, delete, substitution)
          self.cost[row][col] = cheapest + 1
          # Ties are resolved in the order insert, delete, substitution.
          if cheapest == insert:
            self.backtrace[row][col] = "ins"
          elif cheapest == delete:
            self.backtrace[row][col] = "del"
          else:
            self.backtrace[row][col] = "sub"

    # Walk back from the bottom-right corner and count each operation.
    i, j = len(pred_tokens), len(ref_tokens)
    self.num_sub, self.num_del, self.num_ins, self.num_same = 0, 0, 0, 0
    while i > 0 or j > 0:
      step = self.backtrace[i][j]
      if step == "same":
        self.num_same += 1
        i -= 1
        j -= 1
      elif step == "sub":
        self.num_sub += 1
        i -= 1
        j -= 1
      elif step == "del":
        self.num_del += 1
        i -= 1
      else:  # "ins"
        self.num_ins += 1
        j -= 1

    num_edits = self.num_del + self.num_ins + self.num_sub
    if len(ref_tokens) == 0:
      # Avoid dividing by zero when the reference has no tokens.
      return 1.0 if num_edits > 0 else 0.0
    return num_edits / len(ref_tokens)

In [72]:
class TER(calcute_based_on_edit_distance):
  """Word-level edit rate between a reference string and a predicted string.

  Both strings are lower-cased, stripped of ASCII and Persian punctuation and
  split on whitespace before being scored by the inherited ``get_score``.
  Only insertions, deletions and substitutions are counted; no shift
  operation is implemented here.
  """

  def __init__(self):
    persian_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ'''
    self.punctuations_list = string.punctuation + persian_punctuations

  def fit(self, ref, pred):
    """Tokenise ``ref`` and ``pred`` and return their edit rate."""
    return self.get_score(self._preprocess(ref), self._preprocess(pred))

  def _preprocess(self, text):
    """Lower-case ``text``, drop punctuation and return its whitespace tokens."""
    lowered = text.lower()
    strip_table = str.maketrans('', '', self.punctuations_list)
    return lowered.translate(strip_table).split()

  def get_edits(self):
    """Return the operation counts recorded by the most recent scoring call."""
    labels = (
        "number of delete",
        "number of insert",
        "number of substitution",
        "number of same",
    )
    counts = (self.num_del, self.num_ins, self.num_sub, self.num_same)
    return dict(zip(labels, counts))

In [73]:
# Example: the prediction carries one extra token ("pfD") compared with the
# three-token reference, so one edit is expected.
model = TER()
model.fit("i am ali", "i am ali pfD")

0.3333333333333333

In [74]:
# Per-operation counts recorded by the fit() call above.
model.get_edits()

{'number of delete': 1,
 'number of insert': 0,
 'number of substitution': 0,
 'number of same': 3}