# Placeholder Check

First, let's import the necessary modules.

In [94]:
from pathlib import Path
import pandas as pd
import re
from collections import Counter

## Define a ContentFile Class

A class may not technically be required for this project, but for other projects, a class structure may be useful in keeping track of all kinds of parameters, like language, locale code, a list of errors, the contents of the file, etc. It could be a helpful container. Use this as a template for future file objects.

In [95]:
class ContentFile:
    """Container for one file's text and the placeholders found in it.

    Attributes:
        contents: Everything returned by ``file_object.read()``.
        placeholders: Result of ``get_placeholders`` applied to ``contents``.
    """

    def __init__(self, file_object):
        # Read once and reuse the text for both attributes.
        text = file_object.read()
        self.contents = text
        self.placeholders = get_placeholders(text)

## Define Functions for Placeholder Extraction and Check

In [110]:
def get_placeholders(text):
    """Return every match of PLACEHOLDER_PATTERN in ``text`` as a sorted list.

    Duplicates are kept, so the list length reflects how many times each
    placeholder occurs.
    """
    matches = re.findall(PLACEHOLDER_PATTERN, text)
    matches.sort()
    return matches

def get_errors(source_ph, target_ph):
    """Describe every placeholder-count mismatch between source and target.

    Args:
        source_ph: Iterable of placeholders found in the source text.
        target_ph: Iterable of placeholders found in the translated text.

    Returns:
        A list of human-readable messages. Messages about source placeholders
        (missing or over-used in the target) come first, in the order the
        placeholders first appear in ``source_ph``; messages about placeholders
        that exist only in the target follow. The list is empty when the
        counts agree.
    """
    expected = Counter(source_ph)
    actual = Counter(target_ph)
    report = []

    # Pass 1: placeholders the source knows about.
    for placeholder, wanted in expected.items():
        # .get(..., 0) treats a placeholder absent from the target as zero uses.
        shortfall = wanted - actual.get(placeholder, 0)
        if shortfall > 0:
            report.append(f"{shortfall} instance(s) of placeholders missing from translation: {placeholder}")
        elif shortfall < 0:
            surplus = -shortfall
            report.append(f"{surplus} instance(s) of placeholder superfluously added to the translation: {placeholder}")

    # Pass 2: placeholders that appear only in the target.
    report.extend(
        f"{found} instance(s) of non-corresponding placeholders: {placeholder}"
        for placeholder, found in actual.items()
        if placeholder not in expected
    )
    return report

## Define Constants

In [111]:
# Non-greedy match of "%{" up to the nearest "}", e.g. %{user}.
PLACEHOLDER_PATTERN = re.compile(r"%{.*?}")

# Relative directories (resolved against the current working directory).
# TRANSLATION is iterated per language folder; files are looked up under
# SOURCE using the same relative path.
TRANSLATION = Path(r"translation")
SOURCE = Path(r"source_files")

## Read Source and Target Files

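For each target file, we compute its path relative to its language folder, use that relative path to locate the corresponding source file, and read the full contents of each file into a single string.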

## Extract and Compare Placeholders

Placeholders are extracted using a regular expression and returned as a sorted list. Sorting means the comparison ignores the order in which placeholders appear, so a translation may place them in a different order than the source.

The source and target placeholder lists are then compared, and a message is printed to the console for each discrepancy.

In [112]:
# Go through each language folder and pair every translated file with its source file.
# iterdir() and rglob() yield entries in arbitrary order, so both are sorted to keep
# the printed report reproducible between runs.
for language_folder in sorted(TRANSLATION.iterdir()):
    # Only directories are language folders; skip any stray file next to them.
    if not language_folder.is_dir():
        continue
    # Not used below; kept in case later cells rely on it.
    language = language_folder.name
    for target_file in sorted(language_folder.rglob("*.txt")):
        # The source file is looked up under SOURCE at the same relative path.
        relative = target_file.relative_to(language_folder)
        source_file = SOURCE / relative

        # A translation without a source counterpart is reported instead of raising
        # FileNotFoundError, so one stray file does not abort the whole check.
        if not source_file.is_file():
            print(f"Errors detected in {target_file}.")
            print(f"    no corresponding source file found: {source_file}")
            continue

        # Read each target file and its corresponding source file.
        with open(target_file, 'r', encoding="utf-8") as t, \
             open(source_file, 'r', encoding="utf-8") as s:
            target = ContentFile(t)
            source = ContentFile(s)

        # Both files are fully read at this point, so compare after closing them.
        # Print the discrepancies to the console if the sorted lists differ.
        if source.placeholders != target.placeholders:
            print(f"Errors detected in {target_file}.")
            errors = get_errors(source.placeholders, target.placeholders)
            for error in errors:
                print("    " + error)

Errors detected in translation\Chinese (Simplified)\assets\feedback.txt.
    1 instance(s) of placeholders missing from translation: %{customerService}
    1 instance(s) of placeholders missing from translation: %{phoneNum}
    1 instance(s) of placeholders missing from translation: %{productName}
    1 instance(s) of placeholders missing from translation: %{user}
Errors detected in translation\Chinese (Simplified)\assets\new_user.txt.
    1 instance(s) of placeholders missing from translation: %{accountManager}
    1 instance(s) of placeholders missing from translation: %{accountType}
    1 instance(s) of placeholders missing from translation: %{loginCredentials}
    1 instance(s) of placeholders missing from translation: %{newUser}
Errors detected in translation\Chinese (Simplified)\notification\notification.txt.
    1 instance(s) of placeholders missing from translation: %{managerName}
    1 instance(s) of placeholders missing from translation: %{newAccount}
    1 instance(s) of pla