In [1]:

# Transliteration table: annotation label (as written in the XML <name> tag)
# -> Latin/ASCII equivalent used in the exported plate strings.
# Labels that are not listed here are passed through unchanged by the lookup
# code (`persian_to_equivalent.get(label, label)`).
persian_to_equivalent = {
    "ب": "B",
    "ج": "J",
    "د": "D",
    "س": "S",
    "ص": "Ṣ",
    "ط": "Ṭ",
    "ق": "Q",
    "ل": "L",
    "م": "M",
    "و": "V",
    "ه": "H",
    "ه‍": "H",  # second spelling of the same letter (differs only by an invisible joiner)
    "ن": "N",
    # Yeh exists as two look-alike code points; accept both so a label typed
    # on an Arabic keyboard layout is not silently left untransliterated.
    "ی": "Y",  # U+06CC ARABIC LETTER FARSI YEH
    "ي": "Y",  # U+064A ARABIC LETTER YEH
    "الف": "A",  # the label is the spelled-out letter name, not the single glyph
    "پ": "P",
    "ت": "T",
    "ث": "Ṯ",
    "ز": "Z",
    "ژ": "Ž",
    "ژ (معلولین و جانبازان)": "Ž",
    "ش": "Š",
    "ع": "O",
    "ف": "F",
    # Kaf exists as two look-alike code points as well.
    "ك": "K",  # U+0643 ARABIC LETTER KAF
    "ک": "K",  # U+06A9 ARABIC LETTER KEHEH (the form Persian text normally uses)
    "گ": "G",
    # Digits map to themselves so every expected label has an explicit entry.
    "0": "0",
    "1": "1",
    "2": "2",
    "3": "3",
    "4": "4",
    "5": "5",
    "6": "6",
    "7": "7",
    "8": "8",
    "9": "9"
}

In [2]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

In [6]:
# Validation set: read every annotation XML, rebuild the plate string from the
# character boxes (left to right), and write one CSV row per file.
# The split threshold between character groups is derived per file.

import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

# Directory containing XML files
xml_dir = '/home/infres/lotfi-23/validation/XML'  # Replace with the path to your XML files
output_csv_path = '/home/infres/lotfi-23/validation/string_plate_validatoin.csv'  # Path to save the CSV file

# Label of the box that covers the whole plate area; it is not a character.
WHOLE_PLATE_LABEL = "کل ناحیه پلاک"
# A gap wider than mean + GAP_STD_SCALE * std of all gaps starts a new group.
GAP_STD_SCALE = 2.5


def _read_character_boxes(root, source_name):
    """Return the (xmin, character) pairs of one annotation, sorted by xmin.

    Args:
        root: parsed XML root; every ``<object>`` is expected to carry a
            ``<name>`` and a ``<bndbox>/<xmin>``.
        source_name: file name used only in the skip messages.

    Objects with a missing/empty name or xmin, or a non-numeric xmin, are
    reported and skipped instead of aborting the whole run. The whole-plate
    box is dropped. Labels are transliterated through the notebook-level
    ``persian_to_equivalent`` table; unknown labels are kept as they are.
    """
    boxes = []
    for obj in root.iter("object"):
        # findtext returns None when the element is absent and "" when it is
        # present but empty, so neither case can raise here.
        label = (obj.findtext("name") or "").strip()
        xmin_text = obj.findtext("bndbox/xmin")
        if not label or xmin_text is None:
            print(f"Skipping object without name or xmin in {source_name}")
            continue
        try:
            xmin = float(xmin_text)
        except ValueError:
            print(f"Skipping invalid xmin value: {xmin_text}")
            continue
        if label == WHOLE_PLATE_LABEL:
            continue
        boxes.append((xmin, persian_to_equivalent.get(label, label)))

    boxes.sort(key=lambda box: box[0])
    return boxes


def _split_at_wide_gaps(boxes, std_scale=GAP_STD_SCALE):
    """Join x-sorted (xmin, character) pairs into strings, split at wide gaps.

    A new group starts wherever the distance to the previous xmin exceeds
    ``mean + std_scale * std`` of all consecutive distances in the file.

    Note: ``np.std`` is the population standard deviation, so no value can lie
    more than ``sqrt(n - 1)`` standard deviations above the mean of ``n``
    values. With the default scale of 2.5 a split therefore needs at least
    8 gaps (9 boxes); shorter sequences always come back as a single group.
    """
    if not boxes:
        return []

    chars = [char for _, char in boxes]
    groups = [[chars[0]]]
    if len(boxes) > 1:
        gaps = np.diff([xmin for xmin, _ in boxes])
        threshold = gaps.mean() + std_scale * gaps.std()
        for gap, char in zip(gaps, chars[1:]):
            if gap > threshold:
                groups.append([])
            groups[-1].append(char)
    return [''.join(group) for group in groups]


def plate_string_from_xml(xml_path, std_scale=GAP_STD_SCALE):
    """Parse one annotation file and return its space-separated plate string."""
    root = ET.parse(xml_path).getroot()
    boxes = _read_character_boxes(root, os.path.basename(xml_path))
    return ' '.join(_split_at_wide_gaps(boxes, std_scale))


def collect_plate_strings(directory, std_scale=GAP_STD_SCALE):
    """Return ``[file_stem, plate_string]`` rows for every ``.xml`` file in ``directory``."""
    rows = []
    # os.listdir order is arbitrary; sorting keeps the CSV reproducible.
    for xml_file in sorted(os.listdir(directory)):
        if not xml_file.endswith('.xml'):
            continue
        # splitext removes only the extension; str.replace would also remove
        # any ".xml" that occurs earlier in the file name.
        stem = os.path.splitext(xml_file)[0]
        rows.append([stem, plate_string_from_xml(os.path.join(directory, xml_file), std_scale)])
    return rows


# List of [file stem, plate string] rows
plate_data = collect_plate_strings(xml_dir)

# Convert to DataFrame and save to CSV
df = pd.DataFrame(plate_data, columns=["nameOfTheFile", "plateString"])
df.to_csv(output_csv_path, index=False)

print(f"CSV file saved at {output_csv_path}")

CSV file saved at /home/infres/lotfi-23/validation/string_plate_validatoin.csv


In [None]:
## For test dataset
# Read every annotation XML of the test split, rebuild the plate string from
# the character boxes (left to right), and write one CSV row per file.
# The split threshold between character groups is derived per file.

import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np

# Directory containing XML files
xml_dir = '/home/infres/lotfi-23/test/XML'  # Replace with the path to your XML files
output_csv_path = '/home/infres/lotfi-23/test/string_plate_test.csv'  # Path to save the CSV file

# Label of the box that covers the whole plate area; it is not a character.
WHOLE_PLATE_LABEL = "کل ناحیه پلاک"
# A gap wider than mean + GAP_STD_SCALE * std of all gaps starts a new group.
GAP_STD_SCALE = 2.5

# List of [file stem, plate string] rows
plate_data = []

# os.listdir order is arbitrary; sorting keeps the CSV reproducible.
for xml_file in sorted(os.listdir(xml_dir)):
    if not xml_file.endswith('.xml'):
        continue

    root = ET.parse(os.path.join(xml_dir, xml_file)).getroot()

    # Collect (xmin, transliterated character) for every usable object
    elements = []
    for obj in root.iter("object"):
        # findtext returns None when the element is absent and "" when it is
        # present but empty, so a malformed object cannot raise here.
        char = (obj.findtext("name") or "").strip()
        xmin_text = obj.findtext("bndbox/xmin")
        if not char or xmin_text is None:
            print(f"Skipping object without name or xmin in {xml_file}")
            continue
        try:
            xmin = float(xmin_text)
        except ValueError:
            print(f"Skipping invalid xmin value: {xmin_text}")
            continue
        if char == WHOLE_PLATE_LABEL:
            continue
        elements.append((xmin, persian_to_equivalent.get(char, char)))

    # Left-to-right reading order
    elements.sort(key=lambda item: item[0])

    # Group characters; a gap wider than the per-file threshold starts a new group.
    # np.std is the population standard deviation, so with a scale of 2.5 a
    # split needs at least 8 gaps (9 boxes); shorter sequences stay in one group.
    groups = [[elements[0][1]]] if elements else []
    if len(elements) > 1:
        gaps = np.diff([xmin for xmin, _ in elements])
        dynamic_threshold = gaps.mean() + GAP_STD_SCALE * gaps.std()
        for gap, (_, char) in zip(gaps, elements[1:]):
            if gap > dynamic_threshold:
                groups.append([])
            groups[-1].append(char)

    formatted_plate_string = ' '.join(''.join(group) for group in groups)

    # splitext removes only the extension; str.replace would also remove any
    # ".xml" that occurs earlier in the file name.
    plate_data.append([os.path.splitext(xml_file)[0], formatted_plate_string])

# Convert to DataFrame and save to CSV
df = pd.DataFrame(plate_data, columns=["nameOfTheFile", "plateString"])
df.to_csv(output_csv_path, index=False)

print(f"CSV file saved at {output_csv_path}")

CSV file saved at /home/infres/lotfi-23/test/string_plate_test.csv
