# In [8]:
import pandas as pd
import Levenshtein

# Read the two input workbooks (values to be matched first, then the
# reference values) from the current working directory.
old_df, original_df = (
    pd.read_excel(workbook) for workbook in ('old.xlsx', 'original.xlsx')
)

# The matching step works on text, so coerce the 'old' column to str up front.
old_df['old'] = old_df['old'].astype(str)

# Function to find the closest match in the original list
def find_closest_match(value, original_list, max_difference):
    """Return the entry of ``original_list`` closest to ``value`` by edit distance.

    ``value`` is compared after stripping leading/trailing square brackets
    and spaces. Only entries whose Levenshtein distance to that cleaned text
    is at most ``max_difference`` qualify; among those, the smallest distance
    wins and the earliest entry wins ties.

    Args:
        value: The string to look up.
        original_list: Iterable of candidate strings.
        max_difference: Largest edit distance still accepted as a match.

    Returns:
        The best qualifying candidate, or ``value`` itself (unstripped) when
        no candidate is within ``max_difference``.
    """
    needle = value.strip('[] ')

    best_candidate = None
    best_distance = None
    for candidate in original_list:
        current = Levenshtein.distance(needle, candidate)
        # Strict '<' keeps the first candidate seen when distances tie.
        if current <= max_difference and (
            best_distance is None or current < best_distance
        ):
            best_candidate = candidate
            best_distance = current

    if best_distance is None:
        # Nothing was close enough: hand back the input untouched.
        return value
    return best_candidate

# Get maximum difference from the user (int() raises ValueError if the
# entered text is not an integer).
max_diff = int(input("Enter the maximum allowed character difference: "))

# Build the candidate list once instead of once per row inside the lambda.
# Blank cells are dropped and the remaining values are converted to str so
# that every candidate is text, like the string-typed 'old' column.
original_values = original_df['original'].dropna().astype(str).tolist()

# Apply the function to create the new column
old_df['new'] = old_df['old'].apply(
    lambda x: find_closest_match(x, original_values, max_difference=max_diff)
)

# Save the result to a new Excel file (without the index column)
old_df.to_excel('new.xlsx', index=False)