In [None]:
import pandas as pd
import re

def extract_address(summary, suffix_regex):
    """
    Extract a street address from a free-text summary.

    An address is recognised as: one or more digits, a single space, one or
    more word/whitespace characters, and finally a street suffix matched by
    ``suffix_regex``. Matching is case-insensitive. The search starts at the
    leftmost digit run that can begin an address, and the street-name part
    is greedy, so the result extends to the last suffix that can be reached
    through word/whitespace characters only.

    Parameters:
        summary (str): Text to search. Any non-string value (for example
            None, or the float NaN that pandas uses for an empty cell) is
            treated as containing no address.
        suffix_regex (str): A regex pattern for address suffixes; it may
            itself be an alternation of several patterns joined with "|".

    Returns:
        str: The extracted address if found, otherwise None.
    """
    # Missing values are not strings; re.search would raise TypeError on
    # them, so report "no address" instead of aborting the whole run.
    if not isinstance(summary, str):
        return None

    # The suffix pattern is wrapped in its own group so that any top-level
    # "|" inside suffix_regex cannot split off the number/street-name prefix.
    pattern = r'(\d+ [\w\s]+(' + suffix_regex + r'))'
    match = re.search(pattern, summary, re.IGNORECASE)
    return match.group() if match else None

def main(input_file, output_file):
    """
    Add an 'Extracted Address' column to a spreadsheet and save the result.

    The workbook at ``input_file`` is loaded, every value of its 'Summary'
    column is passed through ``extract_address``, and the resulting frame is
    written to ``output_file`` without the index column.

    Parameters:
        input_file: Path of the Excel file to read; it must contain a
            'Summary' column.
        output_file: Path the processed Excel file is written to.
    """
    # Street-suffix spelling variants (regex) mapped to a canonical name.
    # Only the regex keys are used below; the canonical names are not.
    suffixes = {
        r'\b(allee|ally|aly)\b': ' alley',
        r'\b(anex|annex|annx|anx)\b': ' anex',
        r'\b(arc|arcade)\b': ' arcade',
        r'\b(av|ave|aven|avenu|avenue|avn|avnue)\b': ' avenue',
        r'\b(bayoo|bayou)\b': ' bayou',
        # (Add remaining suffix patterns here similarly)
        r'\b(wells|wls)\b': ' wells'
    }

    # One alternation covering every suffix pattern, in dictionary order
    suffix_regex = "|".join(suffixes)

    # Load the workbook
    frame = pd.read_excel(input_file)

    # Run the extractor over each summary, passing the shared suffix regex
    frame['Extracted Address'] = frame['Summary'].apply(
        extract_address, args=(suffix_regex,)
    )

    # Persist the augmented table, leaving out the index column
    frame.to_excel(output_file, index=False)

    print(f"Processed file saved to {output_file}")

# Parameters
input_file = "input.xlsx"  # Update with your actual input file path
output_file = "output.xlsx"  # Update with your desired output file path

# Run the main function only when this file is executed directly (as a script
# or notebook cell), so that importing it does not trigger any file I/O.
if __name__ == "__main__":
    main(input_file, output_file)