In [10]:
import re
def extract_supplier_vendor_name(text: str):
    """
    Detect a supplier/vendor name in free-form text.

    The name must be introduced by one of the trigger keywords
    (case-insensitive, matched as whole words):
        - "supplier ABC Corporation"
        - "vendor XYZ Ltd"
        - "from Acme Industries"
        - "shipper Global Logistics"
    An optional "name" / "is" may follow the keyword ("supplier name is ...").
    The name ends at the first whole stop word (for, with, has, is, on, in,
    at), at a whitespace-preceded comma, or at the end of the text.

    Keywords are tried in the order supplier, vendor, from, shipper; the first
    keyword that yields a name of at least 2 characters wins, regardless of
    where it appears in the text.

    Args:
        text: Free-form text to search. Empty or None yields None.

    Returns:
        The extracted name with trailing punctuation removed, whitespace
        collapsed and title-casing applied, or None when no keyword
        introduces a usable name.
    """
    if not text:
        return None

    # Lazy capture so the name stops at the first stop token that follows it.
    name_group = r"([A-Za-z0-9\s\.,&\-']+?)"
    # The trailing \b keeps stop words from matching word prefixes: without it
    # " In" in "Acme Industries" was read as the stop word "in".
    stop_token = r"(?:\s+(?:(?:for|with|has|is|on|in|at)\b|,)|$)"
    # The leading \b keeps keywords from matching word tails ("worshipper").
    keyword_prefixes = (
        r"\bsupplier\s+(?:name\s+)?(?:is\s+)?",
        r"\bvendor\s+(?:name\s+)?(?:is\s+)?",
        r"\bfrom\s+(?:supplier\s+)?(?:vendor\s+)?",
        r"\bshipper\s+(?:name\s+)?(?:is\s+)?",
    )

    for prefix in keyword_prefixes:
        m = re.search(prefix + name_group + stop_token, text, flags=re.IGNORECASE)
        if not m:
            continue
        # Extract and clean the name
        name = m.group(1).strip()
        # Remove trailing punctuation
        name = re.sub(r'[,.\s]+$', '', name)
        # Clean up extra whitespace
        name = re.sub(r'\s+', ' ', name)
        # Too-short captures are discarded and the next keyword is tried.
        if len(name) >= 2:
            # str.title() treats an apostrophe as a word break and would turn
            # "Bob's" into "Bob'S"; restore the lowercase possessive suffix.
            return re.sub(r"(?<=[A-Za-z])'S\b", "'s", name.title())

    return None


In [12]:
# The text contains none of the trigger keywords ("supplier", "vendor", "from",
# "shipper") in front of the name, so no pattern matches and None is returned.
print(extract_supplier_vendor_name("QUEST COMPOSITE TECHNOLOGY")) 

None


In [13]:
import re
def extract_supplier_vendor_name(text: str):
    """
    Detect a supplier/vendor name in free-form text.

    The name must be introduced by one of the trigger keywords
    (case-insensitive, matched as whole words):
        - "supplier ABC Corporation"
        - "vendor XYZ Ltd"
        - "from Acme Industries"
        - "shipper Global Logistics"
        - "supplier QUEST COMPOSITE TECHNOLOGY(0026071)"
    An optional "name" / "is" may follow the keyword ("supplier name is ...").
    The name ends at the first whole stop word (for, with, has, on, in, at)
    or at the end of the text. Parentheses are accepted inside the name.

    Keywords are tried in the order supplier, vendor, from, shipper; the first
    keyword that yields a name of at least 2 characters wins, regardless of
    where it appears in the text.

    Args:
        text: Free-form text to search. Empty or None yields None.

    Returns:
        The extracted name with trailing punctuation removed, whitespace
        collapsed and title-casing applied, or None when no keyword
        introduces a usable name.
    """
    if not text:
        return None

    # Lazy capture so the name stops at the first stop word that follows it.
    name_group = r"([A-Za-z0-9\s\.,&\-'()]+?)"
    # \b after the stop word prevents " In" in "Acme Industries" from ending the name.
    stop_token = r"(?:\s+(?:for|with|has|on|in|at)\b|$)"
    # The leading \b keeps keywords from matching word tails ("worshipper").
    keyword_prefixes = (
        r"\bsupplier\s+(?:name\s+)?(?:is\s+)?",
        r"\bvendor\s+(?:name\s+)?(?:is\s+)?",
        r"\bfrom\s+(?:supplier\s+)?(?:vendor\s+)?",
        r"\bshipper\s+(?:name\s+)?(?:is\s+)?",
    )

    for prefix in keyword_prefixes:
        m = re.search(prefix + name_group + stop_token, text, flags=re.IGNORECASE)
        if not m:
            continue
        # Extract and clean the name
        name = m.group(1).strip()
        # Remove trailing punctuation (parentheses are kept as part of the name)
        name = re.sub(r'[,.\s]+$', '', name)
        # Clean up extra whitespace
        name = re.sub(r'\s+', ' ', name)
        # Too-short captures are discarded and the next keyword is tried.
        if len(name) >= 2:
            # str.title() treats an apostrophe as a word break and would turn
            # "Bob's" into "Bob'S"; restore the lowercase possessive suffix.
            return re.sub(r"(?<=[A-Za-z])'S\b", "'s", name.title())

    return None

In [14]:
# Still None: accepting parentheses in the name does not help because the text
# has no trigger keyword ("supplier", "vendor", "from", "shipper") before the name.
print(extract_supplier_vendor_name("QUEST COMPOSITE TECHNOLOGY(0026071)")) 

None


In [15]:
# The leading "supplier" keyword triggers the match. The definition in the
# preceding cell accepts parentheses as name characters, so the "(0026071)"
# suffix is kept, and the result is title-cased.
print(extract_supplier_vendor_name("supplier QUEST COMPOSITE TECHNOLOGY(0026071)"))
# Expected output: Quest Composite Technology(0026071)

Quest Composite Technology(0026071)
