In [1]:
from typing import List
import re
import random

In [2]:
# Baseline version: passes the raw pattern string to re.fullmatch for every candidate
def valid_emails(strings: List[str]) -> List[str]:
    """Return the entries of ``strings`` that look like email addresses.

    A candidate is kept only when the *entire* string matches the pattern:
    one or more of ``a-z A-Z 0-9 + _ . -``, a single ``@``, then one or more
    of ``a-z A-Z 0-9 . -``. The relative order of the input is preserved and
    a new list is returned.
    """
    pattern = "^[a-zA-Z0-9+_.-]+@[a-zA-Z0-9.-]+$"

    def matches(candidate: str) -> bool:
        # The pattern is handed to the module-level re.fullmatch as a plain
        # string on every call (no explicitly compiled pattern object here).
        return re.fullmatch(pattern, candidate) is not None

    return [candidate for candidate in strings if matches(candidate)]

In [3]:
# Faster version: builds the pattern object with re.compile before filtering
def valid_emails_fast(strings: List[str]) -> List[str]:
    """Return the entries of ``strings`` that look like email addresses.

    Applies the same pattern as ``valid_emails`` (whole-string match of
    ``local@domain`` built from ``a-z A-Z 0-9 + _ . -`` and
    ``a-z A-Z 0-9 . -`` respectively), but compiles the pattern once per
    call and reuses the compiled object for every candidate. Input order is
    preserved and a new list is returned.
    """
    compiled_pattern = re.compile("^[a-zA-Z0-9+_.-]+@[a-zA-Z0-9.-]+$")

    def matches(candidate: str) -> bool:
        # Match through the compiled object rather than the module-level helper.
        return compiled_pattern.fullmatch(candidate) is not None

    return [candidate for candidate in strings if matches(candidate)]

In [4]:
# Generate random candidate strings to benchmark the two filters on.
# The generator is seeded so that a fresh kernel + Run All produces the same
# inputs (and therefore the same filtered lists) on every run.
SEED = 42            # fixed seed for reproducible inputs
N_STRINGS = 10**6    # number of candidate strings to generate
STRING_LENGTH = 10   # characters per candidate

random.seed(SEED)
# Alphabet the candidates are drawn from; '#' is not accepted by the email
# pattern, and '@' may appear zero or several times, so many candidates are invalid.
symbols = 'abcdefghjklmn1234567890#@.'
strings = [''.join(random.choices(symbols, k=STRING_LENGTH)) for _ in range(N_STRINGS)]

In [5]:
%%time
# Time a single run of the baseline valid_emails over the generated strings;
# the filtered result is kept in `emails` for the later comparison.
emails = valid_emails(strings)

CPU times: user 614 ms, sys: 10.6 ms, total: 625 ms
Wall time: 623 ms


In [6]:
%%time
# Time a single run of the compiled-regex valid_emails_fast on the same input
# list; the filtered result is kept in `emails_new` for the later comparison.
emails_new = valid_emails_fast(strings)

CPU times: user 304 ms, sys: 4.52 ms, total: 309 ms
Wall time: 308 ms


In [7]:
# Sanity check: both implementations must return identical lists
# (same elements in the same order). The cell displays the comparison result.
emails_new == emails

True