In [1]:
# Mount Google Drive at /content/drive; the later cells read the input CSV from,
# and write the results workbook to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install aiodns

Collecting aiodns
  Downloading aiodns-3.0.0-py3-none-any.whl (5.0 kB)
Collecting pycares>=4.0.0
  Downloading pycares-4.1.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (291 kB)
[K     |████████████████████████████████| 291 kB 5.1 MB/s 
Installing collected packages: pycares, aiodns
Successfully installed aiodns-3.0.0 pycares-4.1.2


In [3]:
!pip install verify_email

Collecting verify_email
  Downloading verify_email-2.4.3-py3-none-any.whl (5.9 kB)
Collecting aiosmtpd
  Downloading aiosmtpd-1.4.2-py3-none-any.whl (143 kB)
[K     |████████████████████████████████| 143 kB 5.0 MB/s 
Collecting atpublic
  Downloading atpublic-3.0.1-py3-none-any.whl (4.8 kB)
Installing collected packages: atpublic, aiosmtpd, verify-email
Successfully installed aiosmtpd-1.4.2 atpublic-3.0.1 verify-email-2.4.3


In [4]:
import asyncio
import aiodns
import logging
import re
import smtplib
import socket
import threading
import collections.abc as abc
import sys

# Pattern an address has to match in syntax_check(): a local part, '@', a domain
# label, a literal '.', and the rest of the domain.
EMAIL_REGEX = r'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)'
# Domain name -> MX lookup result stored by get_mx_ip(); None records a failed lookup.
MX_DNS_CACHE = {}
# NOTE(review): not referenced anywhere in the visible code -- presumably meant to
# cache SMTP check results; confirm before relying on it or removing it.
MX_CHECK_CACHE = {}

# Set up logging on module load and avoid adding 'ch' or 'logger' to module
# namespace.  We could assign the logger to a module level name, but it is only
# used by two functions, and this approach demonstrates using the 'logging'
# namespace to retrieve arbitrary loggers.

def setup_module_logger(name):
    """Attach a formatted stream handler to the logger called ``name``.

    The call is idempotent: when this function has already attached its
    handler to that logger (for example because the notebook cell was run
    again), nothing is added, so messages are not emitted several times.

    Args:
        name: Name of the logger to configure, as passed to
            ``logging.getLogger``.

    Returns:
        None.
    """
    logger = logging.getLogger(name)
    # Handlers created here carry a marker attribute; finding one means the
    # logger is already set up and adding another would duplicate every line.
    if any(getattr(existing, '_module_default_handler', False)
           for existing in logger.handlers):
        return
    ch = logging.StreamHandler()
    # Really should not be configuring formats in a library, see
    # https://docs.python.org/3/howto/logging.html#configuring-logging-for-a-library
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    ch.setFormatter(formatter)
    ch._module_default_handler = True
    logger.addHandler(ch)


# Configure the 'verify_email' logger as soon as this cell runs; network_calls()
# and the debug switch of verify_email() fetch the same logger by name.
setup_module_logger('verify_email')


def is_list(obj):
    """Return True for any sequence other than ``str``, False otherwise."""
    if isinstance(obj, str):
        # A string is a sequence too, but callers treat it as one single value.
        return False
    return isinstance(obj, abc.Sequence)

async def get_mx_ip(hostname):
    '''Return the MX lookup result for ``hostname``, consulting the cache first.

    A cache miss queries DNS for 'MX' records through a fresh
    ``aiodns.DNSResolver`` and stores the outcome in ``MX_DNS_CACHE``;
    a lookup that raises ``DNSError`` is stored (and returned) as ``None``.
    '''
    if hostname in MX_DNS_CACHE:
        return MX_DNS_CACHE[hostname]

    try:
        dns_resolver = aiodns.DNSResolver()
        records = await dns_resolver.query(hostname, 'MX')
    except aiodns.error.DNSError:
        records = None

    MX_DNS_CACHE[hostname] = records
    return records


async def get_mx_hosts(email):
    '''Return the MX lookup result for the domain part of ``email``.

    The domain is everything after the first '@' (the whole string when
    there is no '@'). A cached entry in MX_DNS_CACHE is returned as is;
    otherwise the lookup is delegated to get_mx_ip().
    '''
    domain = email.split('@', 1)[-1]
    if domain not in MX_DNS_CACHE:
        return await get_mx_ip(domain)
    return MX_DNS_CACHE[domain]



async def handler_verify(mx_hosts, email, timeout=None):
    '''Probe each MX host in turn until one of them accepts ``email``.

    Args:
        mx_hosts: Iterable of MX records; each is handed to network_calls().
        email: Address to verify.
        timeout: Passed through to network_calls() unchanged.

    Returns:
        The first truthy result from network_calls(), or False when every
        host answered falsy (rejected, unreachable, timed out) or when
        ``mx_hosts`` is empty.
    '''
    for mx in mx_hosts:
        res = await network_calls(mx, email, timeout)
        if res:
            return res
    # Give up only after the whole list was tried: returning from inside the
    # loop would stop at the first host and ignore the fallback MX servers.
    return False


async def syntax_check(email):
    '''Return True when ``email`` matches EMAIL_REGEX, False otherwise.'''
    matched = re.match(EMAIL_REGEX, email)
    return matched is not None


async def _verify_email(email, timeout=None, verify=True):
    '''Validate email by syntax check, domain check and handler check.

    Args:
        email: Address to validate.
        timeout: Passed through to handler_verify() for the SMTP probe.
        verify: When False only the syntax check runs; the MX lookup and
            the SMTP probe are skipped.

    Returns:
        False when the syntax check fails or no MX result is available;
        True when the syntax is valid and ``verify`` is False; otherwise
        whatever handler_verify() returns for the looked-up MX hosts.
    '''
    if not await syntax_check(email):
        return False
    if not verify:
        # Syntax-only mode: a well-formed address is the whole answer. Without
        # this branch the function fell off the end and returned None.
        return True
    mx_hosts = await get_mx_hosts(email)
    if mx_hosts is None:
        return False
    return await handler_verify(mx_hosts, email, timeout)

def verify_email(emails, timeout=None, verify=True, debug=False):
    """Validate one address or a sequence of addresses synchronously.

    Args:
        emails: A single address, or a non-string sequence of addresses.
        timeout: Passed through to _verify_email() for every address.
        verify: Passed through to _verify_email().
        debug: When True, the 'verify_email' logger is set to DEBUG.

    Returns:
        A list of results (one per address, in input order) when more than
        one address was given; the bare result when exactly one address was
        given; an empty list for an empty sequence.
    """
    if debug:
        logging.getLogger('verify_email').setLevel(logging.DEBUG)
    if not is_list(emails):
        emails = [emails]
    if not emails:
        # Nothing to validate; indexing result[0] below would raise IndexError.
        return []

    # asyncio's default event loop is not fully supported on Windows
    # See: https://github.com/kakshay21/verify_email/issues/34#issuecomment-616971628
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current loop for this thread: create one and make it current.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    result = [
        loop.run_until_complete(_verify_email(email, timeout, verify))
        for email in emails
    ]

    return result if len(result) > 1 else result[0]

async def verify_email_async(emails, timeout=None, verify=True, debug=False):
    """Coroutine variant of verify_email() for callers already inside a loop.

    Args:
        emails: A single address, or a non-string sequence of addresses.
        timeout: Passed through to _verify_email() for every address.
        verify: Passed through to _verify_email().
        debug: When True, the 'verify_email' logger is set to DEBUG.

    Returns:
        A list of results (one per address, in input order) when more than
        one address was given; the bare result when exactly one address was
        given; an empty list for an empty sequence.
    """
    if debug:
        logging.getLogger('verify_email').setLevel(logging.DEBUG)
    if not is_list(emails):
        emails = [emails]
    if not emails:
        # Nothing to validate; indexing result[0] below would raise IndexError.
        return []

    # Addresses are checked one after another, so results keep the input order.
    result = [await _verify_email(email, timeout, verify) for email in emails]

    return result if len(result) > 1 else result[0]

async def network_calls(mx, email, timeout=20):
    """Ask one MX host over SMTP whether it accepts mail for ``email``.

    Connects to ``mx.host``, sends EHLO, an empty MAIL FROM and RCPT TO for
    the address, and reads the verdict from the RCPT reply code. The smtplib
    calls are blocking; nothing in this coroutine awaits.

    Args:
        mx: MX record object; only its ``host`` attribute is read (the whole
            object is interpolated into the debug messages).
        email: Address to probe.
        timeout: Handed to ``smtplib.SMTP`` as its ``timeout`` argument.

    Returns:
        True when the RCPT reply code is between 200 and 250 inclusive;
        False when EHLO or RCPT is answered with another code, or the server
        disconnects or refuses the connection; None on a socket timeout or
        any other socket-level error.
    """
    logger = logging.getLogger('verify_email')
    result = False
    smtp = None
    try:
        smtp = smtplib.SMTP(mx.host, timeout=timeout)
        status, message = smtp.ehlo()
        if status >= 400:
            smtp.quit()
            logger.debug(f'{mx} answer: {status} - {message}\n')
            return False
        smtp.mail('')
        status, message = smtp.rcpt(email)
        if 200 <= status <= 250:
            result = True

        # Logged exactly once for both outcomes (a rejection used to be
        # written twice).
        logger.debug(f'{mx} answer: {status} - {message}\n')
        smtp.quit()

    except smtplib.SMTPServerDisconnected:
        logger.debug(f'Server does not permit verify user, {mx} disconnected.\n')
    except smtplib.SMTPConnectError:
        logger.debug(f'Unable to connect to {mx}.\n')
    except socket.timeout as e:
        logger.debug(f'Timeout connecting to server {mx}: {e}.\n')
        return None
    except socket.error as e:
        logger.debug(f'ServerError or socket.error exception raised {e}.\n')
        return None
    finally:
        # Release the socket on every path; an error raised after the
        # connection was opened would otherwise leave it open. close() is a
        # no-op once quit() has already closed the connection.
        if smtp is not None:
            smtp.close()

    return result

In [6]:
from datetime import datetime
# NOTE(review): this rebinds the name `verify_email` to the function shipped in the
# installed package, replacing the notebook-local definition of the same name.
from verify_email import verify_email
import nest_asyncio
# Presumably needed so run_until_complete() can be used while the notebook's own
# event loop is already running -- confirm against the nest_asyncio docs.
nest_asyncio.apply()
import multiprocessing
# The star import is what makes the bare `read_csv` name available in this cell.
from pandas import *
import pandas as pd
import csv

# CSV file on the mounted Drive; the addresses to check are read from its "email" column.
input_file = '/content/drive/MyDrive/Data Science by OneLearn/Beginner Python for Data Science/Module Project 2 - Email Automation Project/email verifier sample sheet.csv'
data = pd.read_csv(input_file)
emails = data["email"].tolist()

# For a quick manual run, replace `emails` with a short literal list,
# e.g. ["foo@bar.com", "ex@example.com"].
# Start timestamp; only the commented-out timing code further down reads it.
b = datetime.now()



def validate(email):
    """Run verify_email() on one address and return its result as a string."""
    return str(verify_email(email))

# Validate every address exactly once, in input order, so that `status` lines up
# row for row with `emails` when the results table is built.
status = [validate(email) for email in emails]

# This cell used to validate the whole list a second time through a
# multiprocessing.Pool that was never closed, doubling the SMTP probes sent to
# remote servers; no visible cell reads that second result. `result` is kept as a
# copy of the single pass for anything that still expects the name.
result = list(status)
#delta = datetime.now() - b
#print(delta.total_seconds())



In [7]:
# Pair each address with its verification outcome and write the table to an
# Excel workbook on Drive, without the index column.
status_dic = {
    'email': emails,
    'status': status,
}

df = pd.DataFrame(data=status_dic)
df.to_excel('/content/drive/MyDrive/new output.xlsx', index=False)

In [10]:
import pandas as pd

# Rebuild the results table in memory, then display a CSV copy of the results
# stored in the project folder (a different file from the workbook written by
# the export cell).
status_dic = {
    'email': emails,
    'status': status,
}

df = pd.DataFrame(data=status_dic)
pd.read_csv('/content/drive/MyDrive/Data Science by OneLearn/Beginner Python for Data Science/Module Project 2 - Email Automation Project/new output.xlsx - Sheet1.csv')

Unnamed: 0,email,status
0,kkaggarwal48@gmail.com,True
1,mssnaac@gmail.com,True
2,sgoffice@aiu.ac.in,True
3,chairmantsche@gmail.com,True
4,aenugumallareddy@gmail.com,True
...,...,...
78,vpaa.office@aue.ae,True
79,chris@spjain.org,True
80,anand@cbfs.edu.om,True
81,djamali@sharjah.ac.ae,True


In [11]:
# Scratch value for the two cells below, which inspect its type and its str() form.
a = True

In [12]:
# Scratch check: show the type of `a`.
type(a)

bool

In [13]:
# Scratch check: show what str() makes of a bool, which is the conversion validate() applies to each result.
str(a)

'True'