In [1]:
from itertools import tee
from collections import defaultdict
import urllib.request

In [2]:
def parse(content):
    """Map each letter in content to the set of letters that directly follow it.

    Every line is lower-cased and all non-alphabetic characters (as decided by
    ``str.isalpha``) are dropped before adjacent letters are paired, so the
    line ``"a-b"`` records ``b`` as a follower of ``a``. Pairs never span two
    lines: the last letter of one line is not paired with the first letter of
    the next.

    Args:
        content: iterable of lines, typically a binary file-like object such
            as the one returned by ``urllib.request.urlopen``. ``bytes`` lines
            are decoded with the default codec (UTF-8); ``str`` lines (for
            example from a text-mode file) are used as-is.

    Returns:
        A ``collections.defaultdict`` of ``{letter: set_of_following_letters}``.
        Only letters with at least one follower appear as keys, and looking up
        any other key raises ``KeyError``.

    Raises:
        UnicodeDecodeError: if a ``bytes`` line is not valid UTF-8.
    """
    followers = defaultdict(set)
    for line in content:
        if not line:
            # An empty item is treated as an end-of-input marker: stop reading.
            break
        if isinstance(line, (bytes, bytearray)):
            line = line.decode()
        letters = [ch for ch in line.lower() if ch.isalpha()]
        # Pair each letter with its successor; lines with fewer than two
        # letters contribute nothing.
        for current, following in zip(letters, letters[1:]):
            followers[current].add(following)
    # Disable auto-creation so that lookups of unseen letters by the caller
    # raise KeyError instead of silently inserting empty sets.
    followers.default_factory = None
    return followers

def fetch_page(url):
    """Open url, feed the response stream to parse and return what parse returns.

    The response is closed when the ``with`` block exits, whether or not
    parsing succeeds.
    """
    with urllib.request.urlopen(url) as response:
        return parse(response)

# Plain-text files downloaded by both the sequential and the asynchronous
# benchmark cells.
urls = (
    "https://raw.githubusercontent.com/yoavram/Py4Eng/master/data/gulliver.txt",
    "https://raw.githubusercontent.com/yoavram/Py4Eng/master/data/alice.txt",
)

In [3]:
%%timeit -n 1 -r 1

result = [fetch_page(url) for url in urls]

1 loop, best of 1: 4.03 s per loop


Asynchronous version using aiohttp: <https://docs.aiohttp.org/>

In [4]:
import aiohttp
import asyncio
from itertools import tee
from collections import defaultdict

In [5]:
async def parse(content):
    """Map each letter in content to the set of letters that directly follow it.

    Asynchronous variant: lines are pulled from ``content`` with ``async for``.
    Every line is lower-cased and all non-alphabetic characters (as decided by
    ``str.isalpha``) are dropped before adjacent letters are paired, so the
    line ``"a-b"`` records ``b`` as a follower of ``a``. Pairs never span two
    lines.

    Args:
        content: asynchronous iterable of lines, such as the ``content``
            stream of an aiohttp response. ``bytes`` lines are decoded with
            the default codec (UTF-8); ``str`` lines are used as-is.

    Returns:
        A ``collections.defaultdict`` of ``{letter: set_of_following_letters}``.
        Only letters with at least one follower appear as keys, and looking up
        any other key raises ``KeyError``.

    Raises:
        UnicodeDecodeError: if a ``bytes`` line is not valid UTF-8.
    """
    followers = defaultdict(set)
    async for line in content:
        if not line:
            # An empty item is treated as an end-of-input marker: stop reading.
            break
        if isinstance(line, (bytes, bytearray)):
            line = line.decode()
        letters = [ch for ch in line.lower() if ch.isalpha()]
        # Pair each letter with its successor; lines with fewer than two
        # letters contribute nothing.
        for current, following in zip(letters, letters[1:]):
            followers[current].add(following)
    # Disable auto-creation so that lookups of unseen letters by the caller
    # raise KeyError instead of silently inserting empty sets.
    followers.default_factory = None
    return followers

async def fetch_page(url):
    """Download url asynchronously and return the result of parse on its body.

    The request is made through a short-lived ``aiohttp.ClientSession``. Both
    the session and the response are managed with ``async with`` so they are
    released even when the status check or parsing raises.

    Args:
        url: address of the document to download.

    Returns:
        Whatever ``parse`` returns for the response's content stream.

    Raises:
        AssertionError: if the HTTP status is not 200.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Raised explicitly (not via `assert`) so the check is not
            # stripped under `python -O`; the exception type is kept for
            # backward compatibility with existing callers.
            if response.status != 200:
                raise AssertionError(
                    'unexpected HTTP status {} for {}'.format(response.status, url)
                )
            # response.content is a stream that parse iterates line by line;
            # it must be consumed before the response is released.
            return await parse(response.content)

# Calling the async fetch_page only creates coroutine objects; nothing is
# downloaded until they are run on the event loop.
tasks = list(map(fetch_page, urls))
loop = asyncio.get_event_loop()

In [6]:
%%timeit -n 1 -r 1

result = loop.run_until_complete(asyncio.wait(tasks))

1 loop, best of 1: 1.25 s per loop


In [7]:
# Release the event loop obtained from asyncio.get_event_loop() above. A closed
# loop cannot run further coroutines, so this must stay the last async cell.
loop.close()