# 使用future处理并发

## future指一种对象，表示异步执行的操作

In [1]:
import os
import time
import sys
import requests

# Country codes whose flags are downloaded; the space-separated string is
# split into a list of two-letter codes.
# NOTE(review): the name suggests the 20 most populous countries - confirm.
POP20_CC = ('CN IN US ID BR PK NG BD RU JP MX PH VN ET EG DE IR TR CD FR').split()

# Used by get_flag(cc) as the prefix of each flag URL.
BASE_URL = 'http://flupy.org/data/flags'

# Directory that save_flag() writes the downloaded images into.
DEST_DIR = 'downloads/'


def save_flag(img, filename):
    """Write the image bytes *img* to *filename* inside ``DEST_DIR``.

    The destination directory is created on demand, so the first download
    does not fail with ``FileNotFoundError`` when the directory is missing.

    Args:
        img: Raw bytes of the image.
        filename: Name of the file to create inside ``DEST_DIR``.
    """
    if DEST_DIR:
        # exist_ok=True tolerates the directory already being there, which
        # also covers several threads reaching this line at the same time.
        os.makedirs(DEST_DIR, exist_ok=True)
    path = os.path.join(DEST_DIR, filename)
    with open(path, 'wb') as fp:
        fp.write(img)
        

def get_flag(cc):
    """Download the GIF flag for country code *cc* and return its raw bytes.

    The URL has the form ``BASE_URL/<cc>/<cc>.gif`` with the code lower-cased.
    The HTTP status is not inspected; the response body is returned as-is.
    """
    code = cc.lower()
    url = '/'.join((BASE_URL, code, code + '.gif'))
    return requests.get(url).content


def show(text):
    """Print *text* followed by a space (no newline) and flush stdout."""
    # Flushing right away makes each code appear as soon as it is reported.
    print(text, end=' ', flush=True)
    
    
def download_many(cc_list):
    """Download the flags for *cc_list* one after another, in sorted order.

    Each flag is fetched with get_flag(), its code is echoed with show(),
    and the image is stored with save_flag() as ``<code>.gif`` (lower-cased).

    Returns:
        The number of entries in *cc_list*.
    """
    for code in sorted(cc_list):
        flag = get_flag(code)
        show(code)
        save_flag(flag, '{}.gif'.format(code.lower()))

    return len(cc_list)


def main(download_many):
    """Run *download_many* on ``POP20_CC`` and report the count and duration.

    Args:
        download_many: Callable that receives the list of country codes and
            returns the number of flags it downloaded.
    """
    started = time.time()
    downloaded = download_many(POP20_CC)
    took = time.time() - started
    print('\n{} flags downloaded in {:.2f}s'.format(downloaded, took))

In [16]:
main(download_many)

BD BR CD CN DE EG ET FR ID IN IR JP MX NG PH PK RU TR US VN 
20 flags downloaded in 5.24s


### 使用futures.ThreadPoolExecutor类实现多线程下载的脚本

In [2]:
from concurrent import futures

# Upper bound on the number of worker threads created by download_many().
MAX_WORKERS = 20


def download_one(cc):
    """Fetch, announce and save the flag for a single country code.

    Returns:
        *cc* unchanged, so the caller can tell which download finished.
    """
    flag = get_flag(cc)
    show(cc)
    save_flag(flag, '{}.gif'.format(cc.lower()))
    return cc


def download_many(cc_list):
    """Download the flags for *cc_list* concurrently with a thread pool.

    Args:
        cc_list: Sized collection of country codes.

    Returns:
        The number of results produced by download_one(); 0 for empty input.
    """
    if not cc_list:
        # min(MAX_WORKERS, 0) would be 0, and ThreadPoolExecutor rejects
        # max_workers <= 0 with ValueError, so answer without a pool.
        return 0
    # Never start more threads than there are flags to fetch.
    workers = min(MAX_WORKERS, len(cc_list))
    with futures.ThreadPoolExecutor(workers) as executor:
        # map() returns an iterator whose __next__ calls result() on each
        # future, so we receive the return values of download_one rather
        # than the futures themselves. A future can be thought of as the
        # scheduled execution of the callable (what Executor.submit returns).
        # Results come back in the order the calls were started; fetching
        # the next one blocks until that particular call has finished.
        res = executor.map(download_one, sorted(cc_list))
    return len(list(res))

In [26]:
main(download_many)

US VN IR NG IN BD JP FR CN BR EG RUID  DE PH ET MX TRCD  PK 
20 flags downloaded in 1.06s


In [7]:
def download_one(cc):
    """Download one flag, echo its country code and store it on disk.

    The image is saved as ``<cc lower-cased>.gif`` through save_flag().

    Returns:
        The country code that was passed in.
    """
    data = get_flag(cc)
    show(cc)
    target = cc.lower() + '.gif'
    save_flag(data, target)
    return cc


def download_many(cc_list):
    """Demonstrate submit()/as_completed() on the first five country codes.

    Each code is submitted to a 3-thread pool and the new future is printed
    right away; then every future is printed again, with its result, as soon
    as it completes.

    Returns:
        The number of results collected.
    """
    first_five = cc_list[:5]
    # The order in which results arrive depends on the number of workers.
    with futures.ThreadPoolExecutor(max_workers=3) as pool:
        scheduled = []
        for code in sorted(first_five):
            job = pool.submit(download_one, code)
            scheduled.append(job)
            print('Scheduled for {}: {}'.format(code, job))

        finished = 0
        # as_completed() takes the futures and returns an iterator that
        # yields each future once it has finished running.
        for job in futures.as_completed(scheduled):
            outcome = job.result()
            print('{} result: {!r}'.format(job, outcome))
            finished += 1

    return finished

In [8]:
main(download_many) 

Scheduled for BR: <Future at 0x7efdf8b23a60 state=running>
Scheduled for CN: <Future at 0x7efdf9567dc0 state=running>
Scheduled for ID: <Future at 0x7efdf95594c0 state=running>
Scheduled for IN: <Future at 0x7efdf8b45310 state=pending>
Scheduled for US: <Future at 0x7efdf82fc070 state=pending>
BR CN ID <Future at 0x7efdf8b23a60 state=finished returned str> result: 'BR'
<Future at 0x7efdf9567dc0 state=finished returned str> result: 'CN'
<Future at 0x7efdf95594c0 state=finished returned str> result: 'ID'
IN <Future at 0x7efdf8b45310 state=finished returned str> result: 'IN'
US <Future at 0x7efdf82fc070 state=finished returned str> result: 'US'

5 flags downloaded in 0.47s


In [31]:
from time import time, strftime
from concurrent import futures
from time import sleep


def display(*args):
    """Print *args* on one line, prefixed with an ``[HH:MM:SS]`` timestamp."""
    stamp = strftime('[%H:%M:%S]')
    # Two separate prints: the prefix always ends with a space, even when
    # no arguments are given.
    print(stamp, end=' ')
    print(*args)
    
    
def loiter(n):
    """Sleep for *n* seconds, logging before and after, and return ``n * 10``.

    Both log lines are indented with *n* tab characters.
    """
    indent = '\t' * n
    display('{}loiter({}): doing nothing for {}s'.format(indent, n, n))
    sleep(n)
    display('{}loiter({}): done'.format(indent, n))
    return n * 10


def main():
    """Show how Executor.map() schedules calls and hands back their results.

    Five loiter() calls are mapped onto a pool of three threads, then the
    results are displayed one by one as the iterator yields them.
    """
    display('Script starting.')
    # The with-block shuts the executor down once the results have been
    # consumed, so the pool is not left open after main() returns.
    with futures.ThreadPoolExecutor(max_workers=3) as executor:
        results = executor.map(loiter, range(5))
        display('results:', results)
        display('Waiting for individual results:')
        # map() yields results in the same order in which the calls were
        # started, blocking until each one is available.
        for i, result in enumerate(results):
            display('result {}: {}'.format(i, result))
        

main()

[16:52:15] Script starting.
[16:52:15] loiter(0): doing nothing for 0s
[16:52:15] loiter(0): done
[16:52:15] 	loiter(1): doing nothing for 1s
[16:52:15] 		loiter(2): doing nothing for 2s
[16:52:15] 			loiter(3): doing nothing for 3s
[16:52:15] results: <generator object Executor.map.<locals>.result_iterator at 0x7fa5e84f25f0>
[16:52:15] Waiting for individual results:
[16:52:15] result 0: 0
[16:52:16] 	loiter(1): done
[16:52:16] 				loiter(4): doing nothing for 4s
[16:52:16] result 1: 10
[16:52:17] 		loiter(2): done
[16:52:17] result 2: 20
[16:52:18] 			loiter(3): done
[16:52:18] result 3: 30
[16:52:20] 				loiter(4): done
[16:52:20] result 4: 40


## 显示下载进度并处理错误

In [6]:
import requests
from enum import Enum
from collections import namedtuple


# Outcome categories counted per download attempt. Note that the enum's
# internal name is 'Status' although it is bound to the name HTTPStatus.
HTTPStatus = Enum('Status', 'ok not_found error')
# Record with two fields, status and data; download_one() returns
# Result(status, cc).
Result = namedtuple('Result', 'status data')


def get_flag(base_url, cc):
    """Download the GIF flag for *cc* from *base_url* and return its bytes.

    Args:
        base_url: URL prefix; the flag is fetched from
            ``<base_url>/<cc>/<cc>.gif`` with the code lower-cased.
        cc: Country code.

    Returns:
        The response body as bytes.

    Raises:
        requests.exceptions.HTTPError: Raised by ``raise_for_status()`` when
            the response carries an HTTP error status.
    """
    url = '{}/{cc}/{cc}.gif'.format(base_url, cc=cc.lower())
    # The imported module is ``requests``; the previous ``request.get`` was
    # an undefined name and raised NameError on every call.
    resp = requests.get(url)
    if resp.status_code != 200:
        resp.raise_for_status()
    return resp.content


def download_one(cc, base_url, verbose=False):
    """Download and save one flag, mapping a 404 to a ``not_found`` status.

    Args:
        cc: Country code.
        base_url: URL prefix handed to get_flag().
        verbose: When true, print the code together with 'ok' or 'not found'.

    Returns:
        ``Result(status, cc)`` where status is ``HTTPStatus.ok`` or
        ``HTTPStatus.not_found``.

    Raises:
        requests.exceptions.HTTPError: Re-raised for any HTTP error whose
            status code is not 404.
    """
    try:
        image = get_flag(base_url, cc)
    except requests.exceptions.HTTPError as exc:
        # Only a missing flag is handled here; everything else goes up.
        if exc.response.status_code != 404:
            raise
        status, msg = HTTPStatus.not_found, 'not found'
    else:
        save_flag(image, cc.lower() + '.gif')
        status, msg = HTTPStatus.ok, 'ok'

    if verbose:
        print(cc, msg)

    return Result(status, cc)

In [7]:
import collections

import tqdm


def download_many(cc_list, base_url, verbose, max_req):
    """Download the flags sequentially and tally the outcome of each one.

    Args:
        cc_list: Country codes; processed in sorted order.
        base_url: URL prefix handed to download_one().
        verbose: When true, print one line per error; when false, wrap the
            iteration in a tqdm progress bar instead.
        max_req: Not used by this sequential implementation.

    Returns:
        A ``collections.Counter`` keyed by status. HTTP errors and connection
        errors raised by download_one() are counted as ``HTTPStatus.error``.
    """
    tally = collections.Counter()
    codes = sorted(cc_list)
    progress = codes if verbose else tqdm.tqdm(codes)
    for cc in progress:
        try:
            outcome = download_one(cc, base_url, verbose)
        except requests.exceptions.HTTPError as exc:
            template = 'HTTP error {res.status_code} - {res.reason}'
            problem = template.format(res=exc.response)
        except requests.exceptions.ConnectionError:
            problem = 'Connection error'
        else:
            problem = ''

        # A non-empty message means the download failed.
        status = HTTPStatus.error if problem else outcome.status
        tally[status] += 1
        if problem and verbose:
            print('*** Error for {}: {}'.format(cc, problem))

    return tally

In [8]:
import collections
from concurrent import futures
import requests

import tqdm

from flags2_common import main, HTTPStatus
from flags2_sequential import download_one

# NOTE(review): neither constant is referenced in the visible code; they are
# presumably the default and the upper limit for the number of concurrent
# requests, consumed by the imported main() - confirm.
DEFAULT_CONCUR_REQ = 30
MAX_CONCUR_REQ = 1000


def download_many(cc_list, base_url, verbose, concur_req):
    """Download the flags concurrently and tally the outcome of each one.

    Args:
        cc_list: Country codes; submitted in sorted order.
        base_url: URL prefix handed to download_one().
        verbose: When true, print one line per error; when false, show a
            tqdm progress bar instead.
        concur_req: Number of worker threads in the pool.

    Returns:
        A ``collections.Counter`` keyed by status. HTTP errors and connection
        errors raised by a download are counted as ``HTTPStatus.error``.
    """
    tally = collections.Counter()
    with futures.ThreadPoolExecutor(max_workers=concur_req) as executor:
        # Map every future back to its country code for error reporting.
        pending = {
            executor.submit(download_one, cc, base_url, verbose): cc
            for cc in sorted(cc_list)
        }
        finished = futures.as_completed(pending)
        if not verbose:
            # Results are fetched as soon as each future completes, which
            # is what lets the progress bar advance.
            finished = tqdm.tqdm(finished, total=len(cc_list))
        for future in finished:
            try:
                outcome = future.result()
            except requests.exceptions.HTTPError as exc:
                template = 'HTTP error {res.status_code} - {res.reason}'
                problem = template.format(res=exc.response)
            except requests.exceptions.ConnectionError:
                problem = 'Connection error'
            else:
                problem = ''

            # A non-empty message means the download failed.
            status = HTTPStatus.error if problem else outcome.status
            tally[status] += 1
            if problem and verbose:
                print('*** Error for {}: {}'.format(pending[future], problem))
    return tally