## 单线程版本

In [8]:
import requests
import time


def download_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print how many bytes were received.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without a timeout a stalled connection
            would block the whole sequential run indefinitely.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (including timeouts).
    """
    resp = requests.get(url, timeout=timeout)
    print(f"Read {len(resp.content)} from {url}")


def download_all(sites):
    """Download every URL in ``sites`` one at a time, in iteration order.

    Each URL is handed to ``download_one``; the next download only starts
    after the previous call has returned.
    """
    for url in sites:
        download_one(url)

        
def main():
    """Download a fixed list of Wikipedia pages sequentially and print the elapsed time."""
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)',
    ]

    # perf_counter is read immediately before and after the download call,
    # so only the downloads themselves are timed.
    started = time.perf_counter()
    download_all(sites)
    finished = time.perf_counter()

    elapsed = finished - started
    print(f"Download {len(sites)} sites in {elapsed} seconds")

In [None]:
# main()

## 多线程版本

In [None]:
import concurrent.futures
import requests
import threading
import time

def download_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print how many bytes were received.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get``. Without it, a server that never answers
            would keep the calling worker thread blocked forever.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (including timeouts).
    """
    resp = requests.get(url, timeout=timeout)
    print(f"Read {len(resp.content)} from {url}")

def download_all(sites):
    """Download every URL in ``sites`` using a pool of five worker threads.

    Args:
        sites: Iterable of URLs; each one is passed to ``download_one``.

    Raises:
        Exception: whatever ``download_one`` raised for the first failing
            URL (in input order) is re-raised here.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        # executor.map returns a lazy result iterator; an exception raised
        # inside a worker only surfaces when its result is retrieved.
        # Draining the iterator keeps failed downloads from being
        # silently discarded.
        for _ in executor.map(download_one, sites):
            pass
        
        
def main():
    """Download a fixed list of Wikipedia pages via ``download_all`` and print the elapsed time."""
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)',
    ]
    site_count = len(sites)

    # Time only the download phase.
    t0 = time.perf_counter()
    download_all(sites)
    t1 = time.perf_counter()

    print(f"Download {site_count} sites in {t1 - t0} seconds")

## 改进版

In [None]:
import concurrent.futures
import requests
import time


def download_one(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print how many bytes were received.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; forwarded
            to ``requests.get`` so an unresponsive server cannot block the
            caller forever.

    Raises:
        requests.exceptions.RequestException: propagated from
            ``requests.get`` (including timeouts).
    """
    resp = requests.get(url, timeout=timeout)
    print(f"Read {len(resp.content)} from {url}")

def download_all(sites):
    """Download every URL in ``sites`` on a five-thread pool, handling each as it finishes.

    One ``download_one`` task is submitted per URL; completed futures are
    then visited in completion order.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as pool:
        pending = [pool.submit(download_one, url) for url in sites]

        for finished in concurrent.futures.as_completed(pending):
            # result() re-raises any exception the task raised, so a failed
            # download is not silently ignored.
            finished.result()
            

def main():
    """Run ``download_all`` over a fixed list of Wikipedia pages and print the elapsed time."""
    sites = [
        'https://en.wikipedia.org/wiki/Portal:Arts',
        'https://en.wikipedia.org/wiki/Portal:History',
        'https://en.wikipedia.org/wiki/Portal:Society',
        'https://en.wikipedia.org/wiki/Portal:Biography',
        'https://en.wikipedia.org/wiki/Portal:Mathematics',
        'https://en.wikipedia.org/wiki/Portal:Technology',
        'https://en.wikipedia.org/wiki/Portal:Geography',
        'https://en.wikipedia.org/wiki/Portal:Science',
        'https://en.wikipedia.org/wiki/Computer_science',
        'https://en.wikipedia.org/wiki/Python_(programming_language)',
        'https://en.wikipedia.org/wiki/Java_(programming_language)',
        'https://en.wikipedia.org/wiki/PHP',
        'https://en.wikipedia.org/wiki/Node.js',
        'https://en.wikipedia.org/wiki/The_C_Programming_Language',
        'https://en.wikipedia.org/wiki/Go_(programming_language)',
    ]

    # Bracket only the download call with the timer.
    began = time.perf_counter()
    download_all(sites)
    ended = time.perf_counter()
    duration = ended - began

    print(f"Download {len(sites)} sites in {duration} seconds")