单线程实例

In [1]:
import time
import requests

def main():
    """Download a fixed list of sites and print how long the whole run took."""
    homepages = (
        'https://www.baidu.com',
        'https://www.sina.com.cn',
        'https://www.163.com',
        'https://www.qq.com',
        'https://www.taobao.com',
    )
    # Every homepage is requested ten times, in the same repeating order.
    sites = list(homepages) * 10

    started = time.perf_counter()
    download_all_sites(sites)
    # Despite its name, end_time holds the elapsed duration in seconds.
    end_time = time.perf_counter() - started
    print(f"下载完成，用时{end_time}秒")

def download_all_sites(sites):
    """Download each URL in *sites* one after another over a single session.

    Args:
        sites: Iterable of URL strings to fetch, in order.
    """
    session = requests.Session()
    try:
        # One shared session is reused for every request in the loop.
        for url in sites:
            download_site(url, session)
    finally:
        # Always release the session, even if a download raised.
        session.close()

def download_site(url, session, timeout=10):
    """Fetch *url* through *session* and print the size of the response body.

    Args:
        url: Address to request.
        session: Object providing ``get(url, timeout=...)`` that returns a
            context manager exposing a ``content`` attribute (a
            ``requests.Session`` in this script).
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a single unresponsive site would block the run forever.

    Raises:
        Whatever ``session.get`` raises, including its timeout error.
    """
    with session.get(url, timeout=timeout) as response:
        print(f"Read {len(response.content)} bytes from {url}")

# Start the sequential download run only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 326 bytes from https://www.qq.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 326 bytes from https://www.qq.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 326 bytes from https://www.qq.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 326 bytes from https://www.qq.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 326 bytes

多线程版本

In [2]:
import threading
import time 
from concurrent.futures import ThreadPoolExecutor
import requests

# Thread-local storage: an attribute set on this object is visible only to the
# thread that set it. get_session_for_thread() keeps one session per thread here.
thread_local = threading.local()

def main():
    """Download a fixed list of sites and print how long the whole run took."""
    homepages = [
        'https://www.baidu.com',
        'https://www.sina.com.cn',
        'https://www.163.com',
        'https://www.qq.com',
        'https://www.taobao.com',
    ]
    # Ten passes over the homepages, keeping the original repeating order.
    sites = [url for _ in range(10) for url in homepages]

    t0 = time.perf_counter()
    download_all_sites(sites)
    # Despite its name, end_time holds the elapsed duration in seconds.
    end_time = time.perf_counter() - t0
    print(f"下载完成，用时{end_time}秒")


def download_all_sites(sites):
    """Download every URL in *sites* using a pool of five worker threads.

    Args:
        sites: Iterable of URL strings; each one is passed to download_site.

    Raises:
        The first exception (in input order) raised by a worker, if any.
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        # map() hands each element of sites to download_site in turn, but an
        # exception raised in a worker is only re-raised when its result is
        # read from the returned iterator. Consume the iterator so a failed
        # download is reported instead of being silently discarded.
        list(executor.map(download_site, sites))

def download_site(url, timeout=10):
    """Fetch *url* with the calling thread's session and print the body size.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout an unresponsive site would tie up a worker thread forever.

    Raises:
        Whatever the session's ``get`` raises, including its timeout error.
    """
    session = get_session_for_thread()  # session dedicated to the current thread
    with session.get(url, timeout=timeout) as response:
        print(f'Read {len(response.content)} bytes from {url}')

def get_session_for_thread():
    """Return the session stored for the calling thread, creating it on first use.

    Returns:
        The ``requests.Session`` kept in ``thread_local.session``.
    """
    try:
        return thread_local.session
    except AttributeError:
        # This thread has no session yet: create one and remember it.
        new_session = requests.Session()
        thread_local.session = new_session
        return new_session

# Start the thread-pool download run only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Read 311 bytes from https://www.163.com
Read 90674 bytes from https://www.taobao.com
Read 326 bytes from https://www.qq.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 395673 bytes from https://www.sina.com.cn
Read 2443 bytes from https://www.baidu.com
Read 90674 bytes from https://www.taobao.com
Read 326 bytes from https://www.qq.com
Read 326 bytes from https://www.qq.com
Read 311 bytes from https://www.163.com
Read 395673 bytes from https://www.sina.com.cn
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 2443 bytes from https://www.baidu.com
Read 311 bytes from https://www.163.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 395673 bytes from https://www.sina.com.cn
Read 311 bytes from https://www.163.com
Read 90674 bytes from https://www.taobao.com
Read 2443 bytes from https://www.baidu.com
Read 2443 bytes from https://www.baidu.com
Read 311 b