# 多任务协程

注意: 如果异步协程中出现了同步模块相关的代码，那么就无法实现异步

In [None]:
import time 
import asyncio
import nest_asyncio
nest_asyncio.apply()
import os
import requests
import aiohttp
from lxml import etree
import aiofiles

In [None]:
async def request(url):
    """Simulate downloading ``url`` without blocking the event loop.

    Prints a start message, suspends for two seconds, then prints a
    completion message. Nothing is actually fetched; ``url`` is only echoed.
    """
    simulated_latency = 2
    print('正在下载', url)
    # A synchronous time.sleep() here would block the whole event loop and
    # defeat the concurrency; blocking steps inside a coroutine must be
    # awaited so control is handed back to the loop.
    await asyncio.sleep(simulated_latency)
    print('下载完成', url)

# Time how long it takes to run every simulated download concurrently.
start_time = time.time()
urls = ['www.badicu.com', 'www.google.com', 'www.sougou.com']

# Task list: wrap each coroutine in a Task so the event loop can
# interleave them instead of running them one at a time.
tasks = [asyncio.ensure_future(request(url)) for url in urls]

# Drive the loop until every task has finished.
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))

end_time = time.time()
print(end_time - start_time)


# 多任务异步【自定义开启服务器】

注意: requests 是同步请求，在异步请求中使用 requests 就变成同步执行了

In [None]:
# Wall-clock start, used at the end of the cell to report total elapsed time.
start = time.time()
# Target endpoints on 127.0.0.1:5000 (presumably the self-hosted test
# server mentioned in the section heading).
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]

async def get_page(url):
    """Fetch ``url`` with the synchronous ``requests`` library and print the body.

    This coroutine contains no ``await``: ``requests.get`` is a blocking
    call, so scheduling several of these as tasks still runs them one after
    another. It is kept as a demonstration; an asynchronous HTTP client
    (for example aiohttp) is required for the requests to overlap.
    """
    print('正在下载',url)
    resp = requests.get(url)
    body = resp.text
    print('下载完毕：',body)

# Schedule one Task per URL on the event loop.
tasks = [asyncio.ensure_future(get_page(url)) for url in urls]

# Run until all tasks are done, then report the elapsed wall-clock time.
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))

end = time.time()

print('总耗时:',end-start)

# aiohttp 实现多任务异步爬虫

失败【原因未知，那么多图片只有那么几张能爬下来】

In [None]:
# Use aiohttp's ClientSession to issue the requests asynchronously.
# Wall-clock start, used at the end of the cell to report total elapsed time.
start = time.time()
urls = [
    'http://127.0.0.1:5000/bobo',
    'http://127.0.0.1:5000/jay',
    'http://127.0.0.1:5000/tom',
]

async def get_page(url):
    """Fetch ``url`` asynchronously with aiohttp and print the response text.

    A new ``ClientSession`` is opened for the single request and closed
    again when the coroutine finishes.

    Args:
        url: Address to send the GET request to.
    """
    async with aiohttp.ClientSession() as session:
        # session.get() already returns an async context manager, so it is
        # entered directly with `async with`; no extra `await` is needed.
        async with session.get(url) as response:
            # text() returns the response body as a str
            # read() returns the response body as bytes
            # json() returns the parsed JSON object
            page_text = await response.text()
            print(page_text)

# Collect one scheduled Task per target URL.
tasks = []
for url in urls:
    tasks.append(asyncio.ensure_future(get_page(url)))

# Block until every task has completed.
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))

# Report the total wall-clock time for the whole batch.
end = time.time()
print('总耗时:',end-start)

In [None]:
# Request configuration for crawling the Wallpaper Abyss (alphacoders) listing.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

# Query-string parameters; requests appends these to `url` when they are
# passed via `params=`.
params = {
    'id': '3',
    'name': 'Anime Wallpapers',
    'page': '2'
}

# Raw Cookie header value as copied from the browser.
# NOTE(review): this embeds what looks like a live session id (wa_session);
# consider loading it from configuration instead of committing it.
_raw_cookie = '__gpi=UID=0000078812e12323:T=1657417742:RT=1660474479:S=ALNI_MYzZBpye3NgSRNGkLom2785IzBzcQ; _ga=GA1.2.778128166.1657538555; __gads=ID=063c61a94a7eb059:T=1657417742:S=ALNI_MZjR-8khMQmnHa_-NAlgX8WRqUezw; _ga_HL65XQTV30=GS1.1.1660474480.9.1.1660475390.0; cookieconsent_status=allow; AlphaCodersView=paged; Sorting=newest; wa_session=bfvnlmd4fncr243rcsea68qebm'

# `cookies=` expects a mapping of cookie name -> value. Passing the entire
# header string under a single 'cookie' key would send one cookie literally
# named "cookie", so split the header into its individual name/value pairs.
# Values may themselves contain '=', hence the split limit of 1.
cookies = dict(pair.split('=', 1) for pair in _raw_cookie.split('; '))

# Base endpoint only: the query string comes from `params`, so it must not be
# repeated here or every parameter would be sent twice.
url = 'https://wall.alphacoders.com/by_category.php'

In [None]:
# Download the listing page synchronously; this is a single request, so the
# blocking requests library is sufficient here.
page_text = requests.get(
    url=url,
    headers=headers,
    params=params,
    cookies=cookies,
).text

# Parse the HTML and collect the src attribute of every <img> found under a
# <div class="boxgrid"> element.
tree = etree.HTML(page_text)
src_list = tree.xpath(
    '//div[@class="boxgrid"]//img/@src'
)

# async def get_image(src):
#     imgName =  'D:/Image/LIKES/Animation/WallPaper/GIRL07/' + src.split('/')[-1]
#     src = 'https://images2.alphacoders.com/126/' + src.split('/')[-1]
#     async with aiohttp.ClientSession(headers = headers) as session:
#         async with await session.get(url = src, headers = headers) as response:
#             # img = 
#             print('开始下载')
#             async with aiofiles.open(imgName, 'wb') as afp:
#                 await afp.write(await response.content.read())
#                 print(imgName, '下载完成')

# tasks = []

# for src in src_list:
#     src = src.split('/')[-1]
#     c = get_image(src)
#     task = asyncio.ensure_future(c)
#     tasks.append(task)
    
# loop = asyncio.get_event_loop()
# loop.run_until_complete(asyncio.wait(tasks))
    
# print('所有图片下载完毕')