In [32]:
"""
该文件用来临时测试，有需要的话可以删除
"""
import os
import asyncio
import nest_asyncio
import aiofiles

from typing import BinaryIO

from aiohttp import ClientSession


nest_asyncio.apply()


class BiliUserAlbumCrawler():
    """Fetch pages of a Bilibili user's album feed through a caller-supplied session.

    The crawler only calls ``session.get``; it never opens or closes the session.
    """

    def __init__(self, session: 'ClientSession', settings: dict = None):
        """
        :param session: object whose ``get(url, **kwargs)`` returns an async context
            manager yielding a response with an awaitable ``json()``
        :param settings: extra keyword arguments forwarded to every ``session.get``
            call (e.g. proxy, timeout); ``None`` means no extras
        """
        self.session = session

        # Request options such as proxy or timeout.
        self.settings = settings or {}

        # The value must be the bare UA string; the header name is the dict key.
        self.default_headers = {
            'user-agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                           'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
                          )
        }

    async def get_one(self, uid: str, pn: int, ps: int = 30):
        """
        Fetch a single page of a user's album data.

        :param uid: user uid
        :param pn: page number; the first page is 0
        :param ps: items per page; the API rejects values above 50

        :return: the decoded JSON body; for the format see
            https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid=2
        """
        api = f'https://api.bilibili.com/x/dynamic/feed/draw/doc_list?uid={uid}&page_num={pn}&page_size={ps}'

        # Merge headers rather than passing both: a 'headers' entry in settings would
        # otherwise collide with the explicit headers= keyword and raise TypeError.
        request_kwargs = dict(self.settings)
        headers = dict(self.default_headers)
        headers.update(request_kwargs.pop('headers', None) or {})

        async with self.session.get(api, headers=headers, **request_kwargs) as resp:
            return await resp.json()

    async def get_many(self, uid: str, begin: int, end: int, ps: int = 30, coro: int = 5):
        """
        Fetch several pages of a user's album data concurrently.

        :param uid: user uid
        :param begin: first page to fetch
        :param end: stop page; the fetched range is [begin, end)
        :param ps: items per page, passed through to ``get_one``
        :param coro: maximum number of requests in flight at once (at least 1)

        :return: an async iterator (consume with ``async for``) of
            (page number, decoded JSON body), in ascending page order
        :raises ValueError: if ``coro`` is smaller than 1
        """
        # Semaphore(0) would block every worker forever, so reject it up front.
        if coro < 1:
            raise ValueError('coro must be at least 1')

        semaphore = asyncio.Semaphore(coro)

        async def worker(pn):
            async with semaphore:
                return await self.get_one(uid, pn, ps)

        tasks = [asyncio.create_task(worker(pn)) for pn in range(begin, end)]

        try:
            # start=begin so the yielded number is the real page, not the list index.
            for pn, task in enumerate(tasks, start=begin):
                yield pn, await task
        finally:
            # Reached on normal exhaustion, on an error from a page, and when the
            # consumer closes the generator early: stop requests nobody will read.
            for task in tasks:
                if not task.done():
                    task.cancel()
        

async def run(crawler: 'BiliUserAlbumCrawler', uid: str, begin: int, end: int, ps: int = 30):
    """
    Yield the image URL of every picture found on pages [begin, end) of a user's album.

    Iteration stops at the first page whose ``code`` is not 0, or that carries no items.

    :param crawler: object exposing ``get_many(uid, begin=, end=, ps=)`` as an async
        iterator of (page number, decoded JSON body)
    :param uid: user uid
    :param begin: first page to fetch
    :param end: stop page (exclusive)
    :param ps: items per page

    :return: an async iterator of ``img_src`` strings
    """
    print(f'开始获取 {uid} 的相簿数据...')

    async for idx, data in crawler.get_many(uid, begin=begin, end=end, ps=ps):
        code, message = data.get('code'), data.get('message')

        if code != 0:
            print(f'接口返回值出错: {code}, Message: {message}')
            break

        # 'data' and 'items' may each be missing, null or empty; all of those mean
        # this page has nothing to offer, so handle them in one place.
        items = (data.get('data') or {}).get('items') or []

        if not items:
            print(f'用户 {uid} 第 {idx} 页无数据，退出')
            break

        for item in items:
            # An item without pictures contributes nothing instead of raising.
            for pic in item.get('pictures') or []:
                yield pic['img_src']
                
                     
# Inputs the crawl needs:
# 1. uid    required
# 2. begin  required
# 3. end    optional
# 4. coro   optional

async def main():
    """Download every picture from the first album page of a fixed user into ./bot.

    NOTE(review): ``save_file`` is not defined in the visible cells; it has to exist
    in the kernel already for this to run.
    """
    # The target directory does not change per image, so resolve it once.
    dirpath = os.path.join(os.getcwd(), 'bot')

    async with ClientSession() as session:
        crawler = BiliUserAlbumCrawler(session=session)
        async for url in run(crawler, '343118157', begin=0, end=1, ps=50):
            # Everything after the last '/' is used as the file name.
            filename = url.rsplit('/', 1)[-1]
            if not filename:
                continue

            async with session.get(url) as resp:
                # Do not write an error page to disk under an image file name.
                if resp.status != 200:
                    print(f'下载失败: {url}, 状态码: {resp.status}')
                    continue

                await save_file(dirpath, filename, await resp.read())


asyncio.run(main())

开始获取 343118157 的相簿数据...
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\3a40afba1470d05b1f854222e37c28725a217839.png is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\2a455886a205ad35d9abecbb4c41fe82cecd919c.png is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\300e33bbf7487f4ac75f71ccdb9c6e79d52c573e.jpg is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\657ccd6e8e74b03d1a22ae74e374ff8f84b1bb26.png is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\dbe05e91589b5a7ab33f4c29a704ade2384e296a.jpg is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\5bac7d8c639cf19ee5d9489402d620f7fe800fed.png is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\7ff29e4d87f1aa4c6ebceb564525d0d6a4db1db5.png is already exists
C:\Users\admin\Desktop\Python\Crawlers\BiliUserAlbumCrawler\bot\31cfac54ffbc0f75b9a6498

In [29]:
async def test():
    """Demo: run five sleeping workers with at most three inside the semaphore at once.

    Yields each worker's message in the order the workers were created.
    """
    limiter = asyncio.Semaphore(3)

    async def worker(n):
        # Only the announce-and-sleep part is throttled; building the result is not.
        async with limiter:
            print(f'worker {n} is working')
            await asyncio.sleep(n)

        return f'I am worker {n}'

    # Every task is scheduled immediately; the semaphore decides who sleeps first.
    pending = []
    for seconds in (1, 2, 3, 4, 5):
        pending.append(asyncio.create_task(worker(seconds)))

    for job in pending:
        message = await job
        yield message
        
# NOTE(review): this reuses the name `run`, replacing the crawler's `run` generator
# in the kernel once this cell executes.
async def run():
    """Print every message produced by the `test` demo generator."""
    messages = test()
    async for line in messages:
        print(line)


asyncio.run(run())

worker 1 is working
worker 2 is working
worker 3 is working
worker 4 is working
I am worker 1
worker 5 is working
I am worker 2
I am worker 3
I am worker 4
I am worker 5


In [19]:
# Scratch exploration: list every name the typing module exposes.
import typing
dir(typing)

['AbstractSet',
 'Any',
 'AnyStr',
 'AsyncContextManager',
 'AsyncGenerator',
 'AsyncIterable',
 'AsyncIterator',
 'Awaitable',
 'BinaryIO',
 'ByteString',
 'CT_co',
 'Callable',
 'ChainMap',
 'ClassVar',
 'Collection',
 'Container',
 'ContextManager',
 'Coroutine',
 'Counter',
 'DefaultDict',
 'Deque',
 'Dict',
 'ForwardRef',
 'FrozenSet',
 'Generator',
 'Generic',
 'Hashable',
 'IO',
 'ItemsView',
 'Iterable',
 'Iterator',
 'KT',
 'KeysView',
 'List',
 'Mapping',
 'MappingView',
 'Match',
 'MethodDescriptorType',
 'MethodWrapperType',
 'MutableMapping',
 'MutableSequence',
 'MutableSet',
 'NamedTuple',
 'NamedTupleMeta',
 'NewType',
 'NoReturn',
 'Optional',
 'OrderedDict',
 'Pattern',
 'Reversible',
 'Sequence',
 'Set',
 'Sized',
 'SupportsAbs',
 'SupportsBytes',
 'SupportsComplex',
 'SupportsFloat',
 'SupportsInt',
 'SupportsRound',
 'T',
 'TYPE_CHECKING',
 'T_co',
 'T_contra',
 'Text',
 'TextIO',
 'Tuple',
 'Type',
 'TypeVar',
 'Union',
 'VT',
 'VT_co',
 'V_co',
 'ValuesView',
 'W

In [30]:
# Scratch exploration: list the attributes of os.path (`os` is imported in the crawler cell).
dir(os.path)

['__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_abspath_fallback',
 '_get_bothseps',
 '_getfinalpathname',
 '_getfullpathname',
 '_getvolumepathname',
 'abspath',
 'altsep',
 'basename',
 'commonpath',
 'commonprefix',
 'curdir',
 'defpath',
 'devnull',
 'dirname',
 'exists',
 'expanduser',
 'expandvars',
 'extsep',
 'genericpath',
 'getatime',
 'getctime',
 'getmtime',
 'getsize',
 'isabs',
 'isdir',
 'isfile',
 'islink',
 'ismount',
 'join',
 'lexists',
 'normcase',
 'normpath',
 'os',
 'pardir',
 'pathsep',
 'realpath',
 'relpath',
 'samefile',
 'sameopenfile',
 'samestat',
 'sep',
 'split',
 'splitdrive',
 'splitext',
 'stat',
 'supports_unicode_filenames',
 'sys']