In [1]:
import asyncio
import itertools
from collections.abc import Iterator
from typing import Any

import aiohttp

In [36]:
from unittest.mock import patch, MagicMock, AsyncMock

mock = MagicMock()
mock.__aiter__.return_value = [1, 2, 3]

In [37]:
async for i in mock:
    print(i)

1
2
3


In [56]:
with open("./urls.txt", encoding="utf-8") as filestream:
    urls = filestream.read().rstrip("\n").split("\n")

len(urls)

112

In [71]:
with open("./urls.txt", encoding="utf-8") as filestream:
    try:
        iter_file = iter(filestream)
        for _ in range(10):
            print(next(iter_file).rstrip("\n"))
    except StopIteration:
        print("done")

https://docs.python.org/3/whatsnew/3.11.html
https://docs.python.org/3/library/http.client.html
https://docs.python.org/3/library/urllib.request.html
https://artifacthub.io/
https://github.com/rsennrich/subword-nmt
https://lightning.ai/docs/pytorch/stable/notebooks/course_UvA-DL/05-transformers-and-MH-attention.html
https://github.com/gruns/icecream
https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html
https://containerd.io/


In [63]:
urls[:9]

['https://docs.python.org/3/whatsnew/3.11.html',
 'https://docs.python.org/3/library/http.client.html',
 'https://docs.python.org/3/library/urllib.request.html',
 'https://artifacthub.io/',
 'https://github.com/rsennrich/subword-nmt',
 'https://lightning.ai/docs/pytorch/stable/notebooks/course_UvA-DL/05-transformers-and-MH-attention.html',
 'https://github.com/gruns/icecream',
 'https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html',
 'https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html']

In [64]:
URL = "https://docs.python.org/3/whatsnew/3.11.html"
URLS = [URL] * 20


async def fetch_url(url):
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return resp.status


async def batch_fetch(urls):
    tasks = [asyncio.create_task(fetch_url(url)) for url in urls]
    await asyncio.gather(*tasks)

In [66]:
await batch_fetch(urls[:10])
await batch_fetch(urls[11:22])

In [61]:
URL = "https://docs.python.org/3/whatsnew/3.11.html"
URLS = [URL] * 20

sem = asyncio.Semaphore(20)


async def fetch_url(url, sem):
    async with aiohttp.ClientSession() as session:
        async with sem:
            async with session.get(url) as resp:
                return resp.status


async def batch_fetch(urls, sem):
    tasks = [fetch_url(url, sem) for url in urls]
    await asyncio.gather(*tasks)

In [62]:
await batch_fetch(urls, sem)

In [47]:
async with aiohttp.ClientSession() as session:
    async with session.get(URL) as resp:
        body = await resp.read()
        print(resp)

<ClientResponse(https://docs.python.org/3/whatsnew/3.11.html) [200 OK]>
<CIMultiDictProxy('Connection': 'keep-alive', 'Content-Length': '54906', 'Server': 'nginx', 'Content-Type': 'text/html', 'Last-Modified': 'Thu, 09 Nov 2023 10:11:20 GMT', 'Etag': '"654cb048-5493b"', 'x-clacks-overhead': 'GNU Terry Pratchett', 'strict-transport-security': 'max-age=315360000; includeSubDomains; preload', 'Content-Encoding': 'gzip', 'Via': '1.1 varnish, 1.1 varnish', 'Accept-Ranges': 'bytes', 'Date': 'Thu, 09 Nov 2023 13:50:16 GMT', 'Age': '13083', 'X-Served-By': 'cache-lga21964-LGA, cache-fra-etou8220103-FRA', 'X-Cache': 'HIT, HIT', 'X-Cache-Hits': '1, 1', 'X-Timer': 'S1699537817.954127,VS0,VE1', 'Vary': 'Accept-Encoding')>



In [None]:
body

In [23]:
import argparse


parser = argparse.ArgumentParser(prog="Скрипт для асинхронной обкачки урлов")
parser.add_argument("-c", "--connections", type=int, default=10)
parser.add_argument("file", type=open)

_StoreAction(option_strings=[], dest='file', nargs=None, const=None, default=None, type=<function open at 0x7f1d5ce3f490>, choices=None, required=True, help=None, metavar=None)

In [25]:
args = parser.parse_args("-c 15 urls.txt".split())
args

Namespace(connections=15, file=<_io.TextIOWrapper name='urls.txt' mode='r' encoding='UTF-8'>)