## asynchronous or threaded - 2500 req in 2 sec with asynchronous and wait

### Requirements:

- aiohttp
- requests-futures
- requests~=2.27.1

### Terms

- Threading is for working in parallel, and async is for waiting in parallel.
- I will evaluate thee processing time to understand this.

### Threading

In [6]:
from concurrent.futures import ThreadPoolExecutor
from time import perf_counter
import requests
start = perf_counter()
urls = range(1, 25000)

def get_data(url):
    r = requests.get(f'http://127.0.0.1:8000/items/{url}')
    print(r.json())

with ThreadPoolExecutor() as executor:
    executor.map(get_data, urls)

stop = perf_counter()
print("time taken:", stop - start)

# time taken: 37.273589322998305

time taken: 39.466088334999995


## Asynchronous

In [8]:
import asyncio
from time import perf_counter
import aiohttp

async def fetch(s, url):
    async with s.get(f'http://127.0.0.1:8000/items/{url}') as r:
        if r.status != 200:
            r.raise_for_status()
        return await r.text()


async def fetch_all(s, urls):
    tasks = []
    for url in urls:
        task = asyncio.create_task(fetch(s, url))
        tasks.append(task)
    res = await asyncio.gather(*tasks)
    return res


async def main():
    urls = range(1, 25000)
    async with aiohttp.ClientSession() as session:
        htmls = await fetch_all(session, urls)
        print(htmls)


if __name__ == '__main__':
    start = perf_counter()
    asyncio.run(main())
    stop = perf_counter()
    print("time taken:", stop - start)
    # time taken: 14.692326207994483

RuntimeError: asyncio.run() cannot be called from a running event loop

### Blocking

In [7]:
import requests
from time import perf_counter
start = perf_counter()

for x in range(1, 2500):
    r = requests.get(f'http://127.0.0.1:8000/items/{x}')
    print(r.json())

stop = perf_counter()
print("time taken:", stop - start)
# time taken: 58.477935733004415

ConnectionError: HTTPConnectionPool(host='127.0.0.1', port=8000): Max retries exceeded with url: /items/1 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x79c71ca7ca50>: Failed to establish a new connection: [Errno 111] Connection refused'))

### image scrapping from web

In [9]:
import requests
from bs4 import BeautifulSoup
import os

#url = 'https://www.airbnb.co.uk/s/Ljubljana--Slovenia/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&query=Ljubljana%2C%20Slovenia&place_id=ChIJ0YaYlvUxZUcRIOw_ghz4AAQ&checkin=2020-11-01&checkout=2020-11-08&source=structured_search_input_header&search_type=autocomplete_click'

def imagedown(url, folder):
    try:
        os.mkdir(os.path.join(os.getcwd(), folder))
    except:
        pass
    os.chdir(os.path.join(os.getcwd(), folder))
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    images = soup.find_all('img')
    for image in images:
        name = image['alt']
        link = image['src']
        with open(name.replace(' ', '-').replace('/', '') + '.jpg', 'wb') as f:
            im = requests.get(link)
            f.write(im.content)
            print('Writing: ', name)

imagedown('https://www.airbnb.co.uk/s/Bratislava--Slovakia/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&place_id=ChIJl2HKCjaJbEcRaEOI_YKbH2M&query=Bratislava%2C%20Slovakia&checkin=2020-11-01&checkout=2020-11-22&source=search_blocks_selector_p1_flow&search_type=search_query', 'bratislava')

Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  
Writing:  


KeyError: 'alt'