# Programmation concurrente

In [59]:
from concurrent.futures import ThreadPoolExecutor, as_completed, Future
import os
import requests
import time # sleep
import random

## Module: concurrent.futures
- ordonnanceur de tâches utilisant un pool de processus ou de threads (taille du pool configurable)
- objet `Future` gérant le job et son futur résultat

In [12]:
# Number of logical CPUs in the machine (may be None if undetermined).
cpu_count = os.cpu_count()
# os.process_cpu_count() (Python 3.13+) reports the logical CPUs usable by the
# current process, which can be fewer than os.cpu_count(). Fall back to
# os.cpu_count() on older interpreters where the function does not exist.
process_cpu_count = getattr(os, 'process_cpu_count', os.cpu_count)()
# Default ThreadPoolExecutor size:
#   Python 3.8 - 3.12: min(32, cpu_count + 4)
#   Python 3.13+     : min(32, (process_cpu_count or 1) + 4)
# The "or 1" guards against a None CPU count.
default_workers = min(32, (process_cpu_count or 1) + 4)
print('Nb coeurs:', cpu_count)
print('Nb workers:', default_workers)

Nb coeurs: 16
Nb workers: 20


In [25]:
# Create a thread pool limited to five worker threads and display its repr.
pool = ThreadPoolExecutor(max_workers=5)
pool

<concurrent.futures.thread.ThreadPoolExecutor at 0x1f5cae60690>

In [29]:
# Shut the pool down explicitly; by default shutdown() waits for pending jobs.
pool.shutdown()

In [15]:
# Pages fetched by the download examples in this notebook.
urls = [
    "https://docs.python.org/3/library/concurrent.futures.html",
    "https://numpy.org/doc/stable/reference/index.html",
    "https://pandas.pydata.org/docs/reference/index.html",
    "https://nourl.nourl"  # presumably unreachable on purpose, to exercise the error path
]
    

In [16]:
# Fetch the first page synchronously. requests applies no timeout unless one is
# given, so pass one to keep the cell from hanging on an unresponsive server.
r = requests.get(urls[0], timeout=10)
r

<Response [200]>

In [17]:
# HTTP status code of the response `r`.
r.status_code

200

In [18]:
# Preview only the beginning of the body: displaying the whole HTML page
# bloats the notebook output.
r.text[:500]



In [34]:
def get_url_v0(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and print a one-line outcome.

    Args:
        url: address to download.
        timeout: connect/read timeout in seconds passed to ``requests.get``,
            so an unresponsive server cannot block the caller forever.

    Prints ``OK:`` followed by the first 50 characters of the body when the
    status code is 200, ``Error: <status>`` for any other status, and
    ``Error: exception (<type>)`` when ``requests`` raises. Returns ``None``.
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Only network/HTTP failures are handled here; KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        print(f'Error: exception ({type(exc).__name__})')
        return
    if r.status_code == 200:
        print('OK:', r.text[:50])
    else:
        print(f'Error: {r.status_code}')

In [80]:
# Try out the delay generator: a random integer between 2 and 10 inclusive.
random.randint(2,10)

3

In [82]:
# Block the current thread for a random 2 to 10 seconds.
time.sleep(random.randint(2,10))

In [83]:
def get_url(url, timeout=10):
    """Download ``url`` after a random artificial delay and return its body.

    The function first sleeps for a random 2 to 10 seconds (presumably so that
    concurrent jobs finish in a varying order), then issues an HTTP GET.

    Args:
        url: address to download.
        timeout: connect/read timeout in seconds passed to ``requests.get``,
            so a stalled server cannot block a worker thread forever.

    Returns:
        The response text when the status code is 200; ``None`` for any other
        status or when ``requests`` raises an error.
    """
    time.sleep(random.randint(2, 10))
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network/HTTP failure: report "no result" instead of raising.
        # Unrelated exceptions (e.g. KeyboardInterrupt) now propagate.
        return None
    return r.text if r.status_code == 200 else None

In [44]:
# main thread
# Everything at this level runs in the main thread.
with ThreadPoolExecutor(max_workers=5) as pool:
    jobs = []
    for url in urls:
        # submit() schedules get_url(url) on a worker thread and returns a Future.
        jobs.append(pool.submit(get_url, url))
    print(jobs)

    # Explicit wait: result() blocks until the job is done (a timeout may be given).
    results = []
    for job in jobs:
        results.append(job.result())

# Leaving the with-block calls pool.shutdown(), which by default waits for all jobs.
print(jobs)
# Collecting job.result() here instead would return immediately: every job is done.
[ ('KO' if text is None else text[:10]) for text in results ]

[<Future at 0x1f5ccb0ac50 state=running>, <Future at 0x1f5ccb08dd0 state=running>, <Future at 0x1f5ccb0b2d0 state=running>, <Future at 0x1f5ccb0bc50 state=running>]
[<Future at 0x1f5ccb0ac50 state=finished returned str>, <Future at 0x1f5ccb08dd0 state=finished returned str>, <Future at 0x1f5ccb0b2d0 state=finished returned str>, <Future at 0x1f5ccb0bc50 state=finished returned NoneType>]


['<!DOCTYPE ', '\n<!DOCTYPE', '\n<!DOCTYPE', 'KO']

In [48]:
# main thread
# Everything at this level runs in the main thread.
with ThreadPoolExecutor(max_workers=5) as pool:
    # map() yields results in input order; unpacking the iterator into a list
    # consumes it and therefore waits for every job.
    results = [*pool.map(get_url, urls)]
[ ('KO' if text is None else text[:10]) for text in results ]

['<!DOCTYPE ', '\n<!DOCTYPE', '\n<!DOCTYPE', 'KO']

### Traitement au fil de l'eau
Itérateur : `as_completed`

In [86]:
# Documentation pages to download, keyed by a short source name.
doc_sources = {
    "python": "https://docs.python.org/3/library/concurrent.futures.html",
    "numpy": "https://numpy.org/doc/stable/reference/index.html",
    "pandas": "https://pandas.pydata.org/docs/reference/index.html",
    "dummy": "https://nourl.nourl",  # presumably unreachable on purpose, to exercise the error path
    "requests": "https://pypi.org/project/requests/",
    "beautiful soup": "https://tedboy.github.io/bs4_doc",
    "scrapy": "https://docs.scrapy.org/en/latest/",
}

In [87]:
with ThreadPoolExecutor(max_workers=3) as pool:
    # Submit one download per source and remember which Future belongs to
    # which source name, so finished jobs can be identified later.
    job_dict: dict[Future, str] = {
        pool.submit(get_url, url): doc_name
        for doc_name, url in doc_sources.items()
    }
    print('Jobs started:', job_dict)

    # Handle the jobs in completion order rather than submission order.
    for job in as_completed(job_dict):
        source = job_dict[job]
        print(f'Finished [{source}]: {job}')

Jobs started: {<Future at 0x1f5cc6347d0 state=running>: 'python', <Future at 0x1f5cc6353d0 state=running>: 'numpy', <Future at 0x1f5cc5e87d0 state=running>: 'pandas', <Future at 0x1f5cc634750 state=pending>: 'dummy', <Future at 0x1f5cc636cd0 state=pending>: 'requests', <Future at 0x1f5cc5eb950 state=pending>: 'beautiful soup', <Future at 0x1f5cc5ea250 state=pending>: 'scrapy'}
Finished [numpy: <Future at 0x1f5cc6353d0 state=finished returned str>
Finished [pandas: <Future at 0x1f5cc5e87d0 state=finished returned str>
Finished [requests: <Future at 0x1f5cc636cd0 state=finished returned str>
Finished [python: <Future at 0x1f5cc6347d0 state=finished returned str>
Finished [dummy: <Future at 0x1f5cc634750 state=finished returned NoneType>
Finished [beautiful soup: <Future at 0x1f5cc5eb950 state=finished returned str>
Finished [scrapy: <Future at 0x1f5cc5ea250 state=finished returned str>
