In [9]:
from threading import Thread
import time
import requests
import concurrent.futures
import json

In [None]:

def get_wiki_page_existence(wiki_page_url, timeout=10):
    """Report whether a wiki page exists, judged by the HTTP status of a GET.

    Args:
        wiki_page_url: URL of the page to probe.
        timeout: Timeout value forwarded to ``requests.get`` (default 10).

    Returns:
        The URL followed by ``" - "`` and one of ``"exists"`` (HTTP 200),
        ``"does not exist"`` (HTTP 404) or ``"unknown"`` (any other status).
    """
    # Only these two status codes are interpreted; everything else is "unknown".
    status_labels = {200: "exists", 404: "does not exist"}
    status_code = requests.get(url=wiki_page_url, timeout=timeout).status_code
    return wiki_page_url + " - " + status_labels.get(status_code, "unknown")

In [None]:
# Smoke test: probe one page that is expected to exist.
url = "https://en.wikipedia.org/wiki/Ocean"
print(get_wiki_page_existence(url))

In [None]:
# Sequential baseline: probe 50 pages one after another and time the whole run.
wiki_page_urls = ["https://en.wikipedia.org/wiki/" + str(i) for i in range(50)]

print("Running without threads:")
# perf_counter() is the clock intended for measuring elapsed time; time.time()
# is wall-clock time and can jump if the system clock is adjusted mid-run.
without_threads_start = time.perf_counter()
for url in wiki_page_urls:
    print(get_wiki_page_existence(wiki_page_url=url))
print("Without threads time:", time.perf_counter() - without_threads_start)

In [None]:
# Threaded run: probe the same 50 pages through a thread pool and time it.
wiki_page_urls = ["https://en.wikipedia.org/wiki/" + str(i) for i in range(50)]

print("Running threaded:")
# perf_counter() is the clock intended for measuring elapsed time; time.time()
# is wall-clock time and can jump if the system clock is adjusted mid-run.
threaded_start = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for url in wiki_page_urls:
        futures.append(executor.submit(get_wiki_page_existence, wiki_page_url=url))
    # as_completed yields futures as they finish, so the lines are printed in
    # completion order rather than URL order.
    for future in concurrent.futures.as_completed(futures):
        # result() re-raises any exception raised inside the worker call.
        print(future.result())
print("Threaded time:", time.perf_counter() - threaded_start)

In [24]:
def getBooksData(bookId, timeout=10):
    """Fetch one book record from the Gutendex API and return it parsed.

    Args:
        bookId: Identifier substituted into ``https://gutendex.com/books/{}``.
        timeout: Timeout value forwarded to ``requests.get`` (default 10).

    Returns:
        The decoded JSON body of the response.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.

    Side effects:
        Prints ``bookId`` when the decoded body has a non-null ``detail`` key.
    """
    response = requests.get('https://gutendex.com/books/{}'.format(bookId), timeout=timeout)
    book_record = json.loads(response.text)
    # NOTE(review): presumably the API sets "detail" on error responses
    # (e.g. an unknown id) — confirm against the Gutendex documentation.
    if book_record.get('detail') is not None:
        print(bookId)
    return book_record

In [25]:
# Fetch the records for book ids 1..50 through a thread pool and time the run.
listBooks = list(range(1, 51))
print("Running threaded:")
# perf_counter() is the clock intended for measuring elapsed time; time.time()
# is wall-clock time and can jump if the system clock is adjusted mid-run.
threaded_start = time.perf_counter()
booksData = []
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = []
    for bookId in listBooks:
        futures.append(executor.submit(getBooksData, bookId))
    # as_completed yields futures as they finish, so booksData ends up in
    # completion order, not ordered by book id.
    for future in concurrent.futures.as_completed(futures):
        # result() re-raises any exception raised inside the worker call.
        booksData.append(future.result())
print(booksData)
print("Threaded time:", time.perf_counter() - threaded_start)

Running threaded:
{'id': 8, 'title': "Abraham Lincoln's Second Inaugural Address", 'authors': [{'name': 'Lincoln, Abraham', 'birth_year': 1809, 'death_year': 1865}], 'translators': [], 'subjects': ['Presidents -- United States -- Inaugural addresses', 'United States -- Politics and government -- 1861-1865'], 'bookshelves': ['US Civil War'], 'languages': ['en'], 'copyright': False, 'media_type': 'Text', 'formats': {'application/x-mobipocket-ebook': 'https://www.gutenberg.org/ebooks/8.kf8.images', 'application/epub+zip': 'https://www.gutenberg.org/ebooks/8.epub3.images', 'text/html; charset=us-ascii': 'https://www.gutenberg.org/files/8/8-h/8-h.htm', 'image/jpeg': 'https://www.gutenberg.org/cache/epub/8/pg8.cover.medium.jpg', 'text/plain; charset=us-ascii': 'https://www.gutenberg.org/files/8/8.txt', 'text/html': 'https://www.gutenberg.org/ebooks/8.html.images', 'text/plain': 'https://www.gutenberg.org/ebooks/8.txt.utf-8', 'application/rdf+xml': 'https://www.gutenberg.org/ebooks/8.rdf'}, '