<a href="https://www.kaggle.com/code/nigamshitij/parse-book-reviews-using-duckduckgo?scriptVersionId=195239794" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [18]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/book-review-samples/goodreads_fantasy.csv
/kaggle/input/book-review-samples/goodreads_all_genres.csv
/kaggle/input/book-review-samples/Darth_Plagueis_reviews(4).csv
/kaggle/input/book-review-samples/goodreads_all_genres_final.csv
/kaggle/input/book-review-samples/Dune_Book_1_reviews.csv
/kaggle/input/book-review-samples/Dune_Book_1_reviews(4).csv
/kaggle/input/book-review-samples/goodreads_genres_complete.csv


In [19]:
# Install the package that provides the `duckduckgo_search.DDGS` client imported further down.
pip install duckduckgo-search

Note: you may need to restart the kernel to use updated packages.


In [21]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import time
import random
import re
import html
from duckduckgo_search import DDGS
import glob
import csv
from tqdm import tqdm

In [22]:
# boilerplate removal

def remove_boilerplate(text):
    """Cut every line of ``text`` off at the first boilerplate phrase it contains.

    Each phrase is matched case-insensitively and removed together with
    whatever follows it on the same line. A newline is treated as a phrase
    too, so everything after the first line break is dropped as well.

    Args:
        text: The text to clean.

    Returns:
        The cleaned text.
    """
    markers = ('cookie policy', 'privacy policy', 'terms of service', 'all rights reserved', '\n')
    cleaned = text
    for marker in markers:
        # `.` does not cross line breaks, so each match ends at the end of its line.
        tail_pattern = re.compile(r'(?i)' + re.escape(marker) + r'.*')
        cleaned = tail_pattern.sub('', cleaned)
    return cleaned

In [23]:
# comment truncation (20% threshold default)

def truncate_at_comments(review_text, threshold_percentage=20, min_review_length=100):
    """Cut a scraped review off where a reader-comment section appears to begin.

    The first ``threshold`` characters are protected, where ``threshold`` is the
    larger of ``threshold_percentage`` percent of the text length and
    ``min_review_length``. The text is cut at the earliest comment marker
    (case-insensitive) that starts after that protected prefix.

    If no marker is found, paragraphs (split on blank lines) that are shorter
    than 50 characters or start with a typical comment prefix are dropped. If
    that removes nothing either, only the leading part of the text is returned:
    the protected prefix or the first half, whichever is longer.

    Args:
        review_text: Raw review text.
        threshold_percentage: Share of the text (in percent) that is never cut.
        min_review_length: Minimum number of leading characters that is never cut.

    Returns:
        The truncated text with surrounding whitespace stripped.
    """
    comment_identifiers = [
        'Comments', 'Leave a comment', 'Reader comments',
        'What do you think?', 'Join the discussion', 'Add a comment',
        'Post a comment', 'Write a comment', 'Show comments'
    ]

    lower_text = review_text.lower()
    text_length = len(lower_text)
    threshold = max(int(text_length * (threshold_percentage / 100)), min_review_length)

    # Search only past the protected prefix: an incidental early hit (e.g. "the
    # author comments on ...") must not hide the real comment section further
    # down. Cut at the earliest marker in the text, not the first one in the list.
    marker_positions = []
    for identifier in comment_identifiers:
        position = lower_text.find(identifier.lower(), threshold + 1)
        if position != -1:
            marker_positions.append(position)
    if marker_positions:
        return review_text[:min(marker_positions)].strip()

    # If no identifiers found, try to detect comment-like structures
    paragraphs = review_text.split('\n\n')
    filtered_paragraphs = []

    for paragraph in paragraphs:
        # Skip short paragraphs that might be comments
        if len(paragraph) < 50:
            continue

        # Skip paragraphs that start with common comment patterns
        if re.match(r'^(Posted by|From|User|Anonymous|[\d/]+:)', paragraph.strip()):
            continue

        filtered_paragraphs.append(paragraph)

    # If we've removed some paragraphs, join the remaining ones
    if len(filtered_paragraphs) < len(paragraphs):
        return '\n\n'.join(filtered_paragraphs).strip()

    # Nothing was removed: keep only the leading part of the text
    return review_text[:max(threshold, len(review_text) // 2)].strip()

In [24]:
# remove comment classes

def remove_comments(review_soup):
    """Strip comment-section ``<div>`` elements from a parsed page, in place.

    Every ``<div>`` whose class or id equals one of the known comment-section
    names is removed, not just the first one per name. Pages usually contain
    one such div per comment, and some widgets (e.g. Disqus) mark their
    container with an id rather than a class.

    Args:
        review_soup: Parsed document exposing ``find(name, class_=..., id=...)``;
            matched elements must support ``decompose()``.

    Returns:
        The same ``review_soup`` object, after removal.
    """
    # Common class / id names for comment sections
    comment_classes = ['comment', 'comments-list', 'comments-area', 'comments', 'comment-section', 'user-comments', 'disqus_thread']

    for class_name in comment_classes:
        for selector in ({'class_': class_name}, {'id': class_name}):
            # Look up again after each removal rather than iterating a result list:
            # decomposing a container also destroys matching divs nested inside
            # it, and those must not be touched a second time.
            comment_section = review_soup.find('div', **selector)
            while comment_section is not None:
                comment_section.decompose()  # This removes the element from the soup
                comment_section = review_soup.find('div', **selector)

    return review_soup

In [25]:
# paragraph scoring for review content

def score_paragraph(paragraph):
    """Score a paragraph by how many review-related keywords it mentions.

    Matching is a case-insensitive substring test, and each keyword counts at
    most once however often it occurs.

    Args:
        paragraph: Paragraph text.

    Returns:
        Number of distinct keywords found (0-8).
    """
    review_keywords = ('review', 'book', 'read', 'author', 'story', 'character', 'plot', 'recommend')
    lowered = paragraph.lower()
    hits = 0
    for keyword in review_keywords:
        if keyword in lowered:
            hits += 1
    return hits

In [26]:
# add retries with timeouts selectively

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib3.exceptions import ReadTimeoutError
from bs4 import BeautifulSoup
import time
import random

def create_session_with_retries():
    """Build a ``requests`` session that retries transient failures.

    Up to 5 retries with a backoff factor of 1 are applied to HEAD, GET and
    OPTIONS requests, and responses with status 429, 500, 502, 503 or 504 are
    retried as well. The same adapter serves both http:// and https:// URLs.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=1,
        status_forcelist=[429, 500, 502, 503, 504],
        allowed_methods=["HEAD", "GET", "OPTIONS"],
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session

In [27]:
# URL fragments of sites that are never scraped; a URL is skipped when its
# lower-cased form contains any of these substrings.
exceptions = (
    'google.',
    'goodreads.com',
    'amazon.',
    'reddit.com',
    'thestorygraph',
    'youtube',
    '.tv',
    'barnesandnoble.com',
    'wikipedia.',
    'quora.com',
    'sparknotes.com',
    'grimdarkmagazine.',
    'washingtonpost.',
)

def process_review_url(review_url, headers):
    """Download one search hit and extract a review snippet from it.

    URLs containing any fragment listed in ``exceptions`` are skipped without
    a request (and without the politeness delay). Otherwise the page is
    fetched, comment sections are stripped, the three highest-scoring
    ``<main>``/``<p>`` elements are joined, and the result is cleaned with
    ``remove_boilerplate`` and ``truncate_at_comments``.

    Network and parsing errors are printed and swallowed, so a bad URL never
    aborts the caller's loop.

    Args:
        review_url: Address of the page to scrape.
        headers: HTTP headers sent with the request.

    Returns:
        A list with zero or one dict holding ``review_text`` (at most 5000
        characters), ``review_date`` (always ``'Unknown'``) and
        ``review_website``.
    """
    reviews = []
    # Skip known non-review sites
    if any(site in review_url.lower() for site in exceptions):
        return reviews

    try:
        # The session serves exactly one request; the context manager releases
        # its pooled connections instead of leaking them once per URL.
        with create_session_with_retries() as session:
            review_response = session.get(review_url, headers=headers, timeout=10)  # 10-second timeout

        # A 403/404 error page is not a review: raise so it is logged below
        # rather than parsed and stored.
        review_response.raise_for_status()

        review_soup = BeautifulSoup(review_response.text, 'html.parser')

        # Remove Comments
        review_soup = remove_comments(review_soup)

        # Review elements
        review_elements = review_soup.find_all(['main', 'p'])
        scored_paragraphs = [(elem, score_paragraph(elem.text)) for elem in review_elements]

        # Keep the text of the three best-scoring elements
        best_elements = sorted(scored_paragraphs, key=lambda x: x[1], reverse=True)[:3]
        review_text = ' '.join(elem.text for elem, _score in best_elements)

        # Remove boilerplate
        review_text = review_text.replace('\n', ' ')
        review_text = remove_boilerplate(review_text)

        # Truncate at Comments
        review_text = truncate_at_comments(review_text)

        review_date = 'Unknown'

        # Pages that yield no text at all would only add empty rows to the dataset.
        if review_text:
            reviews.append({
                'review_text': review_text[:5000],  # Limit to first 5000 characters
                'review_date': review_date,
                'review_website': review_url
            })

    except ReadTimeoutError as e:
        print(f"Read timeout error for {review_url}: {str(e)}")
    except requests.exceptions.RequestException as e:
        print(f"Error processing {review_url}: {str(e)}")
    except Exception as e:
        print(f"Unexpected error processing {review_url}: {str(e)}")

    # Be polite to servers
    time.sleep(random.uniform(1, 3))

    return reviews

In [28]:
# search_book_review test

def search_book_reviews(book_name, author):
    """Search DuckDuckGo for reviews of a book and scrape each hit.

    Args:
        book_name: Title of the book.
        author: Author name(s), appended to the search query.

    Returns:
        A DataFrame with one row per review returned by
        ``process_review_url`` for up to 10 search results.
    """
    query = f"{book_name} {author} book review"
    request_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    collected = []
    with DDGS() as ddgs:
        # Each hit's 'href' is the URL of the page to scrape.
        for hit in ddgs.text(query, max_results=10):
            collected.extend(process_review_url(hit['href'], request_headers))

    return pd.DataFrame(collected)

In [29]:
# # Example usage
# book_name = "Darth Plagueis"
# author = "James Luceno"

# df = search_book_reviews(book_name, author)

# # Save to CSV
# df.to_csv(f"{book_name.replace(' ', '_')}_reviews.csv", index=False)
# print(f"Reviews saved to {book_name.replace(' ', '_')}_reviews.csv")

# df.head()

In [30]:
# input_csv = '/kaggle/input/book-review-samples/goodreads_all_genres_final.csv'
# output_csv = 'book_reviews.csv'

# Locations of the book list to process and of the combined review output.
input_csv = '/kaggle/input/book-review-samples/goodreads_all_genres_final.csv'
output_directory = '/kaggle/working/'
output_csv = 'all_book_reviews.csv'

# Report how many (Title, Authors) duplicates the book list contains.
df = pd.read_csv(input_csv)
num_total = len(df)
df_unique = df.drop_duplicates(subset=['Title', 'Authors'], keep='first')
num_duplicates = num_total - len(df_unique)
print(f"Removed {num_duplicates} duplicate entries out of {num_total}")

Removed 0 duplicate entries out of 35597


In [31]:
def process_book_list_old(input_csv, output_csv):
    """Scrape reviews for every book in a CSV and write them out in one go.

    The input must provide the columns ``Title``, ``Authors``, ``Avg Ratings``,
    ``Rating`` and ``Published_year``; rows duplicated on (Title, Authors) are
    processed once. Nothing is written until every book has been handled.

    Args:
        input_csv: Path of the book list.
        output_csv: Path of the CSV that receives all collected reviews.
    """
    books = pd.read_csv(input_csv)
    unique_books = books.drop_duplicates(subset=['Title', 'Authors'], keep='first')

    # Report how many duplicates were dropped
    num_total = len(books)
    num_duplicates = num_total - len(unique_books)
    print(f"Removed {num_duplicates} duplicate entries out of {num_total}")

    collected = []
    progress = tqdm(unique_books.iterrows(), total=unique_books.shape[0], desc="Processing books")
    for _, book in progress:
        title, authors = book['Title'], book['Authors']

        try:
            book_reviews = search_book_reviews(title, authors)

            # Tag every review with the metadata of the book it belongs to
            book_reviews['Title'] = title
            book_reviews['Authors'] = authors
            for column in ('Avg Ratings', 'Rating', 'Published_year'):
                book_reviews[column] = book[column]

            collected.append(book_reviews)

        except Exception as e:
            # One failing book must not stop the whole run
            print(f"Error processing {title} by {authors}: {str(e)}")

    if not collected:
        print("No reviews were collected.")
        return

    # Merge the per-book frames and save them
    final_df = pd.concat(collected, ignore_index=True)
    final_df.to_csv(output_csv, index=False, quoting=csv.QUOTE_ALL)
    print(f"Reviews saved to {output_csv}")

In [32]:
def process_book_list_old_2(input_csv, output_csv):
    """Scrape reviews for every book in a CSV, checkpointing to batch files.

    Reviews are flushed to ``/kaggle/working/batch_<n>.csv`` each time 10
    per-book result frames have accumulated. At the end every
    ``batch_*.csv`` in that directory (including any left by an earlier run)
    is merged into ``output_csv`` and deleted.

    Args:
        input_csv: Path of the book list; needs the columns ``Title``,
            ``Authors``, ``Avg Ratings``, ``Rating`` and ``Published_year``.
        output_csv: Path of the combined review CSV.
    """
    # One CSV dialect for every write AND read-back. Files written with a
    # backslash escapechar must be parsed with the same settings, otherwise
    # escaped quotes inside review text corrupt the fields.
    write_options = dict(index=False, quoting=csv.QUOTE_ALL, escapechar='\\', doublequote=False)
    read_options = dict(escapechar='\\', doublequote=False)

    def batch_sort_key(path):
        # Order batches by their number (batch_2 before batch_10); files whose
        # name carries no number go last.
        digits = re.search(r'batch_(\d+)\.csv$', os.path.basename(path))
        return (int(digits.group(1)) if digits else float('inf'), path)

    # Read the input CSV
    df = pd.read_csv(input_csv)

    # remove duplicates
    df_unique = df.drop_duplicates(subset=['Title', 'Authors'], keep='first')

    # Print information about removed duplicates
    num_duplicates = len(df) - len(df_unique)
    num_total = len(df)
    print(f"Removed {num_duplicates} duplicate entries out of {num_total}")

    all_reviews = []
    batch_size = 10
    batch_count = 0

    # Iterate through each row in the dataframe
    for index, row in tqdm(df_unique.iterrows(), total=df_unique.shape[0], desc="Processing books"):
        title = row['Title']
        authors = row['Authors']

        try:
            # Get reviews for this book
            reviews_df = search_book_reviews(title, authors)

            # Add book information to each review
            reviews_df['Title'] = title
            reviews_df['Authors'] = authors
            reviews_df['Avg Ratings'] = row['Avg Ratings']
            reviews_df['Rating'] = row['Rating']
            reviews_df['Published_year'] = row['Published_year']

            all_reviews.append(reviews_df)

        except Exception as e:
            print(f"Error processing {title} by {authors}: {str(e)}")

        # Save a batch once 10 per-book frames have accumulated
        if len(all_reviews) == batch_size:
            batch_df = pd.concat(all_reviews, ignore_index=True)
            batch_filename = f"/kaggle/working/batch_{batch_count}.csv"
            batch_df.to_csv(batch_filename, **write_options)
            print(f"Batch {batch_count} saved to {batch_filename}")
            all_reviews = []
            batch_count += 1

    # Save any remaining reviews (same dialect as every other batch)
    if all_reviews:
        batch_df = pd.concat(all_reviews, ignore_index=True)
        batch_filename = f"/kaggle/working/batch_{batch_count}.csv"
        batch_df.to_csv(batch_filename, **write_options)
        print(f"Final batch saved to {batch_filename}")

    # Combine all batches
    all_files = sorted(glob.glob("/kaggle/working/batch_*.csv"), key=batch_sort_key)
    if not all_files:
        # pd.concat raises on an empty sequence; there is simply nothing to merge
        print("No reviews were collected.")
        return
    combined_df = pd.concat((pd.read_csv(f, **read_options) for f in all_files), ignore_index=True)

    # Save combined results
    combined_df.to_csv(output_csv, **write_options)
    print(f"All reviews combined and saved to {output_csv}")

    # Remove the batch files now that they are merged
    for f in all_files:
        os.remove(f)
    print("Batch files removed")

In [33]:
def process_book_list(input_csv, output_csv, batch_size=10, batch_dir="/kaggle/working"):
    """Scrape reviews for every book in a CSV, resumable via batch files.

    Books are processed in fixed groups of ``batch_size`` rows. After the last
    row of each group, the reviews of ALL books in that group are written to
    ``<batch_dir>/batch_<n>.csv``, where ``n`` is the group number
    (``row index // batch_size``). A file is written even when every book in
    the group failed, so numbering always matches row positions. On start the
    highest existing batch number decides where to resume. When all rows are
    done, the batch files are merged into ``output_csv`` and deleted.

    Args:
        input_csv: Path of the book list; needs the columns ``Title``,
            ``Authors``, ``Avg Ratings``, ``Rating`` and ``Published_year``.
        output_csv: Path of the combined review CSV.
        batch_size: Number of books per batch file. Must match the value used
            by the run that produced any batch files being resumed from.
        batch_dir: Directory holding the batch files.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # One CSV dialect for every write AND read-back. Files written with a
    # backslash escapechar must be parsed with the same settings, otherwise
    # escaped quotes inside review text corrupt the fields.
    csv_write_options = dict(index=False, quoting=csv.QUOTE_ALL, escapechar='\\', doublequote=False)
    csv_read_options = dict(escapechar='\\', doublequote=False)

    # Header used for a batch in which no book produced any review
    review_columns = ['review_text', 'review_date', 'review_website',
                      'Title', 'Authors', 'Avg Ratings', 'Rating', 'Published_year']

    def find_batches():
        # (number, path) of every batch file, in ascending batch order
        numbered = []
        for path in glob.glob(os.path.join(batch_dir, "batch_*.csv")):
            match = re.fullmatch(r'batch_(\d+)\.csv', os.path.basename(path))
            if match:
                numbered.append((int(match.group(1)), path))
        return sorted(numbered)

    # Read the input CSV
    df = pd.read_csv(input_csv)

    # Remove duplicates
    df_unique = df.drop_duplicates(subset=['Title', 'Authors'], keep='first')

    # Print information about removed duplicates
    num_duplicates = len(df) - len(df_unique)
    num_total = len(df)
    print(f"Removed {num_duplicates} duplicate entries out of {num_total}")

    # Resume after the highest-numbered batch on disk. The number is parsed
    # from the file name; creation time is not a reliable ordering.
    existing_batches = find_batches()
    start_index = (existing_batches[-1][0] + 1) * batch_size if existing_batches else 0

    print(f"Resuming processing from index {start_index}")

    total_books = len(df_unique)
    pending_reviews = []  # per-book frames of the batch currently being filled

    # Iterate through each row in the dataframe, starting from start_index
    for index in tqdm(range(start_index, total_books), total=total_books - start_index, desc="Processing books"):
        row = df_unique.iloc[index]
        title = row['Title']
        authors = row['Authors']

        try:
            # Get reviews for this book
            reviews_df = search_book_reviews(title, authors)

            # Add book information to each review
            reviews_df['Title'] = title
            reviews_df['Authors'] = authors
            reviews_df['Avg Ratings'] = row['Avg Ratings']
            reviews_df['Rating'] = row['Rating']
            reviews_df['Published_year'] = row['Published_year']

            pending_reviews.append(reviews_df)

        except Exception as e:
            print(f"Error processing {title} by {authors}: {str(e)}")

        # Close the batch on its last row whether or not that row succeeded;
        # otherwise one failure would skip the save and shift every later
        # batch number away from its row range.
        if (index + 1) % batch_size == 0 or index == total_books - 1:
            batch_count = index // batch_size
            non_empty = [frame for frame in pending_reviews if not frame.empty]
            if non_empty:
                batch_df = pd.concat(non_empty, ignore_index=True)
            else:
                batch_df = pd.DataFrame(columns=review_columns)
            batch_filename = os.path.join(batch_dir, f"batch_{batch_count}.csv")
            batch_df.to_csv(batch_filename, **csv_write_options)
            print(f"Batch {batch_count} saved to {batch_filename}")
            pending_reviews = []

    # Combine all batches
    all_files = [path for _, path in find_batches()]
    if not all_files:
        # pd.concat raises on an empty sequence; there is simply nothing to merge
        print("No reviews were collected.")
        return

    batch_frames = [pd.read_csv(f, **csv_read_options) for f in all_files]
    filled_frames = [frame for frame in batch_frames if not frame.empty]
    if filled_frames:
        combined_df = pd.concat(filled_frames, ignore_index=True)
    else:
        combined_df = pd.DataFrame(columns=review_columns)

    # Save combined results
    combined_df.to_csv(output_csv, **csv_write_options)
    print(f"All reviews combined and saved to {output_csv}")

    # Remove the batch files now that they are merged
    for f in all_files:
        os.remove(f)
    print("Batch files removed")

In [None]:
# Usage: scrape reviews for every book listed in `input_csv` and write the merged
# result to `output_csv`. Batch files already present in /kaggle/working are
# treated as finished work, so re-running this cell resumes after them.
process_book_list(input_csv, output_csv)

Removed 0 duplicate entries out of 35597
Resuming processing from index 920


Processing books:   0%|          | 10/34677 [02:08<103:44:25, 10.77s/it]

Batch 92 saved to /kaggle/working/batch_92.csv


Processing books:   0%|          | 20/34677 [04:35<160:16:01, 16.65s/it]

Batch 93 saved to /kaggle/working/batch_93.csv


Processing books:   0%|          | 30/34677 [07:12<150:12:35, 15.61s/it]

Batch 94 saved to /kaggle/working/batch_94.csv


Processing books:   0%|          | 40/34677 [08:39<65:03:23,  6.76s/it] 

Batch 95 saved to /kaggle/working/batch_95.csv


Processing books:   0%|          | 44/34677 [09:16<78:49:43,  8.19s/it]

Error processing https://beyond8figures.com/book-review/financial-intelligence-by-karen-berman-joe-knight-and-john-case/: HTTPSConnectionPool(host='beyond8figures.com', port=443): Max retries exceeded with url: /book-review/financial-intelligence-by-karen-berman-joe-knight-and-john-case/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://execleadercoach.com/2010/01/12/book-review-financial-intelligence-karen-berman-joe-knight/: HTTPSConnectionPool(host='execleadercoach.com', port=443): Max retries exceeded with url: /2010/01/12/book-review-financial-intelligence-karen-berman-joe-knight/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   0%|          | 50/34677 [11:46<149:24:15, 15.53s/it]

Batch 96 saved to /kaggle/working/batch_96.csv


Processing books:   0%|          | 60/34677 [13:18<76:19:43,  7.94s/it] 

Batch 97 saved to /kaggle/working/batch_97.csv


Processing books:   0%|          | 63/34677 [13:56<116:11:30, 12.08s/it]

Error processing https://koha.ucu.edu.ph/cgi-bin/koha/opac-detail.pl?biblionumber=236: HTTPSConnectionPool(host='koha.ucu.edu.ph', port=443): Max retries exceeded with url: /cgi-bin/koha/opac-detail.pl?biblionumber=236 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='koha.ucu.edu.ph', port=443): Read timed out. (read timeout=10)"))


Processing books:   0%|          | 70/34677 [17:09<144:15:27, 15.01s/it]

Batch 98 saved to /kaggle/working/batch_98.csv


Processing books:   0%|          | 77/34677 [18:14<114:44:24, 11.94s/it]

Error processing https://koha.ucu.edu.ph/cgi-bin/koha/opac-detail.pl?biblionumber=1111: HTTPSConnectionPool(host='koha.ucu.edu.ph', port=443): Max retries exceeded with url: /cgi-bin/koha/opac-detail.pl?biblionumber=1111 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='koha.ucu.edu.ph', port=443): Read timed out. (read timeout=10)"))


Processing books:   0%|          | 80/34677 [20:55<279:06:23, 29.04s/it]

Batch 99 saved to /kaggle/working/batch_99.csv


Processing books:   0%|          | 81/34677 [21:05<223:31:47, 23.26s/it]

Error processing https://www.library.cbn.gov.ng:8088/cgi-bin/koha/opac-detail.pl?biblionumber=7112: HTTPSConnectionPool(host='www.library.cbn.gov.ng', port=8088): Max retries exceeded with url: /cgi-bin/koha/opac-detail.pl?biblionumber=7112 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))
Error processing https://www.library.cbn.gov.ng/vufind/Record/18184: HTTPSConnectionPool(host='www.library.cbn.gov.ng', port=443): Max retries exceeded with url: /vufind/Record/18184 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))
Error processing https://library.cbn.gov.ng/vufind/Record/7538/Details: HTTPSConnectionPool(host='library.cbn.gov.ng', port=443): Max retries exceeded with url: /vufind/Record/7538/Details (Caused by SSLError(SSLCertVerificationError(1, '[SSL: 

Processing books:   0%|          | 90/34677 [25:00<157:15:56, 16.37s/it]

Batch 100 saved to /kaggle/working/batch_100.csv


Processing books:   0%|          | 100/34677 [28:01<156:54:53, 16.34s/it]

Batch 101 saved to /kaggle/working/batch_101.csv


Processing books:   0%|          | 110/34677 [30:38<156:09:02, 16.26s/it]

Batch 102 saved to /kaggle/working/batch_102.csv


Processing books:   0%|          | 120/34677 [32:23<129:33:28, 13.50s/it]

Batch 103 saved to /kaggle/working/batch_103.csv


Processing books:   0%|          | 130/34677 [33:49<72:46:40,  7.58s/it] 

Batch 104 saved to /kaggle/working/batch_104.csv


Processing books:   0%|          | 140/34677 [34:58<69:26:05,  7.24s/it]

Batch 105 saved to /kaggle/working/batch_105.csv


Processing books:   0%|          | 150/34677 [36:17<78:47:31,  8.22s/it]

Batch 106 saved to /kaggle/working/batch_106.csv


Processing books:   0%|          | 160/34677 [38:28<106:36:52, 11.12s/it]

Batch 107 saved to /kaggle/working/batch_107.csv
Error processing https://carolelindstrom.com/books/we-are-water-protectors/: HTTPSConnectionPool(host='carolelindstrom.com', port=443): Max retries exceeded with url: /books/we-are-water-protectors/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   0%|          | 170/34677 [41:28<142:56:40, 14.91s/it]

Batch 108 saved to /kaggle/working/batch_108.csv


Processing books:   1%|          | 180/34677 [44:04<149:27:36, 15.60s/it]

Batch 109 saved to /kaggle/working/batch_109.csv


Processing books:   1%|          | 184/34677 [44:46<108:49:44, 11.36s/it]

Error processing http://paperbacksocial.com/2018/06/16/no-logo-naomi-klein/: HTTPConnectionPool(host='paperbacksocial.com', port=80): Max retries exceeded with url: /2018/06/16/no-logo-naomi-klein/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 190/34677 [47:03<184:50:19, 19.29s/it]

Batch 110 saved to /kaggle/working/batch_110.csv


Processing books:   1%|          | 191/34677 [47:12<155:33:38, 16.24s/it]

Error processing https://www.frontlist.in/reviews/thirteen-reasons-why-by-jay-asher-book-review: HTTPSConnectionPool(host='www.frontlist.in', port=443): Max retries exceeded with url: /reviews/thirteen-reasons-why-by-jay-asher-book-review (Caused by ResponseError('too many 500 error responses'))


Processing books:   1%|          | 198/34677 [50:14<167:51:24, 17.53s/it]

Error processing https://popgoesthereader.com/review-sharp-objects-by-gillian-flynn/: HTTPSConnectionPool(host='popgoesthereader.com', port=443): Max retries exceeded with url: /review-sharp-objects-by-gillian-flynn/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 200/34677 [51:21<227:11:32, 23.72s/it]

Batch 111 saved to /kaggle/working/batch_111.csv


Processing books:   1%|          | 210/34677 [53:46<111:32:35, 11.65s/it]

Batch 112 saved to /kaggle/working/batch_112.csv


Processing books:   1%|          | 212/34677 [54:12<122:10:10, 12.76s/it]

Error processing https://www.erstrategies.org/news/book-review-math-curse/: HTTPSConnectionPool(host='www.erstrategies.org', port=443): Max retries exceeded with url: /news/book-review-math-curse/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 220/34677 [56:38<137:23:17, 14.35s/it]

Batch 113 saved to /kaggle/working/batch_113.csv


Processing books:   1%|          | 230/34677 [58:33<116:22:35, 12.16s/it]

Batch 114 saved to /kaggle/working/batch_114.csv


Processing books:   1%|          | 240/34677 [1:00:44<134:32:56, 14.07s/it]

Batch 115 saved to /kaggle/working/batch_115.csv


Processing books:   1%|          | 242/34677 [1:00:57<96:39:09, 10.10s/it] 

Error processing https://www.flipkart.com/new-enjoying-mathematics-class-5/p/itmeshb8xc8u4sat: HTTPSConnectionPool(host='www.flipkart.com', port=443): Max retries exceeded with url: /new-enjoying-mathematics-class-5/p/itmeshb8xc8u4sat (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.flipkart.com', port=443): Read timed out. (read timeout=10)"))
Error processing https://www.flipkart.com/new-enjoying-mathematics-class-5/p/itm3e79801c681ae?pid=9786630091281&lid=LSTBOK9786630091281FVVUSV&marketplace=FLIPKART: HTTPSConnectionPool(host='www.flipkart.com', port=443): Max retries exceeded with url: /new-enjoying-mathematics-class-5/p/itm3e79801c681ae?pid=9786630091281&lid=LSTBOK9786630091281FVVUSV&marketplace=FLIPKART (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.flipkart.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   1%|          | 250/34677 [1:05:39<136:17:25, 14.25s/it]

Batch 116 saved to /kaggle/working/batch_116.csv


Processing books:   1%|          | 260/34677 [1:07:17<77:06:17,  8.07s/it] 

Batch 117 saved to /kaggle/working/batch_117.csv


Processing books:   1%|          | 270/34677 [1:08:45<84:43:57,  8.87s/it] 

Batch 118 saved to /kaggle/working/batch_118.csv


Processing books:   1%|          | 280/34677 [1:11:12<140:58:52, 14.76s/it]

Batch 119 saved to /kaggle/working/batch_119.csv


Processing books:   1%|          | 290/34677 [1:12:53<99:02:27, 10.37s/it] 

Batch 120 saved to /kaggle/working/batch_120.csv


Processing books:   1%|          | 292/34677 [1:13:05<76:31:58,  8.01s/it]

Error processing https://catalog.library.phila.gov/Record/2671809: HTTPSConnectionPool(host='catalog.library.phila.gov', port=443): Max retries exceeded with url: /Record/2671809 (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'catalog.library.phila.gov'. (_ssl.c:1007)")))


Processing books:   1%|          | 299/34677 [1:15:03<122:22:43, 12.82s/it]

Error processing https://www.livingwithadd.com/books/what-does-everybody-else-know-that-i-dont-social-skills-help-for-adults-with-attention-deficithyperactivity-disorder/: HTTPSConnectionPool(host='www.livingwithadd.com', port=443): Max retries exceeded with url: /books/what-does-everybody-else-know-that-i-dont-social-skills-help-for-adults-with-attention-deficithyperactivity-disorder/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 300/34677 [1:15:56<239:00:45, 25.03s/it]

Batch 121 saved to /kaggle/working/batch_121.csv


Processing books:   1%|          | 302/34677 [1:16:17<173:29:26, 18.17s/it]

Error processing https://boyunderthebridge.com/wp-content/uploads/2019/01/Your-Life-Can-Be-Better_-using-strategies-for-Adult-ADD_ADHD-Notebook.pdf: HTTPSConnectionPool(host='boyunderthebridge.com', port=443): Max retries exceeded with url: /wp-content/uploads/2019/01/Your-Life-Can-Be-Better_-using-strategies-for-Adult-ADD_ADHD-Notebook.pdf (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.livingwithadd.com/books/your-life-can-be-better-using-strategies-for-adult-addadhd/: HTTPSConnectionPool(host='www.livingwithadd.com', port=443): Max retries exceeded with url: /books/your-life-can-be-better-using-strategies-for-adult-addadhd/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 309/34677 [1:18:45<134:03:59, 14.04s/it]

Error processing https://www.livingwithadd.com/books/adult-adhd-how-to-succeed-as-a-hunter-in-a-farmers-world/: HTTPSConnectionPool(host='www.livingwithadd.com', port=443): Max retries exceeded with url: /books/adult-adhd-how-to-succeed-as-a-hunter-in-a-farmers-world/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 310/34677 [1:19:25<209:37:19, 21.96s/it]

Batch 122 saved to /kaggle/working/batch_122.csv


Processing books:   1%|          | 320/34677 [1:21:27<97:19:18, 10.20s/it] 

Batch 123 saved to /kaggle/working/batch_123.csv


Processing books:   1%|          | 330/34677 [1:24:35<227:14:18, 23.82s/it]

Batch 124 saved to /kaggle/working/batch_124.csv


Processing books:   1%|          | 340/34677 [1:27:03<151:46:36, 15.91s/it]

Batch 125 saved to /kaggle/working/batch_125.csv


Processing books:   1%|          | 343/34677 [1:27:55<153:50:44, 16.13s/it]

Error processing https://bookishmusings.com/review/greenglass-house-book-review/: HTTPSConnectionPool(host='bookishmusings.com', port=443): Max retries exceeded with url: /review/greenglass-house-book-review/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|          | 349/34677 [1:29:53<141:03:12, 14.79s/it]

Error processing https://www.fullofbooks.com/digging-to-america-by-anne-tyler-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /digging-to-america-by-anne-tyler-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   1%|          | 350/34677 [1:31:52<441:16:03, 46.28s/it]

Batch 126 saved to /kaggle/working/batch_126.csv


Processing books:   1%|          | 360/34677 [1:34:12<137:01:25, 14.37s/it]

Batch 127 saved to /kaggle/working/batch_127.csv


Processing books:   1%|          | 370/34677 [1:36:19<131:57:53, 13.85s/it]

Batch 128 saved to /kaggle/working/batch_128.csv


Processing books:   1%|          | 380/34677 [1:38:45<163:39:38, 17.18s/it]

Batch 129 saved to /kaggle/working/batch_129.csv


Processing books:   1%|          | 390/34677 [1:41:52<102:30:40, 10.76s/it]

Batch 130 saved to /kaggle/working/batch_130.csv


Processing books:   1%|          | 400/34677 [1:44:31<131:24:05, 13.80s/it]

Batch 131 saved to /kaggle/working/batch_131.csv


Processing books:   1%|          | 410/34677 [1:46:25<129:14:46, 13.58s/it]

Batch 132 saved to /kaggle/working/batch_132.csv


Processing books:   1%|          | 420/34677 [1:48:24<114:45:58, 12.06s/it]

Batch 133 saved to /kaggle/working/batch_133.csv


Processing books:   1%|          | 430/34677 [1:50:35<123:03:14, 12.94s/it]

Batch 134 saved to /kaggle/working/batch_134.csv


Processing books:   1%|▏         | 440/34677 [1:53:08<192:14:06, 20.21s/it]

Batch 135 saved to /kaggle/working/batch_135.csv


Processing books:   1%|▏         | 450/34677 [1:55:21<141:19:44, 14.87s/it]

Batch 136 saved to /kaggle/working/batch_136.csv


Processing books:   1%|▏         | 458/34677 [1:56:50<118:43:40, 12.49s/it]

Error processing https://www.bewareofthereader.com/the-nightingale-by-kristin-hannah-review-of-an-outstanding-book-that-left-me-bereft-haunted-and-with-a-huge-book-hangover/: HTTPSConnectionPool(host='www.bewareofthereader.com', port=443): Max retries exceeded with url: /the-nightingale-by-kristin-hannah-review-of-an-outstanding-book-that-left-me-bereft-haunted-and-with-a-huge-book-hangover/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|▏         | 459/34677 [1:57:42<234:08:55, 24.63s/it]

Error processing https://www.fullofbooks.com/the-vanishing-half-by-brit-bennett-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /the-vanishing-half-by-brit-bennett-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   1%|▏         | 460/34677 [1:59:36<488:19:49, 51.38s/it]

Batch 137 saved to /kaggle/working/batch_137.csv
Error processing https://popgoesthereader.com/review-landline-by-rainbow-rowell/: HTTPSConnectionPool(host='popgoesthereader.com', port=443): Max retries exceeded with url: /review-landline-by-rainbow-rowell/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.fullofbooks.com/landline-by-rainbow-rowell-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /landline-by-rainbow-rowell-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   1%|▏         | 464/34677 [2:02:33<310:26:42, 32.67s/it]

Error processing https://www.betterthandreams.com/2012/01/book-review-princess-bride-by-willia/: HTTPSConnectionPool(host='www.betterthandreams.com', port=443): Max retries exceeded with url: /2012/01/book-review-princess-bride-by-willia/ (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.betterthandreams.com'. (_ssl.c:1007)")))


Processing books:   1%|▏         | 470/34677 [2:06:00<256:05:54, 26.95s/it]

Batch 138 saved to /kaggle/working/batch_138.csv


Processing books:   1%|▏         | 472/34677 [2:06:16<165:45:08, 17.45s/it]

Error processing https://kristincashore.com/books/graceling/: HTTPSConnectionPool(host='kristincashore.com', port=443): Max retries exceeded with url: /books/graceling/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   1%|▏         | 480/34677 [2:07:58<81:09:34,  8.54s/it] 

Batch 139 saved to /kaggle/working/batch_139.csv


Processing books:   1%|▏         | 490/34677 [2:09:09<51:55:39,  5.47s/it] 

Batch 140 saved to /kaggle/working/batch_140.csv


Processing books:   1%|▏         | 500/34677 [2:10:57<86:22:19,  9.10s/it] 

Batch 141 saved to /kaggle/working/batch_141.csv


Processing books:   1%|▏         | 510/34677 [2:12:29<70:42:03,  7.45s/it] 

Batch 142 saved to /kaggle/working/batch_142.csv


Processing books:   1%|▏         | 520/34677 [2:13:52<90:05:27,  9.50s/it]

Batch 143 saved to /kaggle/working/batch_143.csv


Processing books:   2%|▏         | 530/34677 [2:16:02<106:17:30, 11.21s/it]

Batch 144 saved to /kaggle/working/batch_144.csv


Processing books:   2%|▏         | 539/34677 [2:18:17<135:44:57, 14.32s/it]

Error processing https://press.jhu.edu/books/title/8516/high-speed-dreams: HTTPSConnectionPool(host='press.jhu.edu', port=443): Max retries exceeded with url: /books/title/8516/high-speed-dreams (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   2%|▏         | 540/34677 [2:19:06<235:37:51, 24.85s/it]

Batch 145 saved to /kaggle/working/batch_145.csv


Processing books:   2%|▏         | 550/34677 [2:20:48<114:00:49, 12.03s/it]

Batch 146 saved to /kaggle/working/batch_146.csv


Processing books:   2%|▏         | 560/34677 [2:22:48<155:01:42, 16.36s/it]

Batch 147 saved to /kaggle/working/batch_147.csv


Processing books:   2%|▏         | 570/34677 [2:25:24<169:04:07, 17.85s/it]

Batch 148 saved to /kaggle/working/batch_148.csv


Processing books:   2%|▏         | 580/34677 [2:27:28<136:18:02, 14.39s/it]

Batch 149 saved to /kaggle/working/batch_149.csv


Processing books:   2%|▏         | 590/34677 [2:29:48<134:13:18, 14.18s/it]

Batch 150 saved to /kaggle/working/batch_150.csv


Processing books:   2%|▏         | 599/34677 [2:32:15<110:13:53, 11.64s/it]

Error processing https://staging.cambridge-biomedical.com/form-library/Resources/download/a_bend_in_the_river_vs_naipaul.pdf: HTTPSConnectionPool(host='staging.cambridge-biomedical.com', port=443): Max retries exceeded with url: /form-library/Resources/download/a_bend_in_the_river_vs_naipaul.pdf (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e961273fc70>, 'Connection to staging.cambridge-biomedical.com timed out. (connect timeout=10)'))
Error processing https://birfph.org/public/scholarship/Documents/a_bend_in_the_river_vs_naipaul.pdf: HTTPSConnectionPool(host='birfph.org', port=443): Max retries exceeded with url: /public/scholarship/Documents/a_bend_in_the_river_vs_naipaul.pdf (Caused by ReadTimeoutError("HTTPSConnectionPool(host='birfph.org', port=443): Read timed out. (read timeout=10)"))
Error processing https://prod2.galleries.thebarnyardstore.com/public/Resources/Documents/A_Bend_In_The_River_Vs_Naipaul.pdf: HTTPSConnectionPool(host='prod2.galler

Processing books:   2%|▏         | 600/34677 [2:37:13<924:23:30, 97.66s/it]

Batch 151 saved to /kaggle/working/batch_151.csv


Processing books:   2%|▏         | 610/34677 [2:39:29<127:55:09, 13.52s/it]

Batch 152 saved to /kaggle/working/batch_152.csv


Processing books:   2%|▏         | 620/34677 [2:42:27<212:02:56, 22.41s/it]

Batch 153 saved to /kaggle/working/batch_153.csv


Processing books:   2%|▏         | 630/34677 [2:45:21<154:41:28, 16.36s/it]

Batch 154 saved to /kaggle/working/batch_154.csv


Processing books:   2%|▏         | 640/34677 [2:48:02<132:17:34, 13.99s/it]

Batch 155 saved to /kaggle/working/batch_155.csv


Processing books:   2%|▏         | 650/34677 [2:50:04<123:32:47, 13.07s/it]

Batch 156 saved to /kaggle/working/batch_156.csv


Processing books:   2%|▏         | 652/34677 [2:50:25<111:58:56, 11.85s/it]

Error processing https://www.kirkusreviews.com/book-reviews/kim-mclarin/james-baldwins-another-country/: HTTPSConnectionPool(host='www.kirkusreviews.com', port=443): Max retries exceeded with url: /book-reviews/kim-mclarin/james-baldwins-another-country/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.kirkusreviews.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   2%|▏         | 659/34677 [2:54:00<192:28:10, 20.37s/it]

Error processing https://paperbacksocial.com/2021/01/30/the-women-of-brewster-place-by-gloria-naylor-review/: HTTPSConnectionPool(host='paperbacksocial.com', port=443): Max retries exceeded with url: /2021/01/30/the-women-of-brewster-place-by-gloria-naylor-review/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 660/34677 [2:54:58<297:07:11, 31.44s/it]

Batch 157 saved to /kaggle/working/batch_157.csv
Error processing https://www.fullofbooks.com/love-by-toni-morrison-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /love-by-toni-morrison-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   2%|▏         | 667/34677 [2:58:48<221:06:02, 23.40s/it]

Error processing https://www.beverlyjenkins.net/books/old-west-series/forbidden/: HTTPSConnectionPool(host='www.beverlyjenkins.net', port=443): Max retries exceeded with url: /books/old-west-series/forbidden/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 670/34677 [3:00:21<235:29:38, 24.93s/it]

Batch 158 saved to /kaggle/working/batch_158.csv


Processing books:   2%|▏         | 680/34677 [3:02:30<118:43:24, 12.57s/it]

Batch 159 saved to /kaggle/working/batch_159.csv


Processing books:   2%|▏         | 690/34677 [3:04:33<113:40:43, 12.04s/it]

Batch 160 saved to /kaggle/working/batch_160.csv


Processing books:   2%|▏         | 700/34677 [3:06:07<99:25:59, 10.54s/it] 

Batch 161 saved to /kaggle/working/batch_161.csv


Processing books:   2%|▏         | 705/34677 [3:07:07<104:50:37, 11.11s/it]

Error processing https://reviewmeta.com/amazon/B000SEHCWM: HTTPSConnectionPool(host='reviewmeta.com', port=443): Max retries exceeded with url: /amazon/B000SEHCWM (Caused by ReadTimeoutError("HTTPSConnectionPool(host='reviewmeta.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   2%|▏         | 708/34677 [3:09:09<222:16:04, 23.56s/it]

Error processing https://www.beverlyjenkins.net/books/women-who-dare-series/rebel/: HTTPSConnectionPool(host='www.beverlyjenkins.net', port=443): Max retries exceeded with url: /books/women-who-dare-series/rebel/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.beverlyjenkins.net/books/women-who-dare-series/: HTTPSConnectionPool(host='www.beverlyjenkins.net', port=443): Max retries exceeded with url: /books/women-who-dare-series/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 710/34677 [3:10:25<263:14:00, 27.90s/it]

Batch 162 saved to /kaggle/working/batch_162.csv


Processing books:   2%|▏         | 715/34677 [3:11:18<117:51:22, 12.49s/it]

Error processing https://www.kirandellimore.com/book-review-a-grain-of-wheat-ngugi-wa-thiongo/: HTTPSConnectionPool(host='www.kirandellimore.com', port=443): Max retries exceeded with url: /book-review-a-grain-of-wheat-ngugi-wa-thiongo/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 720/34677 [3:13:03<137:47:00, 14.61s/it]

Batch 163 saved to /kaggle/working/batch_163.csv


Processing books:   2%|▏         | 730/34677 [3:15:26<170:25:58, 18.07s/it]

Batch 164 saved to /kaggle/working/batch_164.csv
Error processing https://www.kirandellimore.com/book-review-julys-people-nadine-gordimer/: HTTPSConnectionPool(host='www.kirandellimore.com', port=443): Max retries exceeded with url: /book-review-julys-people-nadine-gordimer/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 736/34677 [3:17:16<159:55:58, 16.96s/it]

Error processing https://paperbacksocial.com/2022/01/19/woman-at-point-zero-by-nawal-el-saadawi-book-review/: HTTPSConnectionPool(host='paperbacksocial.com', port=443): Max retries exceeded with url: /2022/01/19/woman-at-point-zero-by-nawal-el-saadawi-book-review/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 740/34677 [3:18:31<143:25:25, 15.21s/it]

Batch 165 saved to /kaggle/working/batch_165.csv


Processing books:   2%|▏         | 750/34677 [3:20:38<120:48:20, 12.82s/it]

Batch 166 saved to /kaggle/working/batch_166.csv


Processing books:   2%|▏         | 757/34677 [3:22:09<118:13:00, 12.55s/it]

Error processing https://diversebooks.org/qa-with-mark-oshiro-the-insiders/: HTTPSConnectionPool(host='diversebooks.org', port=443): Max retries exceeded with url: /qa-with-mark-oshiro-the-insiders/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 760/34677 [3:23:27<180:52:54, 19.20s/it]

Batch 167 saved to /kaggle/working/batch_167.csv


Processing books:   2%|▏         | 770/34677 [3:26:13<135:32:08, 14.39s/it]

Batch 168 saved to /kaggle/working/batch_168.csv


Processing books:   2%|▏         | 780/34677 [3:28:31<101:25:39, 10.77s/it]

Batch 169 saved to /kaggle/working/batch_169.csv


Processing books:   2%|▏         | 790/34677 [3:31:03<184:16:15, 19.58s/it]

Batch 170 saved to /kaggle/working/batch_170.csv


Processing books:   2%|▏         | 800/34677 [3:33:10<86:57:21,  9.24s/it] 

Batch 171 saved to /kaggle/working/batch_171.csv
Error processing https://www.tagari-usa.com/product/permaculture-a-designers-manual/: HTTPSConnectionPool(host='www.tagari-usa.com', port=443): Max retries exceeded with url: /product/permaculture-a-designers-manual/ (Caused by ResponseError('too many 500 error responses'))


Processing books:   2%|▏         | 801/34677 [3:34:45<330:44:36, 35.15s/it]

Error processing https://www.sustainablemarketfarming.com/2014/04/29/review-of-the-market-gardener-by-jean-martin-fortier/: HTTPSConnectionPool(host='www.sustainablemarketfarming.com', port=443): Max retries exceeded with url: /2014/04/29/review-of-the-market-gardener-by-jean-martin-fortier/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   2%|▏         | 810/34677 [3:37:15<153:16:30, 16.29s/it]

Batch 172 saved to /kaggle/working/batch_172.csv
Error processing https://www.seattletimes.com/pacific-nw-magazine/pro-tips-for-saving-seeds-from-your-favorite-plants-for-next-years-garden/: HTTPSConnectionPool(host='www.seattletimes.com', port=443): Max retries exceeded with url: /pacific-nw-magazine/pro-tips-for-saving-seeds-from-your-favorite-plants-for-next-years-garden/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.seattletimes.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   2%|▏         | 820/34677 [3:40:54<127:24:56, 13.55s/it]

Batch 173 saved to /kaggle/working/batch_173.csv


Processing books:   2%|▏         | 827/34677 [3:42:09<86:28:04,  9.20s/it] 

Error processing https://opac.nwic.edu/eg/opac/record/36250: HTTPSConnectionPool(host='opac.nwic.edu', port=443): Max retries exceeded with url: /eg/opac/record/36250 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   2%|▏         | 830/34677 [3:43:22<157:55:20, 16.80s/it]

Batch 174 saved to /kaggle/working/batch_174.csv


Processing books:   2%|▏         | 840/34677 [3:44:48<102:06:44, 10.86s/it]

Batch 175 saved to /kaggle/working/batch_175.csv


Processing books:   2%|▏         | 850/34677 [3:46:28<130:27:59, 13.88s/it]

Batch 176 saved to /kaggle/working/batch_176.csv


Processing books:   2%|▏         | 860/34677 [3:48:03<122:21:14, 13.03s/it]

Batch 177 saved to /kaggle/working/batch_177.csv


Processing books:   3%|▎         | 870/34677 [3:49:47<102:02:33, 10.87s/it]

Batch 178 saved to /kaggle/working/batch_178.csv


Processing books:   3%|▎         | 871/34677 [3:50:11<139:50:59, 14.89s/it]

Error processing https://www.cybermodeler.com/hobby/ref/osp/book_osp_xp02.shtml: HTTPSConnectionPool(host='www.cybermodeler.com', port=443): Max retries exceeded with url: /hobby/ref/osp/book_osp_xp02.shtml (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   3%|▎         | 880/34677 [3:52:38<130:57:04, 13.95s/it]

Batch 179 saved to /kaggle/working/batch_179.csv


Processing books:   3%|▎         | 890/34677 [3:54:14<100:00:02, 10.66s/it]

Batch 180 saved to /kaggle/working/batch_180.csv


Processing books:   3%|▎         | 899/34677 [3:56:40<149:07:55, 15.89s/it]

Error processing https://escapetotheseventies.com/70s-films/the-incredible-melting-man/: HTTPSConnectionPool(host='escapetotheseventies.com', port=443): Max retries exceeded with url: /70s-films/the-incredible-melting-man/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate (_ssl.c:1007)')))


Processing books:   3%|▎         | 900/34677 [3:57:37<264:25:59, 28.18s/it]

Batch 181 saved to /kaggle/working/batch_181.csv


Processing books:   3%|▎         | 910/34677 [4:01:03<185:00:25, 19.72s/it]

Batch 182 saved to /kaggle/working/batch_182.csv


Processing books:   3%|▎         | 920/34677 [4:03:49<149:41:45, 15.96s/it]

Batch 183 saved to /kaggle/working/batch_183.csv


Processing books:   3%|▎         | 923/34677 [4:04:18<102:45:26, 10.96s/it]

Error processing https://www.flipkart.com/physics-module-v-optics-modern/p/itmetpv2fuzsczvh?pid=9789352605279&lid=LSTBOK97893526052792DNWHU&marketplace=FLIPKART: HTTPSConnectionPool(host='www.flipkart.com', port=443): Max retries exceeded with url: /physics-module-v-optics-modern/p/itmetpv2fuzsczvh?pid=9789352605279&lid=LSTBOK97893526052792DNWHU&marketplace=FLIPKART (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.flipkart.com', port=443): Read timed out. (read timeout=10)"))
Error processing https://www.flipkart.com/optics-modern-physics-neet-module-v/p/itmf5rthz3hgehcm?pid=9789387432536&marketplace=FLIPKART: HTTPSConnectionPool(host='www.flipkart.com', port=443): Max retries exceeded with url: /optics-modern-physics-neet-module-v/p/itmf5rthz3hgehcm?pid=9789387432536&marketplace=FLIPKART (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.flipkart.com', port=443): Read timed out. (read timeout=10)"))
Error processing https://www.flipkart.com/physics-module-v-optics-mo

Processing books:   3%|▎         | 925/34677 [4:09:07<622:53:30, 66.44s/it]

Error processing https://travelerswife4life.com/denise-hunter-book-series/: HTTPSConnectionPool(host='travelerswife4life.com', port=443): Max retries exceeded with url: /denise-hunter-book-series/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 930/34677 [4:10:45<227:40:48, 24.29s/it]

Batch 184 saved to /kaggle/working/batch_184.csv


Processing books:   3%|▎         | 933/34677 [4:11:19<150:21:21, 16.04s/it]

Error processing https://blackmaskmagazine.com/blog/w-t-ballard-an-interview/: HTTPSConnectionPool(host='blackmaskmagazine.com', port=443): Max retries exceeded with url: /blog/w-t-ballard-an-interview/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 940/34677 [4:13:41<176:08:30, 18.80s/it]

Batch 185 saved to /kaggle/working/batch_185.csv


Processing books:   3%|▎         | 947/34677 [4:15:17<138:06:47, 14.74s/it]

Error processing http://www.robynbennis.com/the_guns_above.htm: HTTPSConnectionPool(host='robynbennis.com', port=443): Max retries exceeded with url: /2024site/my-books/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 950/34677 [4:16:21<157:13:08, 16.78s/it]

Batch 186 saved to /kaggle/working/batch_186.csv


Processing books:   3%|▎         | 960/34677 [4:17:53<78:59:10,  8.43s/it] 

Batch 187 saved to /kaggle/working/batch_187.csv


Processing books:   3%|▎         | 970/34677 [4:20:00<122:48:32, 13.12s/it]

Batch 188 saved to /kaggle/working/batch_188.csv


Processing books:   3%|▎         | 980/34677 [4:21:21<82:22:08,  8.80s/it] 

Batch 189 saved to /kaggle/working/batch_189.csv


Processing books:   3%|▎         | 990/34677 [4:22:59<121:15:24, 12.96s/it]

Batch 190 saved to /kaggle/working/batch_190.csv


Processing books:   3%|▎         | 1000/34677 [4:25:24<120:53:52, 12.92s/it]

Batch 191 saved to /kaggle/working/batch_191.csv


Processing books:   3%|▎         | 1003/34677 [4:25:53<103:17:46, 11.04s/it]

Error processing https://www.css.mhpbooks.com/About/publication/HomePages/princesha_argjiro_nga_ismail_kadare.pdf: HTTPSConnectionPool(host='www.css.mhpbooks.com', port=443): Max retries exceeded with url: /About/publication/HomePages/princesha_argjiro_nga_ismail_kadare.pdf (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.css.mhpbooks.com'. (_ssl.c:1007)")))


Processing books:   3%|▎         | 1010/34677 [4:28:17<162:32:13, 17.38s/it]

Batch 192 saved to /kaggle/working/batch_192.csv


Processing books:   3%|▎         | 1014/34677 [4:29:35<185:35:40, 19.85s/it]

Error processing https://www.andotherstories.org/sworn-virgin-2/: HTTPSConnectionPool(host='www.andotherstories.org', port=443): Max retries exceeded with url: /sworn-virgin-2/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 1015/34677 [4:30:25<270:00:02, 28.88s/it]

Error processing https://www.css.mhpbooks.com/files/detail/Documents/tregtar-flamujsh-by-ernest-koliqi-daxiaore.pdf: HTTPSConnectionPool(host='www.css.mhpbooks.com', port=443): Max retries exceeded with url: /files/detail/Documents/tregtar-flamujsh-by-ernest-koliqi-daxiaore.pdf (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.css.mhpbooks.com'. (_ssl.c:1007)")))


Processing books:   3%|▎         | 1020/34677 [4:32:29<204:53:49, 21.92s/it]

Batch 193 saved to /kaggle/working/batch_193.csv


Processing books:   3%|▎         | 1021/34677 [4:33:19<283:16:40, 30.30s/it]

Error processing https://albanianhistory.org/albanianliterature/authors/classical/spasse/index.html: HTTPSConnectionPool(host='albanianhistory.org', port=443): Max retries exceeded with url: /albanianliterature/authors/classical/spasse/index.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)')))


Processing books:   3%|▎         | 1030/34677 [4:36:40<206:08:05, 22.06s/it]

Batch 194 saved to /kaggle/working/batch_194.csv
Error processing https://fjalashqip.com/naim-frasheri-fjalet-e-qiririt/: HTTPSConnectionPool(host='fjalashqip.com', port=443): Max retries exceeded with url: /naim-frasheri-fjalet-e-qiririt/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 1040/34677 [4:40:34<138:32:00, 14.83s/it]

Batch 195 saved to /kaggle/working/batch_195.csv


Processing books:   3%|▎         | 1050/34677 [4:42:36<169:01:22, 18.10s/it]

Batch 196 saved to /kaggle/working/batch_196.csv


Processing books:   3%|▎         | 1060/34677 [4:44:16<71:57:20,  7.71s/it] 

Batch 197 saved to /kaggle/working/batch_197.csv


Processing books:   3%|▎         | 1070/34677 [4:45:34<96:39:58, 10.35s/it] 

Batch 198 saved to /kaggle/working/batch_198.csv
Error processing https://cuddlebuggery.com/blog/2011/11/18/review-liesl-and-po/: HTTPSConnectionPool(host='cuddlebuggery.com', port=443): Max retries exceeded with url: /blog/2011/11/18/review-liesl-and-po/ (Caused by ResponseError('too many 502 error responses'))


Processing books:   3%|▎         | 1080/34677 [4:47:54<115:12:57, 12.35s/it]

Batch 199 saved to /kaggle/working/batch_199.csv


Processing books:   3%|▎         | 1088/34677 [4:49:05<75:15:39,  8.07s/it] 

Error processing https://www.fullofbooks.com/alcohol-explained-by-william-porter-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /alcohol-explained-by-william-porter-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   3%|▎         | 1090/34677 [4:50:51<254:20:24, 27.26s/it]

Batch 200 saved to /kaggle/working/batch_200.csv


Processing books:   3%|▎         | 1094/34677 [4:51:17<94:11:25, 10.10s/it] 

Error processing https://www.curledup.com/dry.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /dry.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e961223f370>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   3%|▎         | 1100/34677 [4:53:30<103:00:59, 11.05s/it]

Batch 201 saved to /kaggle/working/batch_201.csv


Processing books:   3%|▎         | 1110/34677 [4:56:11<96:19:44, 10.33s/it] 

Batch 202 saved to /kaggle/working/batch_202.csv


Processing books:   3%|▎         | 1120/34677 [4:58:20<144:55:52, 15.55s/it]

Batch 203 saved to /kaggle/working/batch_203.csv
Error processing https://varianjohnson.com/books/playing-the-cards-youre-dealt/: HTTPSConnectionPool(host='varianjohnson.com', port=443): Max retries exceeded with url: /books/playing-the-cards-youre-dealt/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   3%|▎         | 1130/34677 [5:00:16<70:31:46,  7.57s/it] 

Batch 204 saved to /kaggle/working/batch_204.csv


Processing books:   3%|▎         | 1132/34677 [5:01:10<161:30:27, 17.33s/it]

Error processing https://www.fullofbooks.com/flow-down-like-silver-hypatia-of-alexandria-by-ki-longfellow-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /flow-down-like-silver-hypatia-of-alexandria-by-ki-longfellow-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   3%|▎         | 1140/34677 [5:03:37<102:28:13, 11.00s/it]

Batch 205 saved to /kaggle/working/batch_205.csv


Processing books:   3%|▎         | 1150/34677 [5:05:55<144:08:38, 15.48s/it]

Batch 206 saved to /kaggle/working/batch_206.csv


Processing books:   3%|▎         | 1160/34677 [5:07:19<70:21:02,  7.56s/it] 

Batch 207 saved to /kaggle/working/batch_207.csv


Processing books:   3%|▎         | 1170/34677 [5:09:02<115:12:50, 12.38s/it]

Batch 208 saved to /kaggle/working/batch_208.csv


Processing books:   3%|▎         | 1171/34677 [5:09:10<103:21:47, 11.11s/it]

Error processing https://thefanzine.com/2666-by-roberto-bolano-a-review/: HTTPSConnectionPool(host='thefanzine.com', port=443): Max retries exceeded with url: /2666-by-roberto-bolano-a-review/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate (_ssl.c:1007)')))


Processing books:   3%|▎         | 1174/34677 [5:10:43<198:00:48, 21.28s/it]

Error processing https://www.fullofbooks.com/roadside-picnic-by-arkady-strugatsky-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /roadside-picnic-by-arkady-strugatsky-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   3%|▎         | 1180/34677 [5:13:05<131:20:20, 14.12s/it]

Batch 209 saved to /kaggle/working/batch_209.csv


Processing books:   3%|▎         | 1190/34677 [5:15:35<165:03:47, 17.75s/it]

Batch 210 saved to /kaggle/working/batch_210.csv


Processing books:   3%|▎         | 1196/34677 [5:17:03<138:28:53, 14.89s/it]

Error processing https://sci-books.com/visual-group-theory-maa-problem-book-series-1st-edition-088385757x/: HTTPSConnectionPool(host='sci-books.com', port=443): Max retries exceeded with url: /visual-group-theory-maa-problem-book-series-1st-edition-088385757x/ (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7e961223e8f0>: Failed to establish a new connection: [Errno 113] No route to host'))


Processing books:   3%|▎         | 1200/34677 [5:18:44<195:02:17, 20.97s/it]

Batch 211 saved to /kaggle/working/batch_211.csv


Processing books:   3%|▎         | 1210/34677 [5:20:41<94:47:00, 10.20s/it] 

Batch 212 saved to /kaggle/working/batch_212.csv


Processing books:   4%|▎         | 1220/34677 [5:22:05<104:48:58, 11.28s/it]

Batch 213 saved to /kaggle/working/batch_213.csv


Processing books:   4%|▎         | 1230/34677 [5:24:06<116:04:48, 12.49s/it]

Batch 214 saved to /kaggle/working/batch_214.csv


Processing books:   4%|▎         | 1237/34677 [5:25:32<121:14:01, 13.05s/it]

Error processing https://www.beeartless.com/blog/2019/book-reviews/the-fall-albert-camus/: HTTPSConnectionPool(host='www.beeartless.com', port=443): Max retries exceeded with url: /blog/2019/book-reviews/the-fall-albert-camus/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e9611168820>: Failed to resolve 'www.beeartless.com' ([Errno -2] Name or service not known)"))


Processing books:   4%|▎         | 1239/34677 [5:26:48<221:12:09, 23.82s/it]

Error processing https://www.curledup.com/swallows.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /swallows.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e9613f18ca0>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   4%|▎         | 1240/34677 [5:28:43<475:48:54, 51.23s/it]

Batch 215 saved to /kaggle/working/batch_215.csv
Error processing https://www.kirkus.us-east-1.elasticbeanstalk.com/book-reviews/boualem-sansal/the-german-mujahid/: HTTPSConnectionPool(host='www.kirkus.us-east-1.elasticbeanstalk.com', port=443): Max retries exceeded with url: /book-reviews/boualem-sansal/the-german-mujahid/ (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.kirkus.us-east-1.elasticbeanstalk.com'. (_ssl.c:1007)")))


Processing books:   4%|▎         | 1241/34677 [5:29:25<450:27:17, 48.50s/it]

Error processing https://www.curledup.com/lastsmr.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /lastsmr.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e96127674f0>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   4%|▎         | 1250/34677 [5:33:19<144:39:40, 15.58s/it]

Batch 216 saved to /kaggle/working/batch_216.csv


Processing books:   4%|▎         | 1259/34677 [5:35:08<105:04:01, 11.32s/it]

Error processing https://coalhillreview.com/book-review-the-sheltering-sky-by-paul-bowles/: HTTPSConnectionPool(host='coalhillreview.com', port=443): Max retries exceeded with url: /book-review-the-sheltering-sky-by-paul-bowles/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   4%|▎         | 1260/34677 [5:36:18<267:19:44, 28.80s/it]

Batch 217 saved to /kaggle/working/batch_217.csv


Processing books:   4%|▎         | 1270/34677 [5:38:32<124:33:19, 13.42s/it]

Batch 218 saved to /kaggle/working/batch_218.csv


Processing books:   4%|▎         | 1276/34677 [5:39:52<121:21:40, 13.08s/it]

Error processing https://babzman.com/entretien-avec-samir-toumi-auteur-de-alger-le-cri/: HTTPSConnectionPool(host='babzman.com', port=443): Max retries exceeded with url: /entretien-avec-samir-toumi-auteur-de-alger-le-cri/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e9613f03cd0>: Failed to resolve 'babzman.com' ([Errno -2] Name or service not known)"))


Processing books:   4%|▎         | 1280/34677 [5:42:24<220:10:02, 23.73s/it]

Batch 219 saved to /kaggle/working/batch_219.csv


Processing books:   4%|▎         | 1290/34677 [5:45:19<135:24:47, 14.60s/it]

Batch 220 saved to /kaggle/working/batch_220.csv


Processing books:   4%|▎         | 1300/34677 [5:46:45<60:46:33,  6.56s/it] 

Batch 221 saved to /kaggle/working/batch_221.csv


Processing books:   4%|▍         | 1310/34677 [5:49:29<150:15:03, 16.21s/it]

Batch 222 saved to /kaggle/working/batch_222.csv


Processing books:   4%|▍         | 1320/34677 [5:51:23<106:25:01, 11.48s/it]

Batch 223 saved to /kaggle/working/batch_223.csv


Processing books:   4%|▍         | 1330/34677 [5:53:08<106:32:49, 11.50s/it]

Batch 224 saved to /kaggle/working/batch_224.csv


Processing books:   4%|▍         | 1340/34677 [5:55:18<87:24:57,  9.44s/it] 

Batch 225 saved to /kaggle/working/batch_225.csv


Processing books:   4%|▍         | 1342/34677 [5:55:42<90:11:10,  9.74s/it] 

Error processing https://booksandblurbs.com/ice-planet-barbarians-by-ruby-dixon-a-book-review/: HTTPSConnectionPool(host='booksandblurbs.com', port=443): Max retries exceeded with url: /ice-planet-barbarians-by-ruby-dixon-a-book-review/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   4%|▍         | 1350/34677 [5:58:07<84:29:42,  9.13s/it] 

Batch 226 saved to /kaggle/working/batch_226.csv


Processing books:   4%|▍         | 1360/34677 [5:59:54<85:56:30,  9.29s/it] 

Batch 227 saved to /kaggle/working/batch_227.csv


Processing books:   4%|▍         | 1370/34677 [6:01:19<87:40:40,  9.48s/it]

Batch 228 saved to /kaggle/working/batch_228.csv


Processing books:   4%|▍         | 1371/34677 [6:01:27<83:50:46,  9.06s/it]

Error processing https://theghastlygrimoire.com/2019/01/31/book-review-the-last-hour-of-gann-by-r-lee-smith/: HTTPSConnectionPool(host='theghastlygrimoire.com', port=443): Max retries exceeded with url: /2019/01/31/book-review-the-last-hour-of-gann-by-r-lee-smith/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e9610dbe6b0>: Failed to resolve 'theghastlygrimoire.com' ([Errno -2] Name or service not known)"))


Processing books:   4%|▍         | 1380/34677 [6:03:40<115:24:31, 12.48s/it]

Batch 229 saved to /kaggle/working/batch_229.csv


Processing books:   4%|▍         | 1390/34677 [6:05:19<84:01:38,  9.09s/it] 

Batch 230 saved to /kaggle/working/batch_230.csv


Processing books:   4%|▍         | 1400/34677 [6:07:14<81:46:50,  8.85s/it] 

Batch 231 saved to /kaggle/working/batch_231.csv


Processing books:   4%|▍         | 1410/34677 [6:09:02<98:18:54, 10.64s/it] 

Batch 232 saved to /kaggle/working/batch_232.csv


Processing books:   4%|▍         | 1415/34677 [6:09:56<91:01:12,  9.85s/it] 

Error processing https://smstirling.com/books/island-in-the-sea-of-time/: HTTPSConnectionPool(host='smstirling.com', port=443): Max retries exceeded with url: /books/island-in-the-sea-of-time/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   4%|▍         | 1420/34677 [6:11:06<94:55:35, 10.28s/it] 

Batch 233 saved to /kaggle/working/batch_233.csv


Processing books:   4%|▍         | 1423/34677 [6:11:25<69:31:09,  7.53s/it]

Error processing http://lovevampires.com/knannodracula.html: HTTPConnectionPool(host='lovevampires.com', port=80): Max retries exceeded with url: /knannodracula.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.fullofbooks.com/anno-dracula-by-kim-newman-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /anno-dracula-by-kim-newman-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   4%|▍         | 1430/34677 [6:14:16<116:01:16, 12.56s/it]

Batch 234 saved to /kaggle/working/batch_234.csv


Processing books:   4%|▍         | 1440/34677 [6:16:24<167:54:21, 18.19s/it]

Batch 235 saved to /kaggle/working/batch_235.csv


Processing books:   4%|▍         | 1446/34677 [6:17:24<98:50:52, 10.71s/it] 

Error processing https://josephineangelini.com/books/trial-by-fire/: HTTPSConnectionPool(host='josephineangelini.com', port=443): Max retries exceeded with url: /books/trial-by-fire/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://josephineangelini.com/international-books/trial-by-fire/: HTTPSConnectionPool(host='josephineangelini.com', port=443): Max retries exceeded with url: /international-books/trial-by-fire/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   4%|▍         | 1450/34677 [6:19:29<189:31:57, 20.54s/it]

Batch 236 saved to /kaggle/working/batch_236.csv


Processing books:   4%|▍         | 1455/34677 [6:20:30<139:30:52, 15.12s/it]

Error processing https://www.iqrasense.com/islamic-scholars/ibn-al-qayyim-al-jawziyya-book-al-tibb-al-nabawi.html: HTTPSConnectionPool(host='www.iqrasense.com', port=443): Max retries exceeded with url: /islamic-scholars/ibn-al-qayyim-al-jawziyya-book-al-tibb-al-nabawi.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   4%|▍         | 1459/34677 [6:21:42<150:08:40, 16.27s/it]

Error processing https://teachservices.com/back-to-eden-mass-market-kloss-jethro-paperback/: HTTPSConnectionPool(host='teachservices.com', port=443): Max retries exceeded with url: /back-to-eden-mass-market-kloss-jethro-paperback/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   4%|▍         | 1460/34677 [6:22:30<238:59:14, 25.90s/it]

Batch 237 saved to /kaggle/working/batch_237.csv


Processing books:   4%|▍         | 1464/34677 [6:23:14<137:07:06, 14.86s/it]

Error processing https://archive.org/details/earthingmostimpo0000ober: HTTPSConnectionPool(host='archive.org', port=443): Read timed out.


Processing books:   4%|▍         | 1470/34677 [6:24:12<86:47:46,  9.41s/it] 

Batch 238 saved to /kaggle/working/batch_238.csv


Processing books:   4%|▍         | 1480/34677 [6:25:23<55:26:58,  6.01s/it]

Batch 239 saved to /kaggle/working/batch_239.csv


Processing books:   4%|▍         | 1486/34677 [6:26:18<75:27:07,  8.18s/it]

Error processing https://www.curledup.com/oxygen.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /oxygen.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e961144d150>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   4%|▍         | 1490/34677 [6:28:37<185:51:00, 20.16s/it]

Batch 240 saved to /kaggle/working/batch_240.csv


Processing books:   4%|▍         | 1500/34677 [6:30:22<62:55:59,  6.83s/it] 

Batch 241 saved to /kaggle/working/batch_241.csv


Processing books:   4%|▍         | 1510/34677 [6:32:17<101:31:55, 11.02s/it]

Batch 242 saved to /kaggle/working/batch_242.csv


Processing books:   4%|▍         | 1520/34677 [6:34:00<93:02:05, 10.10s/it] 

Batch 243 saved to /kaggle/working/batch_243.csv


Processing books:   4%|▍         | 1530/34677 [6:35:48<106:19:47, 11.55s/it]

Batch 244 saved to /kaggle/working/batch_244.csv


Processing books:   4%|▍         | 1540/34677 [6:37:13<99:14:07, 10.78s/it] 

Batch 245 saved to /kaggle/working/batch_245.csv


Processing books:   4%|▍         | 1547/34677 [6:39:04<139:10:56, 15.12s/it]

Error processing https://candicemillard.com/river-of-doubt.html: HTTPSConnectionPool(host='candicemillard.com', port=443): Max retries exceeded with url: /river-of-doubt.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   4%|▍         | 1550/34677 [6:40:04<151:37:47, 16.48s/it]

Batch 246 saved to /kaggle/working/batch_246.csv


Processing books:   4%|▍         | 1555/34677 [6:40:37<70:12:37,  7.63s/it] 

Error processing https://www.fullofbooks.com/the-mind-readers-by-lori-brighton-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /the-mind-readers-by-lori-brighton-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   4%|▍         | 1560/34677 [6:43:19<175:49:52, 19.11s/it]

Batch 247 saved to /kaggle/working/batch_247.csv


Processing books:   5%|▍         | 1562/34677 [6:43:43<143:50:24, 15.64s/it]

Error processing https://meghanmarch.com/book-series/dirty-billionaire-trilogy/: HTTPSConnectionPool(host='meghanmarch.com', port=443): Max retries exceeded with url: /book-series/dirty-billionaire-trilogy/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://meghanmarch.com/book/dirty-billionaire/: HTTPSConnectionPool(host='meghanmarch.com', port=443): Max retries exceeded with url: /book/dirty-billionaire/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1563/34677 [6:44:52<289:44:31, 31.50s/it]

Error processing https://iscreambookblog.com/review/lost-in-me-lexi-ryan/: HTTPSConnectionPool(host='iscreambookblog.com', port=443): Max retries exceeded with url: /review/lost-in-me-lexi-ryan/ (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7e9613ed2680>: Failed to resolve 'iscreambookblog.com' ([Errno -2] Name or service not known)"))


Processing books:   5%|▍         | 1570/34677 [6:47:21<213:57:38, 23.27s/it]

Batch 248 saved to /kaggle/working/batch_248.csv


Processing books:   5%|▍         | 1580/34677 [6:50:25<136:08:25, 14.81s/it]

Batch 249 saved to /kaggle/working/batch_249.csv


Processing books:   5%|▍         | 1590/34677 [6:53:00<172:41:01, 18.79s/it]

Batch 250 saved to /kaggle/working/batch_250.csv


Processing books:   5%|▍         | 1593/34677 [6:53:42<151:27:07, 16.48s/it]

Error processing https://giselleleeb.com/2022/10/the-little-ghost-forthcoming-nightjar-press/: HTTPSConnectionPool(host='giselleleeb.com', port=443): Max retries exceeded with url: /2022/10/the-little-ghost-forthcoming-nightjar-press/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://giselleleeb.com/publications-and-prizes-2/: HTTPSConnectionPool(host='giselleleeb.com', port=443): Max retries exceeded with url: /publications-and-prizes-2/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://giselleleeb.com/reviews/: HTTPSConnectionPool(host='giselleleeb.com', port=443): Max retries exceeded with url: /reviews/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1595/34677 [6:55:57<356:21:02, 38.78s/it]

Error processing https://www.more2read.com/review/not-a-speck-of-light-by-laird-barron/: HTTPSConnectionPool(host='www.more2read.com', port=443): Max retries exceeded with url: /review/not-a-speck-of-light-by-laird-barron/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1599/34677 [6:57:19<206:04:19, 22.43s/it]

Error processing https://www.more2read.com/review/uncertainties-volume-v-edited-by-brian-j-showers/: HTTPSConnectionPool(host='www.more2read.com', port=443): Max retries exceeded with url: /review/uncertainties-volume-v-edited-by-brian-j-showers/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1600/34677 [6:58:08<278:56:46, 30.36s/it]

Batch 251 saved to /kaggle/working/batch_251.csv


Processing books:   5%|▍         | 1610/34677 [7:00:27<159:07:15, 17.32s/it]

Batch 252 saved to /kaggle/working/batch_252.csv


Processing books:   5%|▍         | 1620/34677 [7:03:08<114:07:39, 12.43s/it]

Batch 253 saved to /kaggle/working/batch_253.csv


Processing books:   5%|▍         | 1630/34677 [7:06:06<159:31:30, 17.38s/it]

Batch 254 saved to /kaggle/working/batch_254.csv


Processing books:   5%|▍         | 1640/34677 [7:07:50<102:48:32, 11.20s/it]

Batch 255 saved to /kaggle/working/batch_255.csv


Processing books:   5%|▍         | 1650/34677 [7:09:34<91:33:05,  9.98s/it] 

Batch 256 saved to /kaggle/working/batch_256.csv


Processing books:   5%|▍         | 1660/34677 [7:11:20<93:55:27, 10.24s/it] 

Batch 257 saved to /kaggle/working/batch_257.csv


Processing books:   5%|▍         | 1668/34677 [7:13:42<157:35:23, 17.19s/it]

Error processing https://www.timelinesmagazine.com/book_reviews/the-march/article_b7c9fee4-1e17-11eb-8d55-1b80c4a8f739.html: HTTPSConnectionPool(host='www.timelinesmagazine.com', port=443): Max retries exceeded with url: /book_reviews/the-march/article_b7c9fee4-1e17-11eb-8d55-1b80c4a8f739.html (Caused by ResponseError('too many 429 error responses'))


Processing books:   5%|▍         | 1670/34677 [7:14:47<207:40:14, 22.65s/it]

Batch 258 saved to /kaggle/working/batch_258.csv


Processing books:   5%|▍         | 1680/34677 [7:16:40<89:11:10,  9.73s/it] 

Batch 259 saved to /kaggle/working/batch_259.csv


Processing books:   5%|▍         | 1690/34677 [7:19:07<123:20:08, 13.46s/it]

Batch 260 saved to /kaggle/working/batch_260.csv


Processing books:   5%|▍         | 1700/34677 [7:21:46<176:00:50, 19.21s/it]

Batch 261 saved to /kaggle/working/batch_261.csv


Processing books:   5%|▍         | 1709/34677 [7:24:09<111:37:54, 12.19s/it]

Error processing https://www.curledup.com/pastoral.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /pastoral.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e9610f85030>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   5%|▍         | 1710/34677 [7:26:04<393:18:41, 42.95s/it]

Batch 262 saved to /kaggle/working/batch_262.csv


Processing books:   5%|▍         | 1719/34677 [7:29:17<180:49:42, 19.75s/it]

Error processing https://www.nationalbook.org/books/killers-of-the-flower-moon-the-osage-murders-and-the-birth-of-the-fbi/: HTTPSConnectionPool(host='www.nationalbook.org', port=443): Max retries exceeded with url: /books/killers-of-the-flower-moon-the-osage-murders-and-the-birth-of-the-fbi/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1720/34677 [7:29:57<237:17:52, 25.92s/it]

Batch 263 saved to /kaggle/working/batch_263.csv


Processing books:   5%|▍         | 1721/34677 [7:30:23<238:23:06, 26.04s/it]

Error processing https://candicemillard.com/destiny-of-the-republic.html: HTTPSConnectionPool(host='candicemillard.com', port=443): Max retries exceeded with url: /destiny-of-the-republic.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   5%|▍         | 1725/34677 [7:31:57<214:21:43, 23.42s/it]

Error processing https://www.nationalbook.org/books/the-worst-hard-time-the-untold-story-of-those-who-survived-the-great-american-dust-bowl/: HTTPSConnectionPool(host='www.nationalbook.org', port=443): Max retries exceeded with url: /books/the-worst-hard-time-the-untold-story-of-those-who-survived-the-great-american-dust-bowl/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▍         | 1727/34677 [7:32:42<192:11:20, 21.00s/it]

Error processing https://s1.papyruspub.com/files/demos/products/ebooks/novels/historical/Preview-Unbroken-A-World-War-II-Story-by-Laura-Hillenbrand.pdf: HTTPSConnectionPool(host='s1.papyruspub.com', port=443): Max retries exceeded with url: /files/demos/products/ebooks/novels/historical/Preview-Unbroken-A-World-War-II-Story-by-Laura-Hillenbrand.pdf (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e9610f38fa0>, 'Connection to s1.papyruspub.com timed out. (connect timeout=10)'))
Error processing https://www.atlanticlibrary.org/unbroken: HTTPSConnectionPool(host='www.atlanticlibrary.org', port=443): Max retries exceeded with url: /unbroken (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)')))


Processing books:   5%|▍         | 1730/34677 [7:35:30<331:45:23, 36.25s/it]

Batch 264 saved to /kaggle/working/batch_264.csv


Processing books:   5%|▌         | 1738/34677 [7:37:32<207:01:15, 22.63s/it]

Error processing https://www.nationalbook.org/books/in-the-heart-of-the-sea-the-tragedy-of-the-whaleship-essex/: HTTPSConnectionPool(host='www.nationalbook.org', port=443): Max retries exceeded with url: /books/in-the-heart-of-the-sea-the-tragedy-of-the-whaleship-essex/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1740/34677 [7:38:19<196:56:59, 21.53s/it]

Batch 265 saved to /kaggle/working/batch_265.csv


Processing books:   5%|▌         | 1750/34677 [7:40:27<106:44:20, 11.67s/it]

Batch 266 saved to /kaggle/working/batch_266.csv


Processing books:   5%|▌         | 1760/34677 [7:42:36<94:16:41, 10.31s/it] 

Batch 267 saved to /kaggle/working/batch_267.csv


Processing books:   5%|▌         | 1770/34677 [7:44:38<99:14:06, 10.86s/it] 

Batch 268 saved to /kaggle/working/batch_268.csv


Processing books:   5%|▌         | 1780/34677 [7:46:39<103:18:11, 11.30s/it]

Batch 269 saved to /kaggle/working/batch_269.csv


Processing books:   5%|▌         | 1790/34677 [7:47:56<74:19:11,  8.14s/it] 

Batch 270 saved to /kaggle/working/batch_270.csv


Processing books:   5%|▌         | 1795/34677 [7:49:12<135:57:11, 14.88s/it]

Error processing https://www.bewareofthereader.com/a-girl-called-samson-by-amy-harmon/: HTTPSConnectionPool(host='www.bewareofthereader.com', port=443): Max retries exceeded with url: /a-girl-called-samson-by-amy-harmon/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1800/34677 [7:50:41<120:51:18, 13.23s/it]

Batch 271 saved to /kaggle/working/batch_271.csv


Processing books:   5%|▌         | 1803/34677 [7:51:21<119:17:42, 13.06s/it]

Error processing https://pbpfinc.org/battle-of-brandywine/: HTTPSConnectionPool(host='pbpfinc.org', port=443): Max retries exceeded with url: /battle-of-brandywine/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1810/34677 [7:53:14<99:36:43, 10.91s/it] 

Batch 272 saved to /kaggle/working/batch_272.csv


Processing books:   5%|▌         | 1820/34677 [7:55:21<110:20:32, 12.09s/it]

Batch 273 saved to /kaggle/working/batch_273.csv


Processing books:   5%|▌         | 1830/34677 [7:57:02<99:25:24, 10.90s/it] 

Batch 274 saved to /kaggle/working/batch_274.csv


Processing books:   5%|▌         | 1840/34677 [7:58:14<61:25:34,  6.73s/it] 

Batch 275 saved to /kaggle/working/batch_275.csv


Processing books:   5%|▌         | 1841/34677 [7:58:29<84:51:04,  9.30s/it]

Error processing https://suzannewoodsfisher.com/book/the-choice/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-choice/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1849/34677 [8:00:05<84:21:35,  9.25s/it] 

Error processing https://amyclipston.com/books/a-gift-of-grace/: HTTPSConnectionPool(host='amyclipston.com', port=443): Max retries exceeded with url: /books/a-gift-of-grace/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1850/34677 [8:00:45<169:32:16, 18.59s/it]

Batch 276 saved to /kaggle/working/batch_276.csv


Processing books:   5%|▌         | 1860/34677 [8:02:20<100:28:21, 11.02s/it]

Batch 277 saved to /kaggle/working/batch_277.csv


Processing books:   5%|▌         | 1867/34677 [8:03:52<107:48:38, 11.83s/it]

Error processing https://suzannewoodsfisher.com/book/the-keeper/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-keeper/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1870/34677 [8:04:59<146:49:59, 16.11s/it]

Batch 278 saved to /kaggle/working/batch_278.csv


Processing books:   5%|▌         | 1880/34677 [8:06:58<127:34:56, 14.00s/it]

Batch 279 saved to /kaggle/working/batch_279.csv


Processing books:   5%|▌         | 1882/34677 [8:07:23<119:12:03, 13.09s/it]

Error processing https://kathleenfuller.com/book/an-amish-christmas/: HTTPSConnectionPool(host='kathleenfuller.com', port=443): Max retries exceeded with url: /book/an-amish-christmas/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1890/34677 [8:10:11<160:36:41, 17.64s/it]

Batch 280 saved to /kaggle/working/batch_280.csv
Error processing https://suzannewoodsfisher.com/book/the-waiting/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-waiting/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   5%|▌         | 1900/34677 [8:13:19<150:52:11, 16.57s/it]

Batch 281 saved to /kaggle/working/batch_281.csv


Processing books:   5%|▌         | 1901/34677 [8:13:28<128:46:39, 14.14s/it]

Error processing https://amyclipston.com/books/a-simple-prayer/: HTTPSConnectionPool(host='amyclipston.com', port=443): Max retries exceeded with url: /books/a-simple-prayer/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1910/34677 [8:16:07<159:53:47, 17.57s/it]

Batch 282 saved to /kaggle/working/batch_282.csv
Error processing https://suzannewoodsfisher.com/book/christmas-at-rose-hill-farm/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/christmas-at-rose-hill-farm/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1912/34677 [8:16:59<186:01:16, 20.44s/it]

Error processing https://suzannewoodsfisher.com/book/the-revealing/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-revealing/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1920/34677 [8:19:18<129:55:28, 14.28s/it]

Batch 283 saved to /kaggle/working/batch_283.csv


Processing books:   6%|▌         | 1924/34677 [8:20:15<124:13:03, 13.65s/it]

Error processing https://suzannewoodsfisher.com/book/the-calling/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-calling/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1930/34677 [8:22:09<148:46:10, 16.35s/it]

Batch 284 saved to /kaggle/working/batch_284.csv


Processing books:   6%|▌         | 1934/34677 [8:22:56<120:33:22, 13.25s/it]

Error processing https://triciagoyer.com/books/a-christmas-gift-for-rose/: HTTPSConnectionPool(host='triciagoyer.com', port=443): Max retries exceeded with url: /books/a-christmas-gift-for-rose/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1938/34677 [8:24:28<140:55:59, 15.50s/it]

Error processing https://suzannewoodsfisher.com/book/the-letters/: HTTPSConnectionPool(host='suzannewoodsfisher.com', port=443): Max retries exceeded with url: /book/the-letters/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1940/34677 [8:25:28<201:54:52, 22.20s/it]

Batch 285 saved to /kaggle/working/batch_285.csv


Processing books:   6%|▌         | 1941/34677 [8:25:42<177:56:46, 19.57s/it]

Error processing https://triciagoyer.com/books/the-promise-box-seven-brides-for-seven-bachelors-book-2/: HTTPSConnectionPool(host='triciagoyer.com', port=443): Max retries exceeded with url: /books/the-promise-box-seven-brides-for-seven-bachelors-book-2/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://triciagoyer.com/series/seven-brides-for-seven-bachelors-series/: HTTPSConnectionPool(host='triciagoyer.com', port=443): Max retries exceeded with url: /series/seven-brides-for-seven-bachelors-series/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 1950/34677 [8:28:58<142:54:45, 15.72s/it]

Batch 286 saved to /kaggle/working/batch_286.csv


Processing books:   6%|▌         | 1956/34677 [8:30:24<149:51:39, 16.49s/it]

Error processing https://www.fullofbooks.com/lysistrata-by-aristophanes-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /lysistrata-by-aristophanes-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   6%|▌         | 1960/34677 [8:32:40<196:23:56, 21.61s/it]

Batch 287 saved to /kaggle/working/batch_287.csv


Processing books:   6%|▌         | 1970/34677 [8:34:51<166:33:04, 18.33s/it]

Batch 288 saved to /kaggle/working/batch_288.csv


Processing books:   6%|▌         | 1980/34677 [8:36:04<86:14:13,  9.49s/it] 

Batch 289 saved to /kaggle/working/batch_289.csv


Processing books:   6%|▌         | 1990/34677 [8:37:36<85:40:16,  9.44s/it] 

Batch 290 saved to /kaggle/working/batch_290.csv


Processing books:   6%|▌         | 2000/34677 [8:39:18<89:24:40,  9.85s/it] 

Batch 291 saved to /kaggle/working/batch_291.csv


Processing books:   6%|▌         | 2010/34677 [8:40:50<71:26:50,  7.87s/it] 

Batch 292 saved to /kaggle/working/batch_292.csv


Processing books:   6%|▌         | 2020/34677 [8:43:08<149:24:13, 16.47s/it]

Batch 293 saved to /kaggle/working/batch_293.csv


Processing books:   6%|▌         | 2030/34677 [8:44:47<88:40:48,  9.78s/it] 

Batch 294 saved to /kaggle/working/batch_294.csv


Processing books:   6%|▌         | 2034/34677 [8:45:17<72:09:13,  7.96s/it]

Error processing http://www.lovevampires.com/lsnightshift.html: HTTPConnectionPool(host='www.lovevampires.com', port=80): Max retries exceeded with url: /lsnightshift.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2040/34677 [8:46:36<79:02:08,  8.72s/it] 

Batch 295 saved to /kaggle/working/batch_295.csv


Processing books:   6%|▌         | 2043/34677 [8:47:21<119:40:23, 13.20s/it]

Error processing https://thebibliophage.com/book-review-affinity-by-sarah-waters/: HTTPSConnectionPool(host='thebibliophage.com', port=443): Max retries exceeded with url: /book-review-affinity-by-sarah-waters/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.fullofbooks.com/affinity-by-sarah-waters-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /affinity-by-sarah-waters-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   6%|▌         | 2050/34677 [8:50:50<160:10:40, 17.67s/it]

Batch 296 saved to /kaggle/working/batch_296.csv


Processing books:   6%|▌         | 2051/34677 [8:51:05<153:54:03, 16.98s/it]

Error processing https://www.curledup.com/propered.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /propered.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e96131bc7f0>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   6%|▌         | 2060/34677 [8:54:28<127:44:01, 14.10s/it]

Batch 297 saved to /kaggle/working/batch_297.csv
Error processing https://drcoghlan.com/blog/tipping-the-velvet/: HTTPSConnectionPool(host='drcoghlan.com', port=443): Max retries exceeded with url: /blog/tipping-the-velvet/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2065/34677 [8:55:46<101:08:27, 11.16s/it]

Error processing https://safsortedinfo.com/review-unearthly-unearthly-1-by-cynthia-hand/: HTTPSConnectionPool(host='safsortedinfo.com', port=443): Max retries exceeded with url: /review-unearthly-unearthly-1-by-cynthia-hand/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2067/34677 [8:56:45<167:50:59, 18.53s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/angels-blood/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/angels-blood/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2070/34677 [8:58:02<186:55:03, 20.64s/it]

Batch 298 saved to /kaggle/working/batch_298.csv


Processing books:   6%|▌         | 2072/34677 [8:58:45<186:09:15, 20.55s/it]

Error processing https://www.iheartreading.net/2012/08/21/book-review-hallowed-unearthly-2-by-cynthia-hand/: HTTPSConnectionPool(host='www.iheartreading.net', port=443): Max retries exceeded with url: /2012/08/21/book-review-hallowed-unearthly-2-by-cynthia-hand/ (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.iheartreading.net'. (_ssl.c:1007)")))


Processing books:   6%|▌         | 2075/34677 [9:00:07<212:53:59, 23.51s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-kiss/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-kiss/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2080/34677 [9:01:50<173:35:21, 19.17s/it]

Batch 299 saved to /kaggle/working/batch_299.csv
Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-consort/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-consort/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2082/34677 [9:02:34<172:22:32, 19.04s/it]

Error processing https://s1.papyruspub.com/files/demos/products/ebooks/novels/fantasy/Cassandra-Clare/The-Mortal-Instruments/Preview-City-of-Heavenly-Fire-by-Cassandra-Clare.pdf: HTTPSConnectionPool(host='s1.papyruspub.com', port=443): Max retries exceeded with url: /files/demos/products/ebooks/novels/fantasy/Cassandra-Clare/The-Mortal-Instruments/Preview-City-of-Heavenly-Fire-by-Cassandra-Clare.pdf (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e9612a2b550>, 'Connection to s1.papyruspub.com timed out. (connect timeout=10)'))


Processing books:   6%|▌         | 2085/34677 [9:04:51<279:51:09, 30.91s/it]

Error processing https://www.leeweatherly.com/books/angel/: HTTPSConnectionPool(host='www.leeweatherly.com', port=443): Max retries exceeded with url: /books/angel/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2086/34677 [9:05:38<321:32:18, 35.52s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-blade/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-blade/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2087/34677 [9:06:14<323:22:01, 35.72s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-storm/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-storm/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2089/34677 [9:07:00<251:49:12, 27.82s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-legion/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-legion/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2090/34677 [9:07:35<271:13:13, 29.96s/it]

Batch 300 saved to /kaggle/working/batch_300.csv


Processing books:   6%|▌         | 2091/34677 [9:07:45<217:38:06, 24.04s/it]

Error processing https://genashowalter.com/books/wicked-nights/: HTTPSConnectionPool(host='genashowalter.com', port=443): Max retries exceeded with url: /books/wicked-nights/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2095/34677 [9:08:46<133:40:08, 14.77s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-shadows/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-shadows/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2096/34677 [9:09:33<220:38:14, 24.38s/it]

Error processing https://www.fullofbooks.com/fallen-in-love-by-lauren-kate-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /fallen-in-love-by-lauren-kate-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   6%|▌         | 2097/34677 [9:11:13<427:15:32, 47.21s/it]

Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-enigma/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-enigma/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2100/34677 [9:12:09<238:29:38, 26.36s/it]

Batch 301 saved to /kaggle/working/batch_301.csv
Error processing https://nalinisingh.com/books/guild-hunter-series/archangels-heart/: HTTPSConnectionPool(host='nalinisingh.com', port=443): Max retries exceeded with url: /books/guild-hunter-series/archangels-heart/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2103/34677 [9:12:58<158:37:22, 17.53s/it]

Error processing https://www.fullofbooks.com/immortal-city-by-scott-speer-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /immortal-city-by-scott-speer-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   6%|▌         | 2110/34677 [9:15:50<126:48:22, 14.02s/it]

Batch 302 saved to /kaggle/working/batch_302.csv


Processing books:   6%|▌         | 2120/34677 [9:17:34<82:12:58,  9.09s/it] 

Batch 303 saved to /kaggle/working/batch_303.csv


Processing books:   6%|▌         | 2121/34677 [9:17:35<60:33:25,  6.70s/it]

Error processing https://www.geeksundergrace.com/books/classic-review-grendel-1971/: HTTPSConnectionPool(host='www.geeksundergrace.com', port=443): Max retries exceeded with url: /books/classic-review-grendel-1971/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▌         | 2130/34677 [9:19:33<66:54:37,  7.40s/it] 

Batch 304 saved to /kaggle/working/batch_304.csv


Processing books:   6%|▌         | 2140/34677 [9:21:36<87:19:30,  9.66s/it] 

Batch 305 saved to /kaggle/working/batch_305.csv


Processing books:   6%|▌         | 2145/34677 [9:22:25<94:29:09, 10.46s/it]

Error processing https://archive.org/details/mayombe00pepe: HTTPSConnectionPool(host='archive.org', port=443): Read timed out.


Processing books:   6%|▌         | 2146/34677 [9:22:56<150:11:39, 16.62s/it]

Error processing https://www.rarebookcellar.com/pages/books/126090/paul-theroux/the-last-train-to-zona-verde-my-ultimate-african-safari: HTTPSConnectionPool(host='www.rarebookcellar.com', port=443): Max retries exceeded with url: /pages/books/126090/paul-theroux/the-last-train-to-zona-verde-my-ultimate-african-safari (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e96129db0a0>, 'Connection to www.rarebookcellar.com timed out. (connect timeout=10)'))


Processing books:   6%|▌         | 2150/34677 [9:25:33<221:52:40, 24.56s/it]

Batch 306 saved to /kaggle/working/batch_306.csv


Processing books:   6%|▌         | 2160/34677 [9:27:11<117:51:22, 13.05s/it]

Batch 307 saved to /kaggle/working/batch_307.csv


Processing books:   6%|▋         | 2170/34677 [9:28:58<82:59:09,  9.19s/it] 

Batch 308 saved to /kaggle/working/batch_308.csv


Processing books:   6%|▋         | 2180/34677 [9:30:47<103:52:25, 11.51s/it]

Batch 309 saved to /kaggle/working/batch_309.csv


Processing books:   6%|▋         | 2190/34677 [9:33:16<203:57:23, 22.60s/it]

Batch 310 saved to /kaggle/working/batch_310.csv


Processing books:   6%|▋         | 2200/34677 [9:35:11<93:38:54, 10.38s/it] 

Batch 311 saved to /kaggle/working/batch_311.csv


Processing books:   6%|▋         | 2210/34677 [9:37:06<92:31:07, 10.26s/it] 

Batch 312 saved to /kaggle/working/batch_312.csv


Processing books:   6%|▋         | 2214/34677 [9:37:22<48:59:03,  5.43s/it]

Error processing https://www.target.com/p/firestar-s-quest-warriors-super-edition-by-erin-hunter/-/A-82685158: HTTPSConnectionPool(host='www.target.com', port=443): Max retries exceeded with url: /p/firestar-s-quest-warriors-super-edition-by-erin-hunter/-/A-82685158 (Caused by ResponseError('too many 429 error responses'))


Processing books:   6%|▋         | 2220/34677 [9:38:58<116:42:26, 12.94s/it]

Batch 313 saved to /kaggle/working/batch_313.csv


Processing books:   6%|▋         | 2224/34677 [9:39:24<70:20:40,  7.80s/it] 

Error processing https://reviews.metaphorosis.com/review/the-plague-dogs-richard-adams/: HTTPSConnectionPool(host='reviews.metaphorosis.com', port=443): Max retries exceeded with url: /review/the-plague-dogs-richard-adams/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   6%|▋         | 2225/34677 [9:40:16<189:08:08, 20.98s/it]

Error processing https://www.target.com/p/bluestar-s-prophecy-warriors-super-edition-hardcover-by-erin-hunter/-/A-11397577: HTTPSConnectionPool(host='www.target.com', port=443): Max retries exceeded with url: /p/bluestar-s-prophecy-warriors-super-edition-hardcover-by-erin-hunter/-/A-11397577 (Caused by ResponseError('too many 429 error responses'))
Error processing https://www.target.com/p/warriors-super-edition-bluestar-s-prophecy-by-erin-hunter-paperback/-/A-88896347: HTTPSConnectionPool(host='www.target.com', port=443): Max retries exceeded with url: /p/warriors-super-edition-bluestar-s-prophecy-by-erin-hunter-paperback/-/A-88896347 (Caused by ResponseError('too many 429 error responses'))


Processing books:   6%|▋         | 2230/34677 [9:42:20<171:57:00, 19.08s/it]

Batch 314 saved to /kaggle/working/batch_314.csv


Processing books:   6%|▋         | 2240/34677 [9:44:40<143:09:28, 15.89s/it]

Batch 315 saved to /kaggle/working/batch_315.csv


Processing books:   6%|▋         | 2250/34677 [9:46:34<104:59:30, 11.66s/it]

Batch 316 saved to /kaggle/working/batch_316.csv


Processing books:   7%|▋         | 2260/34677 [9:48:28<88:51:09,  9.87s/it] 

Batch 317 saved to /kaggle/working/batch_317.csv


Processing books:   7%|▋         | 2270/34677 [9:49:54<75:54:24,  8.43s/it] 

Batch 318 saved to /kaggle/working/batch_318.csv


Processing books:   7%|▋         | 2276/34677 [9:50:23<35:53:16,  3.99s/it]

Error processing https://www.fullofbooks.com/inuyasha-vol-1-turning-back-time-by-rumiko-takahashi-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /inuyasha-vol-1-turning-back-time-by-rumiko-takahashi-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2280/34677 [9:52:21<140:14:24, 15.58s/it]

Batch 319 saved to /kaggle/working/batch_319.csv


Processing books:   7%|▋         | 2290/34677 [9:53:38<60:07:33,  6.68s/it] 

Batch 320 saved to /kaggle/working/batch_320.csv


Processing books:   7%|▋         | 2293/34677 [9:54:05<71:00:10,  7.89s/it]

Error processing https://legals.clevelandbanner.com/results/Resources/Documents/Fruits-Basket-Double-Volume-5-6.pdf: HTTPSConnectionPool(host='legals.clevelandbanner.com', port=443): Max retries exceeded with url: /results/Resources/Documents/Fruits-Basket-Double-Volume-5-6.pdf (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'legals.clevelandbanner.com'. (_ssl.c:1007)")))


Processing books:   7%|▋         | 2300/34677 [9:56:16<119:03:44, 13.24s/it]

Batch 321 saved to /kaggle/working/batch_321.csv


Processing books:   7%|▋         | 2310/34677 [9:57:42<77:59:19,  8.67s/it] 

Batch 322 saved to /kaggle/working/batch_322.csv


Processing books:   7%|▋         | 2320/34677 [9:59:46<101:09:00, 11.25s/it]

Batch 323 saved to /kaggle/working/batch_323.csv


Processing books:   7%|▋         | 2329/34677 [10:02:10<120:38:38, 13.43s/it]

Error processing https://www.lovevampires.com/varstrangebrew.html: HTTPSConnectionPool(host='www.lovevampires.com', port=443): Max retries exceeded with url: /varstrangebrew.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2330/34677 [10:02:55<205:42:42, 22.89s/it]

Batch 324 saved to /kaggle/working/batch_324.csv


Processing books:   7%|▋         | 2338/34677 [10:04:34<118:05:34, 13.15s/it]

Error processing http://www.lovevampires.com/varmanybloodyreturns.html: HTTPConnectionPool(host='www.lovevampires.com', port=80): Max retries exceeded with url: /varmanybloodyreturns.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2339/34677 [10:05:19<204:41:34, 22.79s/it]

Error processing https://www.fullofbooks.com/blood-lite-by-kevin-j-anderson-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /blood-lite-by-kevin-j-anderson-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))
Error processing http://lovevampires.com/kjabloodlite.html: HTTPConnectionPool(host='lovevampires.com', port=80): Max retries exceeded with url: /kjabloodlite.html (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2340/34677 [10:07:38<519:16:36, 57.81s/it]

Batch 325 saved to /kaggle/working/batch_325.csv


Processing books:   7%|▋         | 2341/34677 [10:07:53<403:14:29, 44.89s/it]

Error processing https://www.yabookscentral.com/prom-nights-from-hell/: HTTPSConnectionPool(host='www.yabookscentral.com', port=443): Max retries exceeded with url: /prom-nights-from-hell/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.yabookscentral.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2350/34677 [10:11:46<229:17:35, 25.53s/it]

Batch 326 saved to /kaggle/working/batch_326.csv
Error processing https://www.fullofbooks.com/let-it-snow-by-john-green-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /let-it-snow-by-john-green-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2360/34677 [10:15:34<131:47:15, 14.68s/it]

Batch 327 saved to /kaggle/working/batch_327.csv


Processing books:   7%|▋         | 2370/34677 [10:16:50<94:57:35, 10.58s/it]

Batch 328 saved to /kaggle/working/batch_328.csv


Processing books:   7%|▋         | 2380/34677 [10:19:18<137:10:10, 15.29s/it]

Batch 329 saved to /kaggle/working/batch_329.csv


Processing books:   7%|▋         | 2390/34677 [10:21:00<87:08:29,  9.72s/it] 

Batch 330 saved to /kaggle/working/batch_330.csv


Processing books:   7%|▋         | 2400/34677 [10:22:38<78:50:48,  8.79s/it] 

Batch 331 saved to /kaggle/working/batch_331.csv


Processing books:   7%|▋         | 2410/34677 [10:24:40<94:45:23, 10.57s/it] 

Batch 332 saved to /kaggle/working/batch_332.csv
Error processing https://beforewegoblog.com/review-the-builders-by-daniel-polansky/: HTTPSConnectionPool(host='beforewegoblog.com', port=443): Max retries exceeded with url: /review-the-builders-by-daniel-polansky/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2420/34677 [10:27:17<133:06:01, 14.85s/it]

Batch 333 saved to /kaggle/working/batch_333.csv


Processing books:   7%|▋         | 2422/34677 [10:27:42<126:58:46, 14.17s/it]

Error processing https://www.yabookscentral.com/ashes/: HTTPSConnectionPool(host='www.yabookscentral.com', port=443): Max retries exceeded with url: /ashes/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.yabookscentral.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2430/34677 [10:30:55<129:59:02, 14.51s/it]

Batch 334 saved to /kaggle/working/batch_334.csv


Processing books:   7%|▋         | 2440/34677 [10:32:43<84:57:22,  9.49s/it] 

Batch 335 saved to /kaggle/working/batch_335.csv


Processing books:   7%|▋         | 2450/34677 [10:34:51<90:14:51, 10.08s/it] 

Batch 336 saved to /kaggle/working/batch_336.csv


Processing books:   7%|▋         | 2453/34677 [10:35:28<110:21:14, 12.33s/it]

Error processing http://staging.register.girlscoutsgcnwi.org/textbooks/publication/download/american_idyll_academic_antielitism_as_cultural_critique.pdf: HTTPConnectionPool(host='staging.register.girlscoutsgcnwi.org', port=80): Max retries exceeded with url: /textbooks/publication/download/american_idyll_academic_antielitism_as_cultural_critique.pdf (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7e9613227730>, 'Connection to staging.register.girlscoutsgcnwi.org timed out. (connect timeout=10)'))


Processing books:   7%|▋         | 2460/34677 [10:38:13<121:39:20, 13.59s/it]

Batch 337 saved to /kaggle/working/batch_337.csv


Processing books:   7%|▋         | 2470/34677 [10:40:43<137:20:34, 15.35s/it]

Batch 338 saved to /kaggle/working/batch_338.csv


Processing books:   7%|▋         | 2471/34677 [10:41:01<145:43:39, 16.29s/it]

Error processing https://archive.org/details/fatalinventionho0000robe: HTTPSConnectionPool(host='archive.org', port=443): Read timed out.


Processing books:   7%|▋         | 2480/34677 [10:43:18<131:09:02, 14.66s/it]

Batch 339 saved to /kaggle/working/batch_339.csv


Processing books:   7%|▋         | 2490/34677 [10:45:02<80:59:35,  9.06s/it] 

Batch 340 saved to /kaggle/working/batch_340.csv


Processing books:   7%|▋         | 2500/34677 [10:47:02<88:01:56,  9.85s/it] 

Batch 341 saved to /kaggle/working/batch_341.csv


Processing books:   7%|▋         | 2505/34677 [10:47:56<87:12:07,  9.76s/it] 

Error processing http://www.thebookshop.ubookstore.com/book/9781943056057: HTTPSConnectionPool(host='www.thebookshop.ubookstore.com', port=443): Max retries exceeded with url: /book/9781943056057 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.thebookshop.ubookstore.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2510/34677 [10:50:32<164:03:03, 18.36s/it]

Batch 342 saved to /kaggle/working/batch_342.csv


Processing books:   7%|▋         | 2518/34677 [10:51:49<88:38:38,  9.92s/it] 

Error processing https://apiv1.booko.com.au/9781980846604/Hybrid-Humans-Scientific-Evidence-of-Our-800-000-Year-Old-Alien-Legacy: HTTPSConnectionPool(host='apiv1.booko.com.au', port=443): Max retries exceeded with url: /9781980846604/Hybrid-Humans-Scientific-Evidence-of-Our-800-000-Year-Old-Alien-Legacy (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'apiv1.booko.com.au'. (_ssl.c:1007)")))


Processing books:   7%|▋         | 2520/34677 [10:52:58<185:40:46, 20.79s/it]

Batch 343 saved to /kaggle/working/batch_343.csv


Processing books:   7%|▋         | 2530/34677 [10:55:13<128:02:44, 14.34s/it]

Batch 344 saved to /kaggle/working/batch_344.csv


Processing books:   7%|▋         | 2538/34677 [10:57:16<143:41:02, 16.09s/it]

Error processing https://www.welshofharpersferry.com/library/the-maps-of-antietam-an-atlas-of-the-antietam-sharpsburg-campaign-including-the-battle-of-south-mountain-september-2-20-1862-savas-beatie-military-atlas/: HTTPSConnectionPool(host='www.welshofharpersferry.com', port=443): Max retries exceeded with url: /library/the-maps-of-antietam-an-atlas-of-the-antietam-sharpsburg-campaign-including-the-battle-of-south-mountain-september-2-20-1862-savas-beatie-military-atlas/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2540/34677 [10:58:18<200:38:39, 22.48s/it]

Batch 345 saved to /kaggle/working/batch_345.csv


Processing books:   7%|▋         | 2544/34677 [10:59:21<154:49:31, 17.35s/it]

Error processing https://archive.org/details/shepherdstownlas0000mcgr: HTTPSConnectionPool(host='archive.org', port=443): Read timed out.
Error processing https://encyclopediavirginia.org/entries/shepherdstown-battle-of/: HTTPSConnectionPool(host='encyclopediavirginia.org', port=443): Max retries exceeded with url: /entries/shepherdstown-battle-of/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.welshofharpersferry.com/library/shepherdstown-last-clash-of-the-antietam-campaign-september-19-20-1862/: HTTPSConnectionPool(host='www.welshofharpersferry.com', port=443): Max retries exceeded with url: /library/shepherdstown-last-clash-of-the-antietam-campaign-september-19-20-1862/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://www.wvencyclopedia.org/articles/2403: HTTPSConnectionPool(host='www.wvencyc

Processing books:   7%|▋         | 2546/34677 [11:01:42<349:43:27, 39.18s/it]

Error processing https://shop.americasnationalparks.org/product/125123/A-Guide-to-the-Antietam-Farmsteads/: HTTPSConnectionPool(host='shop.americasnationalparks.org', port=443): Max retries exceeded with url: /product/125123/A-Guide-to-the-Antietam-Farmsteads/ (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   7%|▋         | 2550/34677 [11:03:12<207:26:58, 23.25s/it]

Batch 346 saved to /kaggle/working/batch_346.csv


Processing books:   7%|▋         | 2551/34677 [11:03:23<174:24:03, 19.54s/it]

Error processing https://www.press.jhu.edu/books/title/10509/antietam-creek: HTTPSConnectionPool(host='www.press.jhu.edu', port=443): Max retries exceeded with url: /books/title/10509/antietam-creek (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1007)')))


Processing books:   7%|▋         | 2560/34677 [11:05:30<93:42:08, 10.50s/it] 

Batch 347 saved to /kaggle/working/batch_347.csv
Error processing https://kathyreichs.com/bones-of-the-lost/: HTTPSConnectionPool(host='kathyreichs.com', port=443): Max retries exceeded with url: /bones-of-the-lost/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   7%|▋         | 2570/34677 [11:08:30<96:01:10, 10.77s/it] 

Batch 348 saved to /kaggle/working/batch_348.csv


Processing books:   7%|▋         | 2578/34677 [11:10:05<115:29:14, 12.95s/it]

Error processing https://www.fullofbooks.com/the-last-of-the-wine-by-mary-renault-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /the-last-of-the-wine-by-mary-renault-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2580/34677 [11:12:05<286:40:17, 32.15s/it]

Batch 349 saved to /kaggle/working/batch_349.csv


Processing books:   7%|▋         | 2584/34677 [11:12:45<134:38:12, 15.10s/it]

Error processing https://fathomjournal.org/book-review-jews-dont-count/: HTTPSConnectionPool(host='fathomjournal.org', port=443): Max retries exceeded with url: /book-review-jews-dont-count/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='fathomjournal.org', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2589/34677 [11:15:30<184:24:21, 20.69s/it]

Error processing https://www.fullofbooks.com/the-fixer-by-bernard-malamud-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /the-fixer-by-bernard-malamud-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))
Error processing https://www.curledup.com/thefixer.htm: HTTPSConnectionPool(host='www.curledup.com', port=443): Max retries exceeded with url: /thefixer.htm (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7e9613224e50>, 'Connection to www.curledup.com timed out. (connect timeout=10)'))


Processing books:   7%|▋         | 2590/34677 [11:18:58<684:50:26, 76.84s/it]

Batch 350 saved to /kaggle/working/batch_350.csv


Processing books:   7%|▋         | 2598/34677 [11:21:55<244:54:40, 27.48s/it]

Error processing https://www.fullofbooks.com/the-complete-maus-by-art-spiegelman-review/: HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Max retries exceeded with url: /the-complete-maus-by-art-spiegelman-review/ (Caused by ReadTimeoutError("HTTPSConnectionPool(host='www.fullofbooks.com', port=443): Read timed out. (read timeout=10)"))


Processing books:   7%|▋         | 2600/34677 [11:25:31<564:59:26, 63.41s/it]

Batch 351 saved to /kaggle/working/batch_351.csv


Processing books:   8%|▊         | 2610/34677 [11:27:40<129:21:39, 14.52s/it]

Batch 352 saved to /kaggle/working/batch_352.csv


Processing books:   8%|▊         | 2613/34677 [11:28:40<168:08:24, 18.88s/it]

Error processing https://jewishjournal.com/culture/arts/books/363009/jewish-space-lasers-and-the-history-of-antisemitic-conspiracy-theories/: HTTPSConnectionPool(host='jewishjournal.com', port=443): Max retries exceeded with url: /culture/arts/books/363009/jewish-space-lasers-and-the-history-of-antisemitic-conspiracy-theories/ (Caused by ResponseError('too many 502 error responses'))
Error processing https://w.warwicks.com/book/9781685890643: HTTPSConnectionPool(host='w.warwicks.com', port=443): Max retries exceeded with url: /book/9781685890643 (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'w.warwicks.com'. (_ssl.c:1007)")))


Processing books:   8%|▊         | 2614/34677 [11:30:05<346:04:27, 38.86s/it]

Error processing https://jewishjournal.com/cover_story/367513/the-dei-dilemma/: HTTPSConnectionPool(host='jewishjournal.com', port=443): Max retries exceeded with url: /cover_story/367513/the-dei-dilemma/ (Caused by ResponseError('too many 502 error responses'))


Processing books:   8%|▊         | 2616/34677 [11:31:06<286:19:49, 32.15s/it]

Error processing https://www.valeriebiel.com/blog/review-assignment-and-interview-author-liza-wiemer: HTTPSConnectionPool(host='www.valeriebiel.com', port=443): Max retries exceeded with url: /blog/review-assignment-and-interview-author-liza-wiemer (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing https://diversebooks.org/qa-with-liza-weimer-the-assignment/: HTTPSConnectionPool(host='diversebooks.org', port=443): Max retries exceeded with url: /qa-with-liza-weimer-the-assignment/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))
Error processing http://m.booktable.net/book/9780593123164: HTTPConnectionPool(host='m.booktable.net', port=80): Max retries exceeded with url: /book/9780593123164 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7e9610b07580>: Failed to resolve 'm.booktable.net' ([Errno -2] Name o

Processing books:   8%|▊         | 2620/34677 [11:33:43<259:04:40, 29.09s/it]

Batch 353 saved to /kaggle/working/batch_353.csv


Processing books:   8%|▊         | 2625/34677 [11:35:32<193:30:44, 21.73s/it]

Error processing https://www.css.mhpbooks.com/public/scholarship/Documents/the_culture_of_critique_an_evolutionary_analysis_of_jewish_involvement_in_twentieth_century_intellectual_and_political_movements.pdf: HTTPSConnectionPool(host='www.css.mhpbooks.com', port=443): Max retries exceeded with url: /public/scholarship/Documents/the_culture_of_critique_an_evolutionary_analysis_of_jewish_involvement_in_twentieth_century_intellectual_and_political_movements.pdf (Caused by SSLError(SSLCertVerificationError(1, "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'www.css.mhpbooks.com'. (_ssl.c:1007)")))
Error processing https://reports.wurkhouse.com/textbooks/uploaded-files/HomePages/The_Culture_Of_Critique_An_Evolutionary_Analysis_Of_Jewish_Involvement_In_Twentieth_Century_Intellectual_And_Political_Movements.pdf: HTTPSConnectionPool(host='reports.wurkhouse.com', port=443): Max retries exceeded with url: /textbooks/uploaded-files/Home

Processing books:   8%|▊         | 2630/34677 [11:38:47<193:49:55, 21.77s/it]

Batch 354 saved to /kaggle/working/batch_354.csv


Processing books:   8%|▊         | 2640/34677 [11:40:46<119:56:42, 13.48s/it]

Batch 355 saved to /kaggle/working/batch_355.csv


Processing books:   8%|▊         | 2643/34677 [11:41:12<94:44:17, 10.65s/it] 

Error processing https://www.thestar.com/entertainment/books/the-dog-stars-by-peter-heller-review/article_b8f1bd40-f778-530d-9af7-f0308aed6ca9.html: HTTPSConnectionPool(host='www.thestar.com', port=443): Max retries exceeded with url: /entertainment/books/the-dog-stars-by-peter-heller-review/article_b8f1bd40-f778-530d-9af7-f0308aed6ca9.html (Caused by ResponseError('too many 429 error responses'))


Processing books:   8%|▊         | 2645/34677 [11:42:14<170:31:50, 19.17s/it]

Error processing https://starcrossedbookblog.com/2015/05/book-review-ashfall-ashfall-1-by-mike-mullin/: HTTPSConnectionPool(host='starcrossedbookblog.com', port=443): Read timed out.


Processing books:   8%|▊         | 2648/34677 [11:43:35<194:12:40, 21.83s/it]

Error processing https://www.more2read.com/review/severance-by-ling-ma/: HTTPSConnectionPool(host='www.more2read.com', port=443): Max retries exceeded with url: /review/severance-by-ling-ma/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   8%|▊         | 2649/34677 [11:44:23<265:41:28, 29.86s/it]

Error processing https://www.briansbookblog.com/feed-mira-grant/: HTTPSConnectionPool(host='www.briansbookblog.com', port=443): Max retries exceeded with url: /feed-mira-grant/ (Caused by ProtocolError('Connection aborted.', RemoteDisconnected('Remote end closed connection without response')))


Processing books:   8%|▊         | 2650/34677 [11:45:04<294:32:17, 33.11s/it]

Batch 356 saved to /kaggle/working/batch_356.csv


Processing books:   8%|▊         | 2660/34677 [11:47:24<125:10:57, 14.08s/it]

Batch 357 saved to /kaggle/working/batch_357.csv


Processing books:   8%|▊         | 2670/34677 [11:49:10<98:50:08, 11.12s/it] 

Batch 358 saved to /kaggle/working/batch_358.csv


Processing books:   8%|▊         | 2671/34677 [11:49:27<115:09:35, 12.95s/it]

Error processing https://archive.org/details/appleconfidentia0000linz: HTTPSConnectionPool(host='archive.org', port=443): Read timed out.


Processing books:   8%|▊         | 2680/34677 [11:51:27<146:31:01, 16.48s/it]

Batch 359 saved to /kaggle/working/batch_359.csv


Processing books:   8%|▊         | 2690/34677 [11:53:35<124:23:27, 14.00s/it]

Batch 360 saved to /kaggle/working/batch_360.csv


Processing books:   8%|▊         | 2700/34677 [11:55:20<100:27:14, 11.31s/it]

Batch 361 saved to /kaggle/working/batch_361.csv


Processing books:   8%|▊         | 2707/34677 [11:56:46<92:27:28, 10.41s/it] 

In [None]:
# Prints the literal string "test" to the cell output; it reads and defines no notebook state.
# NOTE(review): the cell has no recorded execution count and looks like a leftover
# scratch/sanity check — confirm it is still needed before publishing the notebook.
print("test")

In [None]:
# Prints the literal string "test2" to the cell output; it reads and defines no notebook state.
# NOTE(review): the cell has no recorded execution count and looks like a leftover
# scratch/sanity check — confirm it is still needed before publishing the notebook.
print("test2")