# Read epub file using UnstructuredEPubLoader and apply Underwoods list of ocr corrections

In [None]:
#! pip install pypandoc

In [9]:
print ('import libraries')
# import libraries
from langchain_community.document_loaders import UnstructuredEPubLoader
from langchain.document_loaders import PyPDFLoader
import pandas as pd
import re
import os
import csv
import json
from tqdm import tqdm  # Import tqdm for progress tracking
import time

# Save start time
print ('start time')
start_time = time.time()
#######################

# Move to epub folder
os.chdir(r'.\literature epub')
# load text from epub
epub_file = '1860 A Residence in Jutland the Danish Isles and Copenhagen vol II.epub'
book_title = epub_file[:-5]
print (f'Start processing "{book_title}".\nWait a few seconds.')
loader = UnstructuredEPubLoader(epub_file, mode="elements")
epub_pages = loader.load()

# Extract data from epub and send it to a dataframe
content = []
source = []
element_no = []


# add counter to element number
count = 1 
for i in epub_pages:
       
    # return the text content, source, and element no.
    pageContent = i.page_content
    pageSource = i.metadata['source'][:-5]
    elementNo = count
    
    content.append(pageContent)
    source.append(pageSource)
    element_no.append(elementNo)

    count += 1
    
df = pd.DataFrame({'content': content, 'source': source, 'element_no': element_no})

######################
print ('Preprocess text')
# preprocess text
def preproces_text(text_string):
    # Store text data in variable
    # Remove newlines, replace apostrophe s with s, remove quotation marks
    text_string_replace = text_string.replace(r' \xad\n', '').replace(r'\xad\n', '') \
        .replace(r'\xad', '').replace(r'\xad ', '') \
        .replace(r'-\n', '').replace('—', ' ').replace(r'\n', ' ') \
        .replace("'s", "s").replace('"', ' ').replace("'", " ") \
        .replace('.', ' . ').replace(' .', ' . ').replace(',', ' ,') \
        .replace('!', ' !').replace('?', ' ?')

    # scrub for multiple signs
    # but keep full stops, commas, exclamation, and question marks
    text_string_re = re.sub(r':|;||||\||\+|\"|‘|’|“|”|\’|…|\-|–|—|\$|&|\*|>|<|\/||||»|«', '', text_string_replace)

    # Remove numbers and words containing numbers
    text_string_no_numbers = re.sub(r'\b\w*\d\w*\b', ' ', text_string_re)
    # Remove multiple whitespaces
    text_string_sub_additional_white_space = re.sub(r'\s+', ' ', text_string_no_numbers )

    return text_string_sub_additional_white_space

df['preprocessed_content'] = df['content'].apply(lambda x : preproces_text(x))

################

# go back to main folder
os.chdir('..')


print('Starting ocr correction - loading correction rules')
# Read the correction rules from the file
with open('Underwoods_CorrectionRules.txt', 'r', encoding='utf-8') as f:
    CorrectionRules_string = f.read()

# Process the correction rules
CorrectionRules_list = CorrectionRules_string.split('\n')
list_of_CorrectionRules_list = [i.split('\t') for i in CorrectionRules_list]
better_list_of_CorrectionRules = [i[0:2] for i in list_of_CorrectionRules_list]

# Add additional pairs
observations_to_add = [["Bruffels", 'Brussels'], ["fix", 'six'], ['Elsinore', 'Helsingør']]
better_list_of_CorrectionRules.extend(observations_to_add)

# Build correction dictionary
correction_dict = dict(better_list_of_CorrectionRules)



print('Ocr correction')

def ocr_post_correction(text):
    # Prepare for correction
    ### Keep track of the conditions of the words (capital, title, upper,lower)
    tokens = text.split()

    # Track the original case of each word
    original_case = [i for i in tokens]
    clean_text_list = [i.lower() for i in tokens]

    # Initialize the list to store corrected words and a dictionary to track corrections
    new_word_list = []
    corrections_tracker = {}

    # Iterate through the clean_text_list word list
    for index, word in enumerate(clean_text_list):
        # Check if the word has a correction in the dictionary
        if word in correction_dict:
            # If it does, append the corrected word to the new list and track the correction
            corrected_word = correction_dict[word]
            # Restore the original case of the word
            if original_case[index].istitle():
                corrected_word = corrected_word.capitalize()
            elif original_case[index].isupper():
                corrected_word = corrected_word.upper()
            new_word_list.append(corrected_word)
            corrections_tracker[word] = corrected_word

            # Extract context around the corrected word
            start_index = max(0, index - 10)
            end_index = min(len(clean_text_list), index + 11)
            context = clean_text_list[start_index:end_index]

            # Store the context with the correction
            corrections_tracker[word] = {
                'corrected_word': corrected_word,
                'context': ' '.join(context)
            }
        else:
            # If it doesn't, append the original word
            new_word_list.append(original_case[index])

    # Send data back to text
    new_corrected_text = ' '.join(new_word_list)

    return corrections_tracker, new_corrected_text

# Use the function and make two new columns to add to the dataframe
df['corrections_tracker'], df['ocr_corrected_text'] = zip(*tqdm(df['preprocessed_content'].apply(ocr_post_correction), desc="Applying OCR post-correction", colour='yellow'))

print ('Saving data')
# Save the df as a csv file
os.chdir(r'.\literature csv')
df.to_csv(f'{book_title}.csv', index=False)


# Save txt versions
os.chdir('..')
os.chdir(r'.\literature txt')

# Save text version without spell correction
text = ' '.join(df['preprocessed_content'].to_list())
with open(f'{book_title} without ocr correction.txt', 'w', encoding='utf-8') as f:
    f.write(text)

# Save text version with spell correction
# Join the corrected words back into a string
ocr_corrected_text = ' '.join(df['ocr_corrected_text'].to_list())
with open(f'{book_title} with ocr correction.txt', 'w', encoding='utf-8') as f:
    f.write(ocr_corrected_text)

########
# Save ocr corrections
print ('Save ocr corrections')
os.chdir('..')
os.chdir(r'.\ocr_correction_tracker')
obs_corrections_tracker = []
for i,j in enumerate(df['corrections_tracker'].to_list()):
    if j:
        observation = f"Index no.: {str(i)}. OCR observation: {str(j)}"
        observation = observation.replace('{', '').replace('}', '')
        obs_corrections_tracker.append(observation)

ocr_observations = '\n\n'.join(obs_corrections_tracker)

with open(f'{book_title} ocr corrections.txt', 'w', encoding='utf-8') as f:
    f.write(ocr_observations)

##############

print ('Script done.')

# change to main folder
os.chdir('..')

# Record the end time
end_time = time.time()

# Calculate the elapsed time
elapsed_time = end_time - start_time

# Print the elapsed time
print(f"Time taken to run the script: {elapsed_time:.6f} seconds")

import libraries
start time
Start processing "1860 A Residence in Jutland the Danish Isles and Copenhagen vol II".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1314/1314 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.438003 seconds





In [9]:
#! pip install --upgrade langchain_community unstructured pypandoc

In [None]:
# Interate a folder

In [None]:
print ('import libraries')
# import libraries
from langchain_community.document_loaders import UnstructuredEPubLoader
from langchain.document_loaders import PyPDFLoader
import pandas as pd
import re
import os
import csv
import json
from tqdm import tqdm  # Import tqdm for progress tracking
import time

In [29]:
path_to_folder = r'C:\Users\lakj\Documents\GitHub\nordic travel literature\literature epub'
no_of_files = len(os.listdir(path_to_folder))
files = os.listdir(path_to_folder)

In [30]:
def input_is_folder(epub_file):
    # Save start time
    print ('start time')
    start_time = time.time()
    #######################
    
    # Move to epub folder
    os.chdir(r'.\literature epub')
    # load text from epub
    #epub_file = '1853 Visit to Iceland And the Scandinavian North.epub'
    book_title = epub_file[:-5]
    print (f'Start processing "{book_title}".\nWait a few seconds.')
    loader = UnstructuredEPubLoader(epub_file, mode="elements")
    epub_pages = loader.load()
    
    # Extract data from epub and send it to a dataframe
    content = []
    source = []
    element_no = []
    
    
    # add counter to element number
    count = 1 
    for i in epub_pages:
           
        # return the text content, source, and element no.
        pageContent = i.page_content
        pageSource = i.metadata['source'][:-5]
        elementNo = count
        
        content.append(pageContent)
        source.append(pageSource)
        element_no.append(elementNo)
    
        count += 1
        
    df = pd.DataFrame({'content': content, 'source': source, 'element_no': element_no})
    
    ######################
    print ('Preprocess text')
    # preprocess text
    def preproces_text(text_string):
        # Store text data in variable
        # Remove newlines, replace apostrophe s with s, remove quotation marks
        text_string_replace = text_string.replace(r' \xad\n', '').replace(r'\xad\n', '') \
            .replace(r'\xad', '').replace(r'\xad ', '') \
            .replace(r'-\n', '').replace('—', ' ').replace(r'\n', ' ') \
            .replace("'s", "s").replace('"', ' ').replace("'", " ") \
            .replace('.', ' . ').replace(' .', ' . ').replace(',', ' ,') \
            .replace('!', ' !').replace('?', ' ?')
    
        # scrub for multiple signs
        # but keep full stops, commas, exclamation, and question marks
        text_string_re = re.sub(r':|;||||\||\+|\"|‘|’|“|”|\’|…|\-|–|—|\$|&|\*|>|<|\/||||»|«', '', text_string_replace)
    
        # Remove numbers and words containing numbers
        # text_string_no_numbers = re.sub(r'\b\w*\d\w*\b', '', text_string_re)
        # Remove multiple whitespaces
        text_string_sub_additional_white_space = re.sub(r'\s+', ' ', text_string_re)
    
        return text_string_sub_additional_white_space
    
    df['preprocessed_content'] = df['content'].apply(lambda x : preproces_text(x))
    
    ################
    
    # go back to main folder
    os.chdir('..')
    
    
    print('Starting ocr correction - loading correction rules')
    # Read the correction rules from the file
    with open('Underwoods_CorrectionRules.txt', 'r', encoding='utf-8') as f:
        CorrectionRules_string = f.read()
    
    # Process the correction rules
    CorrectionRules_list = CorrectionRules_string.split('\n')
    list_of_CorrectionRules_list = [i.split('\t') for i in CorrectionRules_list]
    better_list_of_CorrectionRules = [i[0:2] for i in list_of_CorrectionRules_list]
    
    # Add additional pairs
    observations_to_add = [["Bruffels", 'Brussels'], ["fix", 'six'], ['Elsinore', 'Helsingør']]
    better_list_of_CorrectionRules.extend(observations_to_add)
    
    # Build correction dictionary
    correction_dict = dict(better_list_of_CorrectionRules)
    
    
    
    print('Ocr correction')
    
    def ocr_post_correction(text):
        # Prepare for correction
        ### Keep track of the conditions of the words (capital, title, upper,lower)
        tokens = text.split()
    
        # Track the original case of each word
        original_case = [i for i in tokens]
        clean_text_list = [i.lower() for i in tokens]
    
        # Initialize the list to store corrected words and a dictionary to track corrections
        new_word_list = []
        corrections_tracker = {}
    
        # Iterate through the clean_text_list word list
        for index, word in enumerate(clean_text_list):
            # Check if the word has a correction in the dictionary
            if word in correction_dict:
                # If it does, append the corrected word to the new list and track the correction
                corrected_word = correction_dict[word]
                # Restore the original case of the word
                if original_case[index].istitle():
                    corrected_word = corrected_word.capitalize()
                elif original_case[index].isupper():
                    corrected_word = corrected_word.upper()
                new_word_list.append(corrected_word)
                corrections_tracker[word] = corrected_word
    
                # Extract context around the corrected word
                start_index = max(0, index - 10)
                end_index = min(len(clean_text_list), index + 11)
                context = clean_text_list[start_index:end_index]
    
                # Store the context with the correction
                corrections_tracker[word] = {
                    'corrected_word': corrected_word,
                    'context': ' '.join(context)
                }
            else:
                # If it doesn't, append the original word
                new_word_list.append(original_case[index])
    
        # Send data back to text
        new_corrected_text = ' '.join(new_word_list)
    
        return corrections_tracker, new_corrected_text
    
    # Use the function and make two new columns to add to the dataframe
    df['corrections_tracker'], df['ocr_corrected_text'] = zip(*tqdm(df['preprocessed_content'].apply(ocr_post_correction), desc="Applying OCR post-correction", colour='yellow'))
    
    print ('Saving data')
    # Save the df as a csv file
    os.chdir(r'.\literature csv')
    df.to_csv(f'{book_title}.csv', index=False)
    
    
    # Save txt versions
    os.chdir('..')
    os.chdir(r'.\literature txt')
    
    # Save text version without spell correction
    text = ' '.join(df['preprocessed_content'].to_list())
    with open(f'{book_title} without ocr correction.txt', 'w', encoding='utf-8') as f:
        f.write(text)
    
    # Save text version with spell correction
    # Join the corrected words back into a string
    ocr_corrected_text = ' '.join(df['ocr_corrected_text'].to_list())
    with open(f'{book_title} with ocr correction.txt', 'w', encoding='utf-8') as f:
        f.write(ocr_corrected_text)
    
    ########
    # Save ocr corrections
    print ('Save ocr corrections')
    os.chdir('..')
    os.chdir(r'.\ocr_correction_tracker')
    obs_corrections_tracker = []
    for i,j in enumerate(df['corrections_tracker'].to_list()):
        if j:
            observation = f"Index no.: {str(i)}. OCR observation: {str(j)}"
            observation = observation.replace('{', '').replace('}', '')
            obs_corrections_tracker.append(observation)
    
    ocr_observations = '\n\n'.join(obs_corrections_tracker)
    
    with open(f'{book_title} ocr corrections.txt', 'w', encoding='utf-8') as f:
        f.write(ocr_observations)
    
    ##############
    
    print ('Script done.')
    
    # change to main folder
    os.chdir('..')
    
    # Record the end time
    end_time = time.time()
    
    # Calculate the elapsed time
    elapsed_time = end_time - start_time
    
    # Print the elapsed time
    print(f"Time taken to run the script: {elapsed_time:.6f} seconds")

In [31]:
[input_is_folder(epub_file) for epub_file in files]

start time
Start processing "1773 Travels through Holland Flanders Germany Denmark Sweden Lapland Russia the Ukraine and Poland in the years 1768 1769 and 1770".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 784/784 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.448855 seconds
start time
Start processing "1795 A tour through some of the northern parts of Europe".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 677/677 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.777956 seconds
start time
Start processing "1805 A northern summer".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2201/2201 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 24.087781 seconds
start time
Start processing "1807 A tour through part of Germany Poland Russia Sweden Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 598/598 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.860720 seconds
start time
Start processing "1809 Travelling sketches in Russia and Sweden".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2413/2413 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 19.674750 seconds
start time
Start processing "1810 Travel through Denmark and part of Sweden during the winter and spring of the year 1809".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 650/650 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.647672 seconds
start time
Start processing "1811 Journal of a Tour in Iceland in the Summer of 1809".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1977/1977 [00:00<00:00, 246885.37it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.278895 seconds
start time
Start processing "1812 Travels in the island of Iceland during the summer of the year MDCCCX".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 6263/6263 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 33.383862 seconds
start time
Start processing "1813 Travels in Sweden during the Autumn of 1812".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2801/2801 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 7.188789 seconds
start time
Start processing "1813 Travels through Norway and Lapland during the years 1806 1807 and 1808".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1611/1611 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.673369 seconds
start time
Start processing "1815 A Tour Through Sweden Swedish_Lapland Finland and Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 649/649 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.594533 seconds
start time
Start processing "1816 Journal of a Tour in Germany Sweden Russia Poland During the Years 1813 and 1814".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1678/1678 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.905273 seconds
start time
Start processing "1816 Sketches on a Tour to Copenhagen Through Norway and Sweden".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1465/1465 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.702153 seconds
start time
Start processing "1816 Travels Through Part of the Russian Empire and the Country of Poland Along the Southern Shores of the Baltic".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1007/1007 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.028809 seconds
start time
Start processing "1818 Iceland Or The Journal of a Residence in that Island During the Years 1814 and 1815 Vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1011/1011 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.726991 seconds
start time
Start processing "1818 Iceland Or The Journal of a Residence in that Island During the Years 1814 and 1815 Vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2210/2210 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.343188 seconds
start time
Start processing "1826 Travels in Norway Sweden Denmark Hanover Germany Netherlands".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3981/3981 [00:00<?, ?it/s]

Saving data
Save ocr corrections





Script done.
Time taken to run the script: 7.966364 seconds
start time
Start processing "1826 Travels in Sweden Denmark and Norway".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 353/353 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.957046 seconds
start time
Start processing "1827 Travels in Norway Sweden Finland Russia and Turkey vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1954/1954 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.306626 seconds
start time
Start processing "1827 Travels in Norway Sweden Finland Russia and Turkey vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1784/1784 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.855132 seconds
start time
Start processing "1830 Field sports of the north of Europe vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1377/1377 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.339149 seconds
start time
Start processing "1830 Field sports of the north of Europe vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1720/1720 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.614130 seconds
start time
Start processing "1831 Iceland Or The Journal of a Residence in that Island".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 943/943 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.966225 seconds
start time
Start processing "1832 Letters from the north of Europe".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1174/1174 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.466552 seconds
start time
Start processing "1834 Norway Views of wild scenery and journal".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 412/412 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.297385 seconds
start time
Start processing "1835 A Personal Narrative of a Journey Through Norway Part of Sweden and the Islands and States of Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1215/1215 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.264511 seconds
start time
Start processing "1835 A Visit to Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 881/881 [00:00<00:00, 110031.32it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.759729 seconds
start time
Start processing "1836 Journal of a Residence in Norway During the Years 1834 1835 and 1836".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1292/1292 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.950837 seconds
start time
Start processing "1838 Recollections of a tour in the North of Europe in 1836-1837".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 5003/5003 [00:00<00:00, 4997404.84it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 27.158223 seconds
start time
Start processing "1838 Travels in Various Countries of Scandinavia vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3905/3905 [00:00<00:00, 4013417.57it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 8.683641 seconds
start time





Start processing "1838 Travels in Various Countries of Scandinavia vol 2".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3035/3035 [00:00<00:00, 3140037.65it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.958644 seconds
start time
Start processing "1838 Travels in Various Countries of Scandinavia vol 3".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2491/2491 [00:00<00:00, 2493558.77it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.874808 seconds
start time
Start processing "1839 A Tour in Sweden in 1838".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 951/951 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.129997 seconds
start time
Start processing "1840 An Historical and Descriptive Account of Iceland, Greenland, and the Faroe Islands".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1378/1378 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.238269 seconds
start time
Start processing "1840 Excursions in Denmark Norway and Sweden vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 924/924 [00:00<00:00, 959053.92it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.807806 seconds
start time
Start processing "1841 Visit to Northern Europe vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1120/1120 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.342052 seconds
start time
Start processing "1842 Letters from the Shores of the Baltic vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 358/358 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.049547 seconds
start time
Start processing "1842 Letters from the Shores of the Baltic vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 519/519 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.790713 seconds
start time
Start processing "1848 Handbook for Northern Europe Including Denmark Norway Sweden Finland and Russia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3665/3665 [00:00<00:00, 3669640.53it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 8.295602 seconds





start time
Start processing "1849 A Yacht Voyage to Norway Denmark and Sweden".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1663/1663 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.464308 seconds
start time
Start processing "1849 Handbook for Northern Europe Including Denmark Norway Sweden Finland and Russia vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2956/2956 [00:00<00:00, 2956214.26it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.361740 seconds
start time
Start processing "1851 One year in Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 212/212 [00:00<00:00, 209715.20it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.244613 seconds
start time
Start processing "1852 Observations on the Social and Political State of Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1754/1754 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.040210 seconds
start time
Start processing "1853 Life in Sweden vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1094/1094 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.393388 seconds
start time
Start processing "1853 Life in Sweden vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1459/1459 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.275326 seconds
start time
Start processing "1853 Norway and Its Glaciers".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1611/1611 [00:00<00:00, 1613424.96it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.760660 seconds
start time
Start processing "1853 Norway and its scenery".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2900/2900 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.224669 seconds
start time
Start processing "1853 Visit to Iceland And the Scandinavian North".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1170/1170 [00:00<00:00, 1168691.52it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.245926 seconds
start time
Start processing "1854 A Yacht Voyage to Iceland in 1853".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 749/749 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.595286 seconds
start time
Start processing "1854 Norðurfari or Rambles in Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1358/1358 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.744073 seconds
start time
Start processing "1854 The Baltic Its Gates Shores and Cities".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1296/1296 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.631772 seconds
start time
Start processing "1854 Travels and Adventures in Sweden and Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 812/812 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.595928 seconds
start time
Start processing "1854 Travels on the Shores of the Baltic Extended to Moscow".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1357/1357 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.628800 seconds
start time
Start processing "1855 Forest Scenes in Norway and Sweden".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2676/2676 [00:00<00:00, 2673644.00it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.064349 seconds
start time
Start processing "1856 A Summer in Northern Europe".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1484/1484 [00:00<00:00, 1483753.79it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.227608 seconds
start time
Start processing "1856 Europe vol 3 Residence in Norway Notes on France Russia etc".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1690/1690 [00:00<00:00, 1694972.20it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.391122 seconds





start time
Start processing "1856 Tracings of Iceland and the Faroe Islands".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 178/178 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.555885 seconds
start time
Start processing "1857 A long vacation ramble in Norway and Sweden".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1015/1015 [00:00<00:00, 1015315.66it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.576519 seconds
start time
Start processing "1857 Letters from High Latitudes".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 746/746 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.509607 seconds
start time
Start processing "1857 Unprotected Females in Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1045/1045 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.292920 seconds
start time
Start processing "1858 The Oxonian in Thelemarken vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1107/1107 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.910603 seconds
start time
Start processing "1858 The Oxonian in Thelemarken vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1207/1207 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.936493 seconds
start time
Start processing "1859 Northern Travel".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 762/762 [00:00<00:00, 382669.98it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.826235 seconds
start time
Start processing "1859 The Norse folk or A visit to the homes of Norway and Sweden".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 522/522 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.894387 seconds
start time
Start processing "1859 Through Norway with a knapsack".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2797/2797 [00:00<00:00, 2790549.07it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 21.091928 seconds
start time
Start processing "1861 The Oxonian in Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1013/1013 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.440825 seconds
start time
Start processing "1862 One year in Sweden including a visit to the isle of Gotland vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 4391/4391 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 7.846021 seconds
start time
Start processing "1862 Pen and Pencil Sketches of Faroe and Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3648/3648 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.414979 seconds
start time
Start processing "1863 A Yachting Cruise in the Baltic".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1587/1587 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.814864 seconds
start time
Start processing "1864 Chasing the Sun".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 610/610 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.248142 seconds
start time
Start processing "1867 Sweden and Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1879/1879 [00:00<00:00, 1877345.69it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.030429 seconds
start time
Start processing "1867 The Land of Thor".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1536/1536 [00:00<00:00, 1533551.76it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.944042 seconds
start time
Start processing "1870 The Knapsack guide to Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2437/2437 [00:00<00:00, 2379310.72it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.432774 seconds
start time
Start processing "1871 A Handbook for Travellers in Denmark Norway and Sweden third edition".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 7155/7155 [00:00<00:00, 3582457.34it/s]

Saving data





Save ocr corrections
Script done.
Time taken to run the script: 12.423533 seconds
start time
Start processing "1871 Canoe Travelling Log of a Cruise on the Baltic".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 733/733 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.964013 seconds
start time
Start processing "1871 How to See Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1019/1019 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.276581 seconds
start time
Start processing "1871 Up the Baltic Or Young America in Norway Sweden and Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2772/2772 [00:00<00:00, 2874316.61it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.453424 seconds
start time
Start processing "1873 Tent Life with English Gipsies in Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1918/1918 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.187166 seconds
start time
Start processing "1873 To Iceland in a Yacht".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 461/461 [00:00<00:00, 440249.12it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.621122 seconds
start time
Start processing "1874 Wayside notes in Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2456/2456 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 13.317996 seconds
start time
Start processing "1875 The Pilgrim of Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 388/388 [00:00<00:00, 388398.56it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.950545 seconds
start time
Start processing "1875 Ultima Thule or A Summer in Iceland vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1893/1893 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.036602 seconds
start time
Start processing "1875 Ultima Thule or A Summer in Iceland vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1747/1747 [00:00<00:00, 1743385.46it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.438897 seconds
start time
Start processing "1875 Ultima Thule or A Summer in Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1893/1893 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.161680 seconds
start time
Start processing "1875 Up The Baltic  Or Young America in Norway Sweden and Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2861/2861 [00:00<00:00, 2906953.43it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.486533 seconds
start time
Start processing "1877 A summer holiday in Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1663/1663 [00:00<00:00, 1805157.23it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.512595 seconds
start time
Start processing "1877 Scenes of travel in Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 655/655 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.699773 seconds
start time
Start processing "1877 Six weeks in Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 313/313 [00:00<00:00, 312724.43it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.874555 seconds
start time
Start processing "1878 To the Arctic Regions and Back in Six Weeks".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1809/1809 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 4.046121 seconds
start time
Start processing "1879 Diary of a tour in Sweden Norway and Russia in 1827 with letters".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1122/1122 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 12.561373 seconds
start time
Start processing "1879 Gamle Norge Rambles and Scrambles in Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 655/655 [00:00<00:00, 654735.25it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.036509 seconds
start time
Start processing "1881 Denmark and Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1456/1456 [00:00<00:00, 1452987.54it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.938597 seconds
start time
Start processing "1881 The story of a scandinavian summer".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1661/1661 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.765163 seconds
start time
Start processing "1882 Summer travelling in Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1580/1580 [00:00<00:00, 1577106.22it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.588426 seconds
start time
Start processing "1882 Sweden and Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2008/2008 [00:00<00:00, 990376.58it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.877325 seconds
start time
Start processing "1882 The Land of the Midnight Sun vol 1".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2399/2399 [00:00<00:00, 2390623.73it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.007459 seconds
start time
Start processing "1882 The Land of the Midnight Sun vol 2".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2358/2358 [00:00<00:00, 2421093.96it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 5.143129 seconds
start time
Start processing "1882 Three in Norway by Two of Them".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1322/1322 [00:00<00:00, 1320207.12it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.865093 seconds
start time
Start processing "1884 Driftwood from Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1167/1167 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.985980 seconds
start time
Start processing "1884 Zigzag Journeys in Northern Land".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 2196/2196 [00:00<00:00, 2191978.01it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.223131 seconds
start time
Start processing "1885 Sketches in Holland and Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 428/428 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.420000 seconds
start time
Start processing "1885 The viking Bodleys".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1622/1622 [00:00<00:00, 1603762.63it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 8.616974 seconds
start time
Start processing "1887 Glimpses of Scandinavia and Russia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 533/533 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.932653 seconds
start time
Start processing "1887 Norway Nights and Russian Days".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 603/603 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.893769 seconds
start time
Start processing "1889 Letters Written During a Short Residence in Sweden Norway and Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 603/603 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.148021 seconds
start time
Start processing "1890 Norwegian Pictures".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1006/1006 [00:00<00:00, 1053024.66it/s]


Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.095540 seconds
start time
Start processing "1894 A Girls Ride in Iceland".
Wait a few seconds.
Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 747/747 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.281957 seconds
start time
Start processing "1894 A Winter Jaunt to Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1480/1480 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.809255 seconds
start time
Start processing "1899 Ruins of the Saga Time".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 677/677 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.063102 seconds
start time
Start processing "1902 Across Iceland".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 990/990 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.234737 seconds
start time
Start processing "1906 Through Scandinavia to Moscow".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 927/927 [00:00<00:00, 929727.36it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.410426 seconds
start time
Start processing "1907 The Norwegian Fjords".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 700/700 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.791727 seconds
start time
Start processing "1911 Peeps at Many Lands Norway".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 551/551 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.571963 seconds
start time
Start processing "1914 Capitals of the Northlands Tales of Ten Cities".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1244/1244 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 2.651119 seconds
start time
Start processing "1914 Iceland Horseback tours in saga land".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1652/1652 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.459002 seconds
start time
Start processing "1918 The Charm of Scandinavia".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 1492/1492 [00:00<00:00, 1489976.56it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 3.359514 seconds
start time
Start processing "1921 Denmark".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 307/307 [00:00<?, ?it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 1.163779 seconds
start time
Start processing "1963 Iceland Its Scenes and Its Sagas".
Wait a few seconds.





Preprocess text
Starting ocr correction - loading correction rules
Ocr correction


Applying OCR post-correction: 100%|[33m██████████[0m| 3773/3773 [00:00<00:00, 3773273.48it/s]

Saving data
Save ocr corrections
Script done.
Time taken to run the script: 6.587312 seconds





[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]