# eScriptorium API Extensions

### To find information about transcriptions, line and region typologies, and more, use get_basic_info(doc_pk)


Empty line identification

In [None]:
# Function to identify empty lines across all parts of the document
def identify_empty_lines_in_document(doc_pk, tr_level, min_length=1):
    """Print the lines of a document whose transcription is empty or missing.

    For every part returned by ``get_all_parts(doc_pk)``, a line is reported
    when its transcription content is shorter than ``min_length`` after
    stripping whitespace, or when no transcription refers to it at all.
    Lines are only reported; nothing is modified or deleted.

    Args:
        doc_pk: Primary key of the document to scan.
        tr_level: Transcription pk handed to ``get_page_transcription``.
        min_length: Minimum stripped length for a transcription to count as
            non-empty.

    Returns:
        None. Results are written with ``print``.
    """
    for part in get_all_parts(doc_pk):
        part_pk = part['pk']

        # Fetch transcriptions and all lines for the part
        transcriptions = get_page_transcription(doc_pk, part_pk, tr_level)
        lines = get_all_lines_of_part(doc_pk, part_pk)
        line_pks = get_pks_of_dict_list(lines)

        # Line pks to report, in the order: short transcriptions first, then
        # lines without any transcription
        lines2identify = []

        # Collected once per part so the membership test below is O(1) instead
        # of rebuilding a list for every line.
        # NOTE(review): assumes tr['line'] is a hashable pk (e.g. an int) — confirm
        transcribed_line_pks = set()

        for tr in transcriptions:
            transcribed_line_pks.add(tr['line'])
            # strip() makes whitespace-only content count as empty
            if len(tr['content'].strip()) < min_length:
                lines2identify.append(tr['line'])

        # Lines that no transcription refers to
        lines2identify.extend(
            line_pk for line_pk in line_pks if line_pk not in transcribed_line_pks
        )

        # Report the identified lines instead of deleting them
        if lines2identify:
            print(f"Identified {len(lines2identify)} lines with empty or no transcription in part {part_pk}:")
            for line_pk in lines2identify:
                print(f"Line PK: {line_pk}")

# Example usage: report empty or missing transcriptions for document pk 8.
# These names are assigned at notebook level, so they stay defined for code that runs afterwards.
doc_pk = 8  # Document primary key
tr_level = 18  # Replace with the actual transcription level pk (e.g. 'manual' = 18)
min_length = 1  # Stripped transcriptions shorter than this count as empty

# Scan every part of the document and print the pks of the lines found
identify_empty_lines_in_document(doc_pk, tr_level, min_length=min_length)


Line Typology Analysis and Update

In [None]:
# @title
# Tally how many lines of the document carry each line typology
def count_line_typologies(doc_pk):
    """Return a plain dict mapping each line typology to its number of lines.

    Every part of the document is visited. A line whose ``typology`` is
    missing or falsy is tallied under the string key ``'None'``.
    """
    tally = defaultdict(int)

    for part in get_all_parts(doc_pk):
        for line in get_all_lines_of_part(doc_pk, part['pk']):
            # A missing key and a falsy value both land in the 'None' bucket
            bucket = line.get('typology') or 'None'
            tally[bucket] += 1

    # Hand back an ordinary dict rather than the defaultdict
    return dict(tally)

# Count the line typologies of one document.
# NOTE: XXX is a placeholder, not a defined name; replace it before running this cell.
typology_summary = count_line_typologies(doc_pk=XXX)  # Replace XXX with the actual document primary key

# Print one row per typology; lines without a typology are listed under 'None'
for typology, count in typology_summary.items():
    print(f"Typology '{typology}': {count} lines")


In [None]:
# @title
# Collect the lines whose typology is one of the targets, or that have no typology
def find_specific_typologies(doc_pk, target_typologies):
    """Return a list of dicts describing the matching lines of a document.

    A line matches when its ``typology`` is in ``target_typologies`` or is
    ``None``; lines without a typology are therefore always included. Each
    entry holds ``line_pk``, ``document_part``, ``typology`` and a ``content``
    field that is always ``None``.
    """
    found = []

    for part in get_all_parts(doc_pk):
        for line in get_all_lines_of_part(doc_pk, part['pk']):
            typology = line.get('typology')

            # Skip lines that carry a typology outside the target list
            if typology not in target_typologies and typology is not None:
                continue

            found.append({
                'line_pk': line['pk'],
                'document_part': line['document_part'],
                'typology': typology,
                'content': None,  # Placeholder; no transcription is fetched here
            })

    return found

# Typology pks to search for. Lines with no typology (None) are always returned
# by find_specific_typologies, so None does not need to be listed here.
target_typologies = [18]  # You can add more typologies if needed

# Find the lines whose typology is in target_typologies or is None.
# NOTE: XXX is a placeholder, not a defined name; replace it before running this cell.
specific_lines = find_specific_typologies(doc_pk=XXX, target_typologies=target_typologies)  # Replace XXX with your document pk

# Print out the details of the lines found ('content' is always None in these results)
for line in specific_lines:
    typology_label = 'None' if line['typology'] is None else line['typology']
    print(f"Line PK: {line['line_pk']}, Document Part: {line['document_part']}, Typology: {typology_label}, Content: {line['content']}")


In [None]:
# Change the typology of one specific line, but only if it currently has none

# Set your document pk, part pk, and line pk
# NOTE: XXX, YYY and ZZZ are placeholders, not defined names; replace them before running this cell
doc_pk = XXX
part_pk = YYY
line_pk = ZZZ

default_typology_pk = 9 # pk of the typology to assign; change according to the typology you want

# Fetch the specific line data (assuming the get_line function exists)
line_data = get_line(doc_pk, part_pk, line_pk)

# Only a line without a typology is updated; change the condition according to need
if line_data['typology'] is None:
    line_data['typology'] = default_typology_pk  # Set typology to the chosen pk

    # Prepare the payload with the required fields
    update_payload = {
        'typology': line_data['typology'],  # The pk assigned just above
        'document_part': line_data['document_part']  # Include the document part PK
    }

    # Build the URL of this line and send the update through update_item
    update_url = get_specific_line_url(doc_pk, part_pk, line_pk)  # Generate the specific URL for the line
    result = update_item(update_url, update_payload)  # Send the update

    # A status code of 200 is treated as success, anything else as a failure
    # NOTE(review): the success message always says 'default', whatever default_typology_pk is set to
    if result.status_code == 200:
        print(f"Line {line_pk} typology updated to 'default'")
    else:
        print(f"Failed to update line {line_pk}, response: {result.status_code}, {result.content}")
else:
    # The line already has a typology, so it is left untouched
    print(f"Line {line_pk} already has typology: {line_data['typology']}")


In [None]:
# Bulk-assign a typology to every line of a document that currently has no typology

# pk of the typology to assign; read by process_document_lines_bulk when it runs
default_typology_pk = 9  # change according to the typology you want

# Function to process all lines in a document and update typology if it's None
def process_document_lines_bulk(doc_pk, typology_pk=None):
    """Assign a typology to every line of a document whose typology is None.

    Each part of the document is processed separately: lines without a
    typology are collected and sent in a single ``bulk_update_lines`` call per
    part. Lines that already have a typology are left unchanged and reported.
    Progress and results are printed.

    Args:
        doc_pk: Primary key of the document to process.
        typology_pk: pk of the typology to assign. When omitted (``None``),
            the module-level ``default_typology_pk`` is used, which keeps
            existing calls working unchanged.

    Returns:
        None.
    """
    parts = get_all_parts(doc_pk)  # Assuming this function exists to fetch all parts of the document

    for part in parts:
        part_pk = part['pk']
        print(f"Processing part {part_pk}")

        lines = get_all_lines_of_part(doc_pk, part_pk)  # Assuming this function exists to fetch all lines for the part
        updated_lines = []  # Payload entries for the lines that need updating

        for line in lines:
            line_pk = line['pk']

            if line['typology'] is not None:
                print(f"Line {line_pk} in part {part_pk} already has typology: {line['typology']}")
                continue

            # The global is looked up only when a line actually needs it, as before
            line['typology'] = default_typology_pk if typology_pk is None else typology_pk

            # Only the fields required for the update are sent
            updated_lines.append({
                'pk': line_pk,
                'typology': line['typology'],
                'document_part': line['document_part']
            })

        # One bulk request per part, and only when there is something to send
        if updated_lines:
            result = bulk_update_lines(doc_pk, part_pk, updated_lines)
            if result.status_code == 200:
                print(f"Successfully updated {len(updated_lines)} lines in part {part_pk}")
            else:
                print(f"Failed to update lines in part {part_pk}, response: {result.status_code}, {result.content}")
        else:
            print(f"No lines to update in part {part_pk}")

# Call the function with the document primary key
# NOTE: XXX is a placeholder, not a defined name; replace it before running this cell.
process_document_lines_bulk(doc_pk=XXX) # Replace XXX with the actual document primary key

Region Typology Analysis

In [None]:
# Tally how many regions of the document carry each region typology
def count_region_typologies(doc_pk):
    """Return a plain dict mapping each region typology to its number of regions.

    Every part of the document is visited. A region whose ``typology`` is
    missing or falsy is tallied under the string key ``'None'``.
    """
    tally = defaultdict(int)

    for part in get_all_parts(doc_pk):
        # Assuming get_all_regions_of_part fetches all regions for the part
        for region in get_all_regions_of_part(doc_pk, part['pk']):
            # A missing key and a falsy value both land in the 'None' bucket
            bucket = region.get('typology') or 'None'
            tally[bucket] += 1

    # Hand back an ordinary dict rather than the defaultdict
    return dict(tally)

# Count the region typologies of one document.
# NOTE: XXX is a placeholder, not a defined name; replace it before running this cell.
region_typology_summary = count_region_typologies(doc_pk=XXX)  # Replace XXX with the actual document pk

# Print one row per typology; regions without a typology are listed under 'None'
for typology, count in region_typology_summary.items():
    print(f"Typology '{typology}': {count} regions")


In [None]:
# Collect the regions whose typology is one of the targets, or that have no typology
def find_specific_region_typologies(doc_pk, target_typologies):
    """Return a list of dicts describing the matching regions of a document.

    A region matches when its ``typology`` is in ``target_typologies`` or is
    ``None``; regions without a typology are therefore always included. Each
    entry holds ``region_pk``, ``document_part`` (the pk of the part being
    scanned) and ``typology``.
    """
    found = []

    for part in get_all_parts(doc_pk):
        part_pk = part['pk']

        for region in get_all_regions_of_part(doc_pk, part_pk):
            region_typology = region.get('typology')

            # Skip regions that carry a typology outside the target list
            if region_typology not in target_typologies and region_typology is not None:
                continue

            # Further fields such as 'external_id' or 'box' could be added here
            found.append({
                'region_pk': region['pk'],
                'document_part': part_pk,
                'typology': region_typology,
            })

    return found

# Typology pks to search for. Regions with no typology (None) are always returned
# by find_specific_region_typologies, so None does not need to be listed here.
target_typologies = [37, 58]  # None is matched by the function itself, not by this list

# Find the regions whose typology is in target_typologies (e.g., 37, 58) or is None.
# NOTE: XXX is a placeholder, not a defined name; replace it before running this cell.
specific_regions = find_specific_region_typologies(doc_pk=XXX, target_typologies=target_typologies)  # Replace XXX with the actual document primary key

# Print out the details of the regions found, or a notice when there are none
if specific_regions:
    print("Found regions with specific typologies:")
    for region in specific_regions:
        typology_label = 'None' if region['typology'] is None else region['typology']
        print(f"Region PK: {region['region_pk']}, Document Part: {region['document_part']}, Typology: {typology_label}") # Optional extra fields, currently not collected: External ID: {region['external_id']}, Box: {region['box']}
else:
    print("No regions found with the specified typologies.")
