In [6]:
from bs4 import BeautifulSoup

def fix_blockquote_tags(input_file_path, output_file_path):
    """Remove every blockquote nested inside another blockquote and save the result.

    The HTML at ``input_file_path`` is parsed with BeautifulSoup's
    ``html.parser``; each ``<blockquote>`` that is a descendant of another
    ``<blockquote>`` is decomposed, and the serialized tree is written to
    ``output_file_path``. Both files are opened as UTF-8.

    Args:
        input_file_path: Path of the HTML file to read.
        output_file_path: Path of the HTML file to write.
    """
    with open(input_file_path, 'r', encoding='utf-8') as source:
        document = BeautifulSoup(source, 'html.parser')

    # The outer list is collected once, before anything is removed.
    for outer in document.find_all('blockquote'):
        for inner in outer.find_all('blockquote'):
            # NOTE(review): decompose() discards the nested tag together with
            # its contents; confirm that dropping the nested text is intended
            # (unwrap() would keep the contents and remove only the tag).
            inner.decompose()

    with open(output_file_path, 'w', encoding='utf-8') as target:
        target.write(str(document))

# Modify the paths to your input HTML file and output HTML file.
# NOTE(review): both paths are absolute and point into one user's home
# directory, so this cell only runs on a machine where that directory exists.
input_file_path = '/Users/sjhuskey/Library/CloudStorage/OneDrive-UniversityofOklahoma/DH/Bradford/html-to-structured/mvp_modified4_output.html'
output_file_path = '/Users/sjhuskey/Library/CloudStorage/OneDrive-UniversityofOklahoma/DH/Bradford/html-to-structured/mvp_modified4_output_fixed.html'
# Read the input file, drop nested blockquotes, and write the output file.
fix_blockquote_tags(input_file_path, output_file_path)

# Report where the result was written.
print(f"Fixed HTML saved to '{output_file_path}'.")

Fixed HTML saved to '/Users/sjhuskey/Library/CloudStorage/OneDrive-UniversityofOklahoma/DH/Bradford/html-to-structured/mvp_modified4_output_fixed.html'.


In [10]:
import json
import math

# Function to convert JSON entry to HTML
def json_to_html(entry):
    """Render a single entry as an HTML ``<div>`` block followed by a separator line.

    ``entry`` is indexed by the keys ``identifier``, ``name``, ``date``,
    ``sources`` (each item indexed by ``Reference`` and ``Resource``) and
    ``descriptions`` (each item indexed by ``description``). A falsy date is
    rendered as "No date"; a source whose reference is falsy shows only its
    resource. Values are interpolated as-is, without HTML escaping.

    Returns:
        The fragment as a string: one ``<p>`` per field, the closing
        ``</div>``, then the ``|--------------------|`` marker line.
    """
    pieces = [f'<div id="braford_{entry["identifier"]}">\n']
    pieces.append(f'<p class="name"><strong>Name: </strong>{entry["name"]}</p>\n')

    shown_date = entry["date"] or "No date"
    pieces.append(f'<p class="date"><strong>Date: </strong>{shown_date}</p>\n')

    for src in entry["sources"]:
        if not src["Reference"]:
            # No reference available: show the resource on its own.
            pieces.append(f'<p class="source"><strong>Source: </strong>{src["Resource"]}</p>\n')
            continue
        pieces.append(f'<p class="source"><strong>Source: </strong>{src["Reference"]}. {src["Resource"]}</p>\n')

    pieces.extend(
        f'<p class="description"><strong>Description: </strong> {item["description"]}</p>\n'
        for item in entry["descriptions"]
    )

    pieces.append('</div>\n|--------------------|\n')
    return ''.join(pieces)

# Read JSON data from file.
# The path is relative, so it is resolved against the kernel's working directory.
input_file_path = 'output_fixed.json'  # Replace with the actual path to your JSON file

with open(input_file_path, 'r', encoding='utf-8') as file:
    # presumably a list of entry dicts (it is sliced and iterated below) -- verify against the file
    data = json.load(file)

# Calculate the number of entries per group.
# Rounding up means three groups of this size cover every entry; the
# later groups may be shorter (or empty) when the total is not a multiple of 3.
total_entries = len(data)
entries_per_group = math.ceil(total_entries / 3)

# Function to create HTML content for a group of entries
def create_html_content(entries):
    """Wrap the rendered entries in a complete HTML page and return it as a string.

    Each item of ``entries`` is passed to ``json_to_html``; the resulting
    fragments are placed, in order, between the opening ``<body>`` tag and
    the closing ``</body>`` tag of a fixed page skeleton.

    Args:
        entries: Iterable of entries accepted by ``json_to_html``.

    Returns:
        The full HTML document text.
    """
    page_head = '''<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Converted JSON to HTML</title>
</head>
<body>
'''
    page_tail = '''
</body>
</html>
'''
    rendered = ''.join(json_to_html(item) for item in entries)
    return page_head + rendered + page_tail

# Create three separate HTML documents.
# The literal 3 here must stay in step with the divisor used to compute
# entries_per_group above.
for i in range(3):
    # Consecutive, non-overlapping slices; a slice that runs past the end of
    # data is simply shorter or empty, so a later file may hold no entries.
    group_entries = data[i * entries_per_group:(i + 1) * entries_per_group]
    html_content = create_html_content(group_entries)
    # Files are numbered from 1 and written relative to the working directory.
    output_file_path = f'newmvp-to-word-part{i + 1}.html'  # Replace with the actual path to your output HTML file
    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(html_content)
    print(f"Data conversion complete. HTML saved to '{output_file_path}'.")

Data conversion complete. HTML saved to 'newmvp-to-word-part1.html'.
Data conversion complete. HTML saved to 'newmvp-to-word-part2.html'.
Data conversion complete. HTML saved to 'newmvp-to-word-part3.html'.


In [9]:
import json
import csv

# Locations of the JSON input and the CSV output
input_file_path = 'output.json'  # Replace with the actual path to your JSON file
output_file_path = 'new_output.csv'  # Replace with the actual path to your output CSV file

with open(input_file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# Column order of the CSV file
fieldnames = ['identifier', 'name', 'date', 'source', 'reference', 'description']

# newline='' lets the csv module control line endings itself
with open(output_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for entry in data:
        identifier = entry['identifier']
        name = entry['name']
        date = entry['date']
        # Columns repeated on every row that belongs to this entry
        shared = {'identifier': identifier, 'name': name, 'date': date}

        # One row per source; the description column stays blank
        for source in entry['sources']:
            writer.writerow({
                **shared,
                'source': source['Resource'],
                'reference': source['Reference'],
                'description': '',
            })

        # One row per description; the source and reference columns stay blank
        for description in entry['descriptions']:
            writer.writerow({
                **shared,
                'source': '',
                'reference': '',
                'description': description['description'],
            })

print(f"Data conversion complete. CSV saved to '{output_file_path}'.")

Data conversion complete. CSV saved to 'new_output.csv'.
