# In [3]:
import pandas as pd
from PIL import Image
import base64
from io import BytesIO
import os

# Year of the dataset to render. The same value selects the input
# spreadsheet here and is reused further down for the HTML page title and
# the output file name.
year = 2022
# Path is relative ("../"), so it resolves against the current working
# directory at the time this runs.
diagram_data_path = f"../diagram_data/{year}_diagram_data.xlsx"

# Load the diagram data DataFrame. Later code reads an "image path" column
# from each row and renders the remaining columns as labels.
diagram_data_df = pd.read_excel(diagram_data_path)

# Function to encode image as base64 for HTML embedding and format the display
def get_image_html(image_path, image_labels):
    """Return an HTML fragment showing one image next to its labels.

    The image is re-encoded as PNG and embedded as a base64 ``data:`` URI,
    so the resulting page does not reference the image file on disk.

    Args:
        image_path: Path of an image file that Pillow can open.
        image_labels: DataFrame whose first row holds the values for this
            image. The first column is skipped; every remaining column is
            rendered as a ``<p>column: value</p>`` line.

    Returns:
        A flex-layout ``<div>`` string with the labels in the left 20% and
        the image in the right 80%.

    Raises:
        OSError: If the image cannot be opened or cannot be saved as PNG.
        IndexError: If ``image_labels`` has no rows.
    """
    # Imported locally so the function does not rely on a module-level import.
    from html import escape

    buffered = BytesIO()
    # The context manager releases the underlying file handle as soon as the
    # PNG bytes are written, instead of waiting for garbage collection.
    with Image.open(image_path) as img:
        img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode("ascii")

    columns = image_labels.columns[1:]
    values = image_labels.values[0][1:]
    # Escape &, < and > so spreadsheet text cannot break or inject markup.
    # quote=False leaves quotes untouched, which is safe in element content.
    labels_html = ''.join(
        f'<p>{escape(str(label), quote=False)}: {escape(str(value), quote=False)}</p>'
        for label, value in zip(columns, values)
    )
    return f'''
<div style="display:flex; width:100%; align-items:center; margin-bottom:20px;">
    <div style="width:20%;">
        {labels_html}
    </div>
    <div style="width:80%;">
        <img src="data:image/png;base64,{img_str}" style="display:block; width:100%; height:auto;"/>
    </div>
</div>
'''

# Generate HTML content: one image/label fragment per spreadsheet row.
# Fragments are collected in a list and joined once; each fragment carries a
# whole base64-encoded image, so repeated ``+=`` on one string would keep
# re-copying an ever-growing buffer.
# The charset declaration matches the UTF-8 encoding used when writing the
# file below, so browsers decode non-ASCII label text correctly.
html_parts = [
    f'<html><head><meta charset="utf-8"><title>{year} Image Classifications</title></head><body>'
]
for index, row in diagram_data_df.iterrows():
    image_path = "../" + row["image path"]  # Adjust the relative path as needed
    # get_image_html reads .columns and .values[0], so wrap the row Series
    # back into a one-row DataFrame with the original column order.
    image_labels = pd.DataFrame([row], columns=diagram_data_df.columns)
    html_parts.append(get_image_html(image_path, image_labels))
html_parts.append('</body></html>')
html_content = ''.join(html_parts)

# Write HTML content to a file
output_path = f"../diagram_data_html_outputs/{year}_diagram_data.html"
os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Ensure the directory exists
# Explicit UTF-8: without it the platform default encoding is used, which can
# fail or garble output when labels contain non-ASCII characters.
with open(output_path, "w", encoding="utf-8") as file:
    file.write(html_content)
