# Imports

In [1]:
import numpy as np
import pandas as pd
import fitz as fitz

# Code

## Constants

In [2]:
# relative file paths
# Excel workbook that holds the main visitor sheet and the per-visitor sheets.
visitor_xlsx_path: str = "../../assets/excel_files/observation_tables.xlsx"

# sheet names
# Sheet loaded into visitor_df; the loading cells read its "visitor_id" column.
main_sheet_name: str = "main data"
# Per-visitor sheets are looked up as "<base> <visitor id zero-padded to 3 digits>",
# e.g. "object group 007" / "object indiv 007".
groups_sheet_name_base: str = "object group"
indivs_sheet_name_base: str = "object indiv"

# export paths
# Destination PDFs written by plot_data (one for sections, one for artifacts).
# NOTE(review): nothing visible in this notebook creates the output directory —
# confirm it exists before running the execution cells.
groups_export_path: str = "../../assets/output_files/section_stats.pdf"
artifacts_export_path: str = "../../assets/output_files/artifact_stats.pdf"

## Loading XLSX

In [3]:
# Open the workbook once; later cells pull the per-visitor sheets from this same handle.
visitor_xlsx: pd.ExcelFile = pd.ExcelFile(visitor_xlsx_path)

# Main sheet, indexed by its first column.
visitor_df: pd.DataFrame = pd.read_excel(
	visitor_xlsx,
	sheet_name=main_sheet_name,
	index_col=0,
)

# Row count of the main sheet (presumably one row per observed visitor).
num_visitors: int = len(visitor_df.index)

In [None]:
# Build the section table: after the final transpose, rows are visitors (zero-padded
# ids) and columns are section labels. Each cell holds a tuple of engagement flags
# read from that visitor's "<groups_sheet_name_base> NNN" sheet:
#   - labels containing "Frieze": (revisited, viewed_labels, took_photos)
#   - every other label:          (took_photos, False)
# Blank spreadsheet cells are read as NaN and bool(NaN) is True, so every value is
# checked with pd.notna first; an empty cell therefore counts as "no engagement".
# NOTE(review): the non-Frieze tuple ignores "revisited"/"viewed_labels" and pads with
# a constant False, unlike the artifact table built in the next cell — confirm intent.
groups_df: pd.DataFrame = pd.DataFrame()
for i in visitor_df["visitor_id"].values:
	col_name: str = "{0:0=3d}".format(i)  # zero-padded visitor id, e.g. 7 -> "007"
	sheet_name: str = groups_sheet_name_base + " " + col_name
	temp_df: pd.DataFrame = pd.read_excel(visitor_xlsx, sheet_name=sheet_name, index_col=0)
	# object dtype so that a whole tuple can be stored in a single cell
	groups_df[col_name] = pd.Series(dtype=object)
	for j in temp_df.index.to_list():
		is_frieze: bool = "Frieze" in j
		flag_cols: tuple = ("revisited", "viewed_labels", "took_photos") if is_frieze else ("took_photos",)
		flags: tuple = tuple(
			bool(pd.notna(value) and value)
			for value in (temp_df.loc[j, flag_col] for flag_col in flag_cols)
		)
		groups_df.at[j, col_name] = flags if is_frieze else flags + (False,)
groups_df = groups_df.T

In [None]:
# Build the artifact table: after the final transpose, rows are visitors (zero-padded
# ids) and columns are artifact labels. Each cell holds a 3-tuple of engagement flags
# read from that visitor's "<indivs_sheet_name_base> NNN" sheet:
#   - labels containing "Frieze": (visited,   viewed_labels, took_photos)
#   - every other label:          (revisited, viewed_labels, took_photos)
# Blank spreadsheet cells are read as NaN and bool(NaN) is True, so every value is
# checked with pd.notna first; an empty cell therefore counts as "no engagement".
# NOTE(review): Frieze rows read "visited" while all other rows read "revisited" —
# confirm this asymmetry is intentional.
artifacts_df: pd.DataFrame = pd.DataFrame()
for i in visitor_df["visitor_id"].values:
	col_name: str = "{0:0=3d}".format(i)  # zero-padded visitor id, e.g. 7 -> "007"
	sheet_name: str = indivs_sheet_name_base + " " + col_name
	temp_df: pd.DataFrame = pd.read_excel(visitor_xlsx, sheet_name=sheet_name, index_col=0)
	# object dtype so that a whole tuple can be stored in a single cell
	artifacts_df[col_name] = pd.Series(dtype=object)
	for j in temp_df.index.to_list():
		first_flag_col: str = "visited" if "Frieze" in j else "revisited"
		artifacts_df.at[j, col_name] = tuple(
			bool(pd.notna(value) and value)
			for value in (
				temp_df.loc[j, flag_col]
				for flag_col in (first_flag_col, "viewed_labels", "took_photos")
			)
		)
artifacts_df = artifacts_df.T

## Operations

### Support Functions

In [None]:
# Section labels tallied by collect_data for the section report.
# Each string is used verbatim as a column key (df[col]), so it must match the row
# labels of the per-visitor "object group" sheets exactly.
GROUP_ANALYSIS_COLS: list[str] = [
	"South Metopes II-V",
	"West Pediments",
	"South Metopes VI-IX",
	"South Metopes XXVI-XXIX",
	"East Pediments",
	"South Metopes XXX-XXXII",
	"Frieze Section 1",
	"Frieze Section 2",
	"Frieze Section 3",
	"Frieze Section 4", 
	"Frieze Section 5"
]

# Artifact labels tallied by collect_data for the artifact report.
# Each string is used verbatim as a column key (df[col]), so it must match the row
# labels of the per-visitor "object indiv" sheets exactly — including any trailing "*".
# NOTE(review): the meaning of the "*" suffix is not documented in this notebook;
# presumably it is copied from the spreadsheet labels — confirm.
ARTIFACT_ANALYSIS_COLS: list[str] = [
	"South Metope II",
	"South Metope III",
	"South Metope IV",
	"West Pediment A*",
	"West Pediment A (back)*",
	"West Pediment H",
	"West Pediment L*",
	"West Pediment M",
	"West Pediment N*",
	"West Pediment N (back)",
	"West Pediment O",
	"West Pediment Q",
	"South Metope V",
	"South Metope VI",
	"South Metope VII",
	"South Metope VIII",
	"South Metope IX",
	"East Pediment A-C*",
	"East Pediment D*",
	"East Pediment D (back)",
	"East Pediment E and F",
	"East Pediment E,F (back)",
	"East Pediment G*",
	"East Pediment G (back)",
	"East Pediment K",
	"East Pediment L and M*",
	"East Pediment K,L,M (back)",
	"East Pediment O",
	"South Metope XXVI",
	"South Metope XXVII*",
	"South Metope XXVIII",
	"South Metope XXIX",
	"South Metope XXX",
	"South Metope XXXI",
	"South Metope XXXII",
	"South Frieze XXXI 78-79*",
	"East Frieze III 7-11*",
	"East Frieze IV 24-25*",
	"East Frieze III 3-35*",
	"North Frieze V 13*",
	"North Frieze XXVII 73-74*",
	"North Frieze XLVII 132-136*",
	"West Frieze 2-3*",
]

In [None]:
def collect_data(df: pd.DataFrame, analysis_cols: list[str], num_visitors: int) -> dict[str, tuple[int, int]]:
	"""Tally engagement flags for each requested column of ``df``.

	Every cell of ``df`` is expected to hold an iterable of flags (the loading
	cells of this notebook store tuples of bools). For each column two totals
	are accumulated over all rows:

	* engagement count: how many flags across all cells equal ``True``
	* visitor count: how many cells contain at least one such flag

	Cells that are not list-like (for example the NaN left in the table when a
	row has no entry for that column) are skipped rather than raising
	``TypeError`` on iteration.

	Args:
		df: Table whose cells hold flag tuples; one row per visitor as built
			by the loading cells.
		analysis_cols: Column labels of ``df`` to tally.
		num_visitors: Not used by the computation; kept so existing callers
			continue to work.

	Returns:
		Mapping of column label to ``(engagement_count, visitor_count)``.

	Raises:
		KeyError: If a label in ``analysis_cols`` is not a column of ``df``.
	"""
	engagement: dict[str, tuple[int, int]] = {}
	for col in analysis_cols:
		engagement_count: int = 0
		visitor_count: int = 0
		for cell in df[col].values:
			# Missing entries (NaN/None) are scalars, not flag tuples: nothing to count.
			if not pd.api.types.is_list_like(cell):
				continue
			# Equality (not truthiness) is kept on purpose so only True-valued flags count.
			hits: int = sum(1 for element in cell if element == True)  # noqa: E712
			engagement_count += hits
			if hits:
				visitor_count += 1
		engagement[col] = (engagement_count, visitor_count)
	return engagement

def _format_ranking(data: dict[str, tuple[float, float]]) -> str:
	"""Render ``data`` as ranked text lines, highest combined score first."""
	# Score = sum of the tuple (engagements + visitors engaged); ties are ordered by key.
	sorted_keys: list[str] = sorted(data, key=lambda k: (-sum(data[k]), k))

	lines: list[str] = []
	current_rank: int = 0
	previous_score = None
	for position, key in enumerate(sorted_keys, start=1):
		current_score = sum(data[key])
		# Entries with equal scores share a rank; the next distinct score takes its
		# 1-based position in the sorted list (1, 1, 3, ...).
		if current_score != previous_score:
			previous_score = current_score
			current_rank = position
		lines.append(
			f"{current_rank}) {key}: {data[key][0]} (engagements) + {data[key][1]} (visitors engaged) \n"
		)
	# Two leading newlines push the body below the heading drawn at the same anchor point.
	return "\n\n" + "".join(lines)


def plot_data(data: dict[str, tuple[float, float]], start_string: str, export_path: str, show_data: bool) -> None:
	"""Write a ranked engagement list to a one-page PDF.

	Entries are ordered by descending sum of their tuple values, with ties
	broken by key, and tied entries share a rank. The heading
	(``start_string`` followed by ``":"``) and the ranking text are inserted at
	the same anchor point; the ranking text starts with two blank lines so it
	appears below the heading.

	The document is closed even if inserting text or saving fails.

	NOTE(review): everything is written to a single page, so a long ranking may
	run past the bottom edge — confirm the output for large inputs.

	Args:
		data: Mapping of label to ``(engagement_count, visitor_count)``.
		start_string: Heading text; a colon is appended.
		export_path: Destination path of the PDF.
		show_data: When true, also print the ranking text.
	"""
	margin: int = 50
	text_content: str = _format_ranking(data)

	doc: fitz.Document = fitz.open()
	try:
		page: fitz.Page = doc.new_page()
		page.insert_text(
			fitz.Point(margin, margin),
			start_string + ":",
			fontsize=16
		)
		page.insert_text(
			fitz.Point(margin, margin),
			text_content,
			fontsize=12
		)

		if show_data:
			print(text_content)

		doc.save(export_path)
	finally:
		doc.close()

### Execution

In [None]:
# Tally engagement per section, then export the ranking as a PDF (no console echo).
group_data: dict[str, tuple[float, float]] = collect_data(
	df=groups_df,
	analysis_cols=GROUP_ANALYSIS_COLS,
	num_visitors=num_visitors,
)
plot_data(
	data=group_data,
	start_string="Most Engaging Sections",
	export_path=groups_export_path,
	show_data=False,
)

In [None]:
# Tally engagement per artifact, then export the ranking as a PDF (no console echo).
artifacts_data: dict[str, tuple[float, float]] = collect_data(
	df=artifacts_df,
	analysis_cols=ARTIFACT_ANALYSIS_COLS,
	num_visitors=num_visitors,
)
plot_data(
	data=artifacts_data,
	start_string="Most Engaging Artifacts",
	export_path=artifacts_export_path,
	show_data=False,
)