# Imports

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
import fitz as fitz
from enum import Enum
import io as io

## Classes and Enums

In [2]:
# using as a value tracker

class RoomOrder(Enum):
	"""
	Ordered sequences over the rooms 18a (A), 18b (B) and 18 (F).

	Each member name spells one sequence, and the trailing comment on each
	member lists the rooms in that order. The members cover every ordering
	of three rooms, of two rooms, and of a single room.
	NOTE(review): presumably the order in which a visitor went through the
	rooms -- confirm against the observation sheet.
	"""
	ABF = 0 # 18a, 18b, 18
	BAF = 1 # 18b, 18a, 18
	AFB = 2 # 18a, 18, 18b
	BFA = 3 # 18b, 18, 18a
	FAB = 4 # 18, 18a, 18b
	FBA = 5 # 18, 18b, 18a
	BA = 6 # 18b, 18a
	BF = 7 # 18b, 18
	FB = 8 # 18, 18b
	FA = 9 # 18, 18a
	AB = 10 # 18a, 18b
	AF = 11 # 18a, 18
	A = 12 # 18a
	B = 13 # 18b
	F = 14 # 18

class GroupComp(Enum):
	"""
	Group composition with two integer-valued categories.

	NOTE(review): presumably records whether an observed visitor was alone
	or part of a group -- confirm against the observation sheet.
	"""
	INDIVIDUAL = 0
	GROUP = 1

class LecternsViewed(Enum):
	"""
	Combinations of the West (W), Middle (M) and East (E) lecterns.

	Member names are the abbreviated combination; member values are the
	human-readable label for it. All eight subsets are present, including
	the empty one (N = "None").
	"""
	N = "None"
	W = "West"
	M = "Middle"
	E = "East"
	WM = "West, Middle"
	WE = "West, East"
	ME = "Middle, East"
	WME = "West, Middle, East"

class VisitorType(Enum):
	"""
	Visitor classification with four integer-valued categories.

	NOTE(review): the criteria separating the categories are not defined in
	this file -- document them here once confirmed.
	"""
	BROWSER = 0
	FOLLOWER = 1
	SEARCHER = 2
	RESEARCHER = 3

class TurnDirection(Enum):
	"""
	Direction of a turn: left, middle, right, or none.

	Shares LEFT/MIDDLE/RIGHT (with the same integer values) with
	FirstTurnDirection and adds NONE for the no-turn case.
	"""
	LEFT = 0
	MIDDLE = 1
	RIGHT = 2
	NONE = 3

class TeamMember(Enum):
	"""
	Team members, numbered 0-4 in alphabetical order of first name.

	NOTE(review): presumably identifies who recorded an observation --
	confirm against the observation sheet.
	"""
	Courtney = 0
	Jerry = 1
	Owen = 2
	Ritvik = 3
	Sofia = 4

class Gender(Enum):
	"""Gender category with two integer-valued members."""
	FEMALE = 0
	MALE = 1

class FirstTurnDirection(Enum):
	"""
	Direction of the first turn: left, middle, or right.

	Same members and integer values as TurnDirection, minus its NONE member.
	"""
	LEFT = 0
	MIDDLE = 1
	RIGHT = 2

class DayOfWeek(Enum):
	"""
	Days of the week numbered from Sunday = 0 through Saturday = 6.

	Note that this differs from datetime.date.weekday(), where Monday is 0.
	"""
	Sunday = 0
	Monday = 1
	Tuesday = 2
	Wednesday = 3
	Thursday = 4
	Friday = 5
	Saturday = 6

class Languages(Enum):
	"""
	Display names for survey language codes.

	Member names are the language codes and member values are the labels
	shown on charts. This class is listed in SURVEY_ENUM_CLASSES, so
	plot_data replaces any cell whose raw value equals a member name with
	that member's value.

	NOTE(review): the names here use underscores (ZH_S, ZH_T, ES_ES). If the
	survey export writes hyphenated codes (e.g. "ZH-S"), those cells will not
	match and will be charted with their raw code -- confirm against the data.
	"""
	EN = "English"
	# closing parenthesis was missing, which showed up verbatim on chart labels
	ZH_S = "Chinese (Simplified)"
	ZH_T = "Chinese (Traditional)"
	FR = "French"
	DE = "German"
	HI = "Hindi"
	IT = "Italian"
	ES_ES = "Spanish"

# Code

## Constants

In [3]:
# input workbooks; the paths are relative, so they resolve against the
# current working directory of the notebook kernel
visitor_xlsx_path: str = "../../assets/excel_files/observation_tables.xlsx"
survey_xlsx_path: str = "../../assets/excel_files/survey_responses.xlsx"

# name of the worksheet read from the observation workbook
main_sheet_name: str = "main data"

# output PDFs of pie charts, one file per data source
observation_export_path: str = "../../assets/output_files/visitor_pie_charts.pdf"
survey_export_path: str = "../../assets/output_files/survey_pie_charts.pdf"

## Loading XLSX

In [4]:
# observation data: read the main sheet, using its first column as the row index
# NOTE(review): neither ExcelFile below is closed in this cell -- confirm
# whether later cells still read from them before adding a close()
visitor_xlsx: pd.ExcelFile = pd.ExcelFile(visitor_xlsx_path)
visitor_df: pd.DataFrame = pd.read_excel(visitor_xlsx, sheet_name=main_sheet_name, index_col=0)

# survey data: no sheet_name is given, so pandas reads the first sheet;
# the column at position 8 (0-based) becomes the row index
survey_xlsx: pd.ExcelFile = pd.ExcelFile(survey_xlsx_path)
survey_df: pd.DataFrame = pd.read_excel(survey_xlsx, index_col=8)
# isolate row of question names (the first row below the header row);
# this must happen before that row is dropped from survey_df
survey_question_names: pd.Series = survey_df.iloc[0]
# remove row of question names
survey_df = survey_df[1:]

In [5]:
# simplify question names: strip the " - Selected Choice" text and put an
# HTML <br> after every ", " and ": " (these names are later used as chart titles)
for question_id in survey_question_names.keys():
	label = survey_question_names[question_id]
	label = (
		label
		.replace(" - Selected Choice", "")
		.replace(", ", ", <br>")
		.replace(": ", ": <br>")
	)
	survey_question_names[question_id] = label

## Operations

### Support Functions

In [6]:
# lists of columns to analyze and enums to transform labels

# observation columns to chart; plot_data draws one pie chart per entry
OBSERVATION_ANALYSIS_COLS: list = [
	"group_comp",
	"gender",
	"room_order",
	"lecterns_visited",
	"visitor_type",
	"first_turn_direction"
]

# no enum classes are supplied, so observation values keep their raw labels
OBSERVATION_ENUM_CLASSES: list = []

# survey columns to chart; plot_data draws one pie chart per entry
SURVEY_ANALYSIS_COLS: list = [
	"UserLanguage",
	"Q3",
	"Q4",
	"Q5",
	"Q6",
	"Q7",
	"Q8",
	"Q10",
	"Q12",
	"Q13",
	"Q14",
	"Q16",
	"Q19"
]

# enum classes whose member names are replaced by their values on the charts
SURVEY_ENUM_CLASSES: list = [
	Languages
]

In [7]:
def plot_data(df: pd.DataFrame, analysis_cols: list, enum_classes: list, names: "pd.Series | None", export_path: str, show_charts: bool) -> None:
	"""
	Generate one pie chart per column and save them all to a single PDF.

	For each column, raw values that equal the name of a member of one of
	the given Enum classes are replaced by that member's value, the values
	are counted, and the counts are drawn as a Plotly pie chart. Charts are
	rendered to PNG and placed two per page (top half, then bottom half).

	Args:
		df: The pandas DataFrame containing the data to analyze. It is not
			modified.
		analysis_cols: List of column names to generate charts for, in
			output order. Must not be empty.
		enum_classes: List of Enum classes used to map raw data values to
			display names. All classes share one lookup that is applied to
			every column, so if two classes define the same member name the
			class listed later wins.
		names: A pandas Series mapping column names to human-readable titles.
			If None, raw column names are used as titles.
		export_path: File path where the resulting PDF will be saved.
		show_charts: If True, displays each chart in the browser/notebook.

	Raises:
		ValueError: If analysis_cols is empty; a PDF with zero pages cannot
			be saved, so this fails up front instead of at save time.
		KeyError: If a column is missing from df, or from names when names
			is given.
	"""
	# len() rather than truthiness so array-like column lists also work
	if len(analysis_cols) == 0:
		raise ValueError("analysis_cols is empty: cannot save a PDF with zero pages")

	# page geometry in PDF points (595 x 842 is A4 portrait)
	page_width: int = 595
	page_height: int = 842
	margin: int = 50
	midpoint: float = page_height / 2

	# the two chart slots on a page, as (x0, y0, x1, y1)
	top_slot: tuple = (margin, margin, page_width - margin, midpoint - 40)
	bottom_slot: tuple = (margin, midpoint + 20, page_width - margin, page_height - margin - 40)

	# member name -> display label, merged across all enum classes
	label_map: dict[str, str] = {
		member.name: str(member.value)
		for enum_cls in enum_classes
		for member in enum_cls
	}

	# titles taken from `names` may span several lines, so they get more headroom
	has_names: bool = names is not None
	top_margin: int = 80 if has_names else 50

	doc: fitz.Document = fitz.open()
	try:
		for i, col in enumerate(analysis_cols):
			is_top: bool = i % 2 == 0
			if is_top:
				# every even-indexed chart starts a fresh page
				page: fitz.Page = doc.new_page(width=page_width, height=page_height)

			# map on the column itself; no need to copy the whole DataFrame
			labelled: pd.Series = df[col].map(lambda x: label_map.get(x, x))
			data_counts: pd.Series = labelled.value_counts()

			fig: go.Figure = go.Figure(data=[go.Pie(
				labels=list(data_counts.index),
				values=list(data_counts.values),
				textinfo='label+percent',
				insidetextorientation='radial',
			)])

			title_text = names[col] if has_names else col
			fig.update_layout(
				title={
					'text': f"'{title_text}'",
					'y': 0.95,
					'x': 0.5,
					'xanchor': 'center',
					'yanchor': 'top'
				},
				width=800,
				margin=dict(t=top_margin, b=10, l=10, r=10)
			)

			image_bytes: bytes = pio.to_image(fig, format="png", width=800, height=500, scale=2)

			if show_charts:
				fig.show()

			image_rect: fitz.Rect = fitz.Rect(*(top_slot if is_top else bottom_slot))
			page.insert_image(image_rect, stream=image_bytes)

		doc.save(export_path)
	finally:
		# release the document even when rendering or saving fails
		doc.close()

### Execution

In [10]:
# chart the observation columns; no title mapping, so column names are the titles
plot_data(
	df=visitor_df,
	analysis_cols=OBSERVATION_ANALYSIS_COLS,
	enum_classes=OBSERVATION_ENUM_CLASSES,
	names=None,
	export_path=observation_export_path,
	show_charts=False,
)

In [9]:
# chart the survey columns, titled with the simplified question names
plot_data(
	df=survey_df,
	analysis_cols=SURVEY_ANALYSIS_COLS,
	enum_classes=SURVEY_ENUM_CLASSES,
	names=survey_question_names,
	export_path=survey_export_path,
	show_charts=False,
)