# MNI Calculation

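This notebook prepares the occurrence data for the MNI calculation, exports the intermediate pivot table to `pivot_df.xlsx`, and writes the per-transect results to the `Transects` sheet of the `source_data.xlsx` file.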

In [None]:
import pandas as pd
import sys
sys.path.append('..')
from scripts.mni import calculate_mni

# Load the pickled occurrence table and the table of element responses.
df_occurrences = pd.read_pickle('../data/pkl/df_occurrences.pkl')
df_briana_with_responses = pd.read_pickle('../data/pkl/df_briana_with_responses.pkl')

# Rebuild 'Taxon Label' only when the column is absent or holds nothing but
# missing values; it is then filled from the fallback columns in priority order.
taxon_label_absent = 'Taxon Label' not in df_occurrences.columns
if taxon_label_absent or df_occurrences['Taxon Label'].isna().all():
    df_occurrences['Taxon Label'] = pd.NA
    available_fallbacks = [
        name
        for name in ('Post: Taxon Guess?', 'Pre: Taxon')
        if name in df_occurrences.columns
    ]
    for name in available_fallbacks:
        fallback_values = df_occurrences[name]
        df_occurrences['Taxon Label'] = df_occurrences['Taxon Label'].fillna(fallback_values)

# Columns carried forward from each table (reindex adds any that are missing
# as all-NaN columns instead of raising).
columns_occurrences = ['ID', 'TransectUID', 'Taxon Label', 'Pre: Age', 'Pre: Sex', 'Pre: Size Class ']
columns_briana = ['OccurrenceID', 'What element is this?', 'Side']

df_occurrences = df_occurrences.reindex(columns=columns_occurrences)
df_briana_with_responses = df_briana_with_responses.reindex(columns=columns_briana)

# Left join keeps every occurrence row, with or without a matching response;
# the join keys are not needed afterwards.
merged = df_occurrences.merge(
    df_briana_with_responses,
    how='left',
    left_on='ID',
    right_on='OccurrenceID',
)
df = merged.drop(columns=['ID', 'OccurrenceID'])

# Unparseable transect ids become <NA> in a nullable integer column.
numeric_transect_uid = pd.to_numeric(df['TransectUID'], errors='coerce')
df['TransectUID'] = numeric_transect_uid.astype('Int64')


In [None]:
from pathlib import Path

# Count rows per transect / taxon / age / sex / size class / element, spread
# into one column per value of "Side"; combinations with no rows get 0.
# NOTE(review): rows with a missing value in any index column or in "Side"
# appear to be left out of the counts by pandas' default NaN handling —
# confirm that is intended for occurrences without a response.
pivot_df = (
    df.pivot_table(
        index=["TransectUID", "Taxon Label", "Pre: Age", "Pre: Sex", "Pre: Size Class ", "What element is this?"],
        columns="Side",
        aggfunc="size",
        fill_value=0,
    )
    .rename_axis(columns=None)
    .reset_index()
)
# Keep the transect id as a nullable integer after the round trip through the index.
pivot_df["TransectUID"] = pd.to_numeric(pivot_df["TransectUID"], errors="coerce").astype("Int64")

# Export the pivot table, creating the export directory on first use.
pivot_output_path = Path("../data/export/excel/pivot_df.xlsx")
pivot_output_path.parent.mkdir(parents=True, exist_ok=True)
pivot_df.to_excel(pivot_output_path, index=False)

# The preview must be the cell's final expression for the notebook to render
# it; placed mid-cell it was evaluated and silently discarded.
pivot_df.head()


In [None]:
# Run the project's MNI routine (imported from scripts.mni) on the pivoted counts.
# NOTE(review): the result is later accessed via a "TransectUID" column and has
# "MNI" renamed, so it is presumably a DataFrame with those columns — confirm
# against calculate_mni.
mni_per_transect = calculate_mni(pivot_df)
# Bare expression: displayed as the cell output when run in a notebook.
mni_per_transect

In [None]:
from pathlib import Path

output_path = Path("../data/export/excel/source_data.xlsx")

# Start from the existing "Transects" sheet when the workbook is present;
# otherwise use an empty frame that only carries the join key.
if output_path.exists():
    transect_sheet = pd.read_excel(output_path, sheet_name="Transects")
else:
    transect_sheet = pd.DataFrame(columns=["TransectUID"])

# Accept a sheet whose key column is called "UID"; with neither name present,
# add an all-missing key column so the merge below still works.
if "TransectUID" not in transect_sheet.columns:
    if "UID" in transect_sheet.columns:
        transect_sheet = transect_sheet.rename(columns={"UID": "TransectUID"})
    else:
        transect_sheet["TransectUID"] = pd.NA

# Give both sides the same nullable-integer key dtype before joining.
transect_sheet["TransectUID"] = pd.to_numeric(transect_sheet["TransectUID"], errors="coerce").astype("Int64")
mni_per_transect["TransectUID"] = pd.to_numeric(mni_per_transect["TransectUID"], errors="coerce").astype("Int64")

# Rename BEFORE merging so a sheet that already has its own "MNI" column does
# not turn the merge output into "MNI_x"/"MNI_y" (which the old post-merge
# rename silently missed).
mni_calc = mni_per_transect.rename(columns={"MNI": "MNI_calc"})

# Drop the "MNI_calc" written by a previous run; otherwise re-running the
# notebook would leave two columns with that name in the sheet.
transect_sheet = transect_sheet.drop(columns=["MNI_calc"], errors="ignore")
transect_sheet = transect_sheet.merge(mni_calc, on="TransectUID", how="left")

output_path.parent.mkdir(parents=True, exist_ok=True)
# Append mode (replacing only the "Transects" sheet) preserves the other
# sheets of an existing workbook; a new workbook is written from scratch.
# `if_sheet_exists` is only accepted in append mode, hence the conditional kwargs.
writer_options = {"mode": "a", "if_sheet_exists": "replace"} if output_path.exists() else {}
with pd.ExcelWriter(output_path, engine="openpyxl", **writer_options) as writer:
    transect_sheet.to_excel(writer, sheet_name="Transects", index=False)
