In [38]:
import pandas
import re

In [39]:
# ======================================================================
# LaTeX formatting utility functions
# ======================================================================
def bold_text(text):
  """Wrap ``text`` in a LaTeX ``\\textbf{...}`` command and return the result."""
  opening = "\\textbf{"
  closing = "}"
  return opening + text + closing

def capitalize(text):
  """Return ``text`` with its first character upper-cased.

  Unlike ``str.capitalize``, the remaining characters are left untouched.
  An empty string is returned unchanged instead of raising ``IndexError``.
  """
  # Slicing (rather than indexing with [0]) keeps the empty string safe.
  return text[:1].upper() + text[1:]

def capitalize_sentence(text, sep="_"):
  """Split ``text`` on ``sep``, capitalize every piece, and join the pieces with spaces."""
  pieces = text.split(sep)
  return " ".join(capitalize(piece) for piece in pieces)

def model_name(model):
  """Build a display name from the first two underscore-separated parts of ``model``.

  The two parts are joined with a space, every digit is removed, and the
  result is passed through ``capitalize_sentence``.
  """
  leading_parts = model.split("_")[:2]
  spaced = " ".join(leading_parts)
  without_digits = re.sub(r"[0-9]", "", spaced)
  # NOTE(review): capitalize_sentence is called with its default separator
  # ("_"), but the text here is space-separated and contains no underscores,
  # so only its first character is upper-cased -- confirm this is intended.
  return capitalize_sentence(without_digits)

def compti(aggregate, single):
  """Tell whether ``single`` reaches ``aggregate`` once both are rounded to two decimals."""
  rounded_aggregate = round(aggregate, 2)
  rounded_single = round(single, 2)
  return rounded_aggregate <= rounded_single

# (base-model column, EDCR column) pairs, in the order the table shows them:
# precision, recall, F1.
_METRIC_COLUMNS = (
  ("Base Precision", "Precision (EDCR)"),
  ("Base Recall", "Recall (EDCR)"),
  ("Base F1", "F1 (EDCR)"),
)


def _format_metric(value, best):
  """Render ``value`` with two decimals, bolded when it matches ``best`` per ``compti``."""
  rendered = f"{value:.2f}"
  return bold_text(rendered) if compti(best, value) else rendered


def _format_improvement(base, edcr):
  """Render the relative change from ``base`` to ``edcr`` as a signed LaTeX percentage."""
  if base == 0:
    # A relative change against a zero baseline is undefined; report a
    # placeholder instead of dividing by zero.
    return "n/a"
  sign = "+" if edcr > base else ""
  return f"{sign}{round((edcr - base) / base * 100, 2)}\\%"


def paper_format(name, file_path, algorithm, filter):
  """Build a LaTeX table comparing base and EDCR scores for one algorithm.

  Args:
    name: Text whose first ``_``-separated part, capitalized, is used in the
      caption.
    file_path: Path of the spreadsheet loaded with ``pandas.read_excel``. It
      must provide the ``Algorithm`` and ``Model`` columns and the
      precision/recall/F1 columns listed in ``_METRIC_COLUMNS``; an
      ``Exclude`` column is optional.
    algorithm: Only rows whose ``Algorithm`` cell equals this value are used.
    filter: Free text appended to the caption. The name shadows the builtin
      but is kept because callers pass it by keyword.

  Returns:
    The LaTeX source of a ``table`` environment as a string. The first block
    of rows lists the base scores, the second the EDCR scores followed by
    their relative change. Scores that match the best value of their metric
    (after rounding to two decimals) are bolded.
  """
  metal_name = capitalize(name.split("_")[0])

  df = pandas.read_excel(file_path)

  # Select the rows once. The same selection drives both the printed rows and
  # the "best score" computation, and a missing 'Exclude' column is tolerated.
  rows = df[df['Algorithm'] == algorithm]
  if 'Exclude' in df.columns:
    # Any row with a value in 'Exclude' is left out.
    rows = rows[rows['Exclude'].isnull()]

  # Best score per metric across base and EDCR columns. Skipped when there is
  # nothing to print, so the metric columns are only read when rows exist.
  best_scores = []
  if not rows.empty:
    best_scores = [
      max(rows[base_col].max(), rows[edcr_col].max())
      for base_col, edcr_col in _METRIC_COLUMNS
    ]

  base_lines = []
  edcr_lines = []
  for _, row in rows.iterrows():
    display_name = model_name(row['Model'])

    base_cells = []
    edcr_cells = []
    for (base_col, edcr_col), best in zip(_METRIC_COLUMNS, best_scores):
      base_value = row[base_col]
      edcr_value = row[edcr_col]
      base_cells.append(_format_metric(base_value, best))
      improvement = _format_improvement(base_value, edcr_value)
      edcr_cells.append(f"{_format_metric(edcr_value, best)} ({improvement})")

    # Every table row ends with the LaTeX row terminator "\\" and a newline.
    base_lines.append(" & ".join([display_name] + base_cells) + "\\\\\n")
    edcr_lines.append(" & ".join([display_name + " (EDCR)"] + edcr_cells) + "\\\\\n")

  capitalized_algorithm = capitalize_sentence(algorithm, sep="_")
  caption = (
    "\\caption{"
    f"Model Evaluation Results for {metal_name}. "
    f"Using the {capitalized_algorithm} algorithm. {filter}"
    "}"
  )

  return "".join([
    "\\begin{table}\n",
    "\\centering\n",
    "\\begin{tabular}{|c|c|c|c|}\n",
    "\\hline\n",
    "Model & Precision & Recall & F1\\\\\n",
    "\\hline\n",
    "".join(base_lines),
    "\n\\hline\n",
    "".join(edcr_lines),
    "\n\\hline\n",
    "\\end{tabular}\n",
    caption,
    "\n",
    "\\end{table}\n",
  ])

In [40]:
import os

# Output target for the generated LaTeX; any existing results.tex is overwritten.
write_file = open("results.tex", mode="w")

# Filled below with one {"file": ..., "path": ...} record per directory entry.
detection_correction_file_paths = list()

def reverse_str(x):
  """Return ``x`` with its elements in reverse order."""
  backwards = slice(None, None, -1)
  return x[backwards]
def detection_correction(file, file_path, algorithm, filter):
  """Render one table with ``paper_format`` and write it to the global ``write_file``."""
  # Resolve the writer first, matching the original evaluation order.
  emit = write_file.write
  table = paper_format(
    name=file,
    file_path=file_path,
    algorithm=algorithm,
    filter=filter,
  )
  emit(table)

# Each results directory is paired with the caption text for its tables.
# Recording the text per directory avoids guessing it from substrings of the
# full path, which mislabels files whose own name contains "threshold".
RESULT_DIRECTORIES = (
  ("../out/threshold", "Filtering by Threshold"),
  ("../out/top_f1", "Filtering by Top F1"),
)

try:
  for directory, directory_filter_text in RESULT_DIRECTORIES:
    for file_name in os.listdir(directory):
      detection_correction_file_paths.append({
        "file": file_name,
        "path": os.path.join(directory, file_name),
        "filter": directory_filter_text,
      })

  # sorted() is stable, so entries sharing a file name keep directory order
  # (threshold before top_f1).
  detection_correction_file_paths = sorted(
    detection_correction_file_paths, key=lambda entry: entry["file"]
  )

  for entry in detection_correction_file_paths:
    # One table per algorithm for every results file.
    for algorithm in ("detection_correction", "correction"):
      detection_correction(
        file=entry["file"],
        file_path=entry["path"],
        algorithm=algorithm,
        filter=entry["filter"],
      )
finally:
  # Close results.tex even when listing a directory or rendering a table fails.
  write_file.close()