In [None]:

import pandas as pd
from tqdm import tqdm

In [11]:
# Names of all species, in alphabetical order (used further down to order the
# columns of the per-species accuracy table).
species_list = [
    'alligator_mississippiensis', 'anolis_carolinensis', 'anopheles_gambiae',
    'apis_mellifera', 'arabidopsis_thaliana', 'aspergillus_nidulans',
    'bos_taurus', 'brachypodium_distachyon', 'caenorhabditis_elegans',
    'canis_lupus_familiaris', 'columba_livia', 'coprinopsis_cinerea',
    'cryptococcus_neoformans', 'danio_rerio', 'daphnia_carinata',
    'dictyostelium_discoideum', 'drosophila_melanogaster', 'eimeria_maxima',
    'entamoeba_histolytica', 'equus_caballus', 'gallus_gallus',
    'giardia_intestinalis', 'glycine_max', 'gorilla_gorilla',
    'homo_sapiens', 'hordeum_vulgare', 'leishmania_donovani',
    'lotus_japonicus', 'manduca_sexta', 'medicago_truncatula',
    'mus_musculus', 'neurospora_crassa', 'nicotiana_tabacum',
    'oreochromis_niloticus', 'oryctolagus_cuniculus', 'oryza_sativa',
    'oryzias_latipes', 'ovis_aries', 'pan_troglodytes',
    'phoenix_dactylifera', 'plasmodium_falciparum', 'rattus_norvegicus',
    'rhizophagus_irregularis', 'saccharomyces_cerevisiae', 'schizophyllum_commune',
    'schizosaccharomyces_pombe', 'selaginella_moellendorffii', 'setaria_viridis',
    'solanum_lycopersicum', 'strongylocentrotus_purpuratus', 'sus_scrofa',
    'taeniopygia_guttata', 'toxoplasma_gondii', 'tribolium_castaneum',
    'trichoplax_adhaerens', 'triticum_aestivum', 'trypanosoma_brucei',
    'ustilago_maydis', 'xenopus_laevis', 'zea_mays',
]

In [12]:
# Species grouped by systematic group; the group-level accuracy functions
# further down iterate over these lists.
protozoa_list = [
    'plasmodium_falciparum',
    'entamoeba_histolytica',
    'dictyostelium_discoideum',
    'giardia_intestinalis',
    'trypanosoma_brucei',
    'leishmania_donovani',
    'toxoplasma_gondii',
    'eimeria_maxima',
]

fungi_list = [
    'saccharomyces_cerevisiae',
    'ustilago_maydis',
    'schizosaccharomyces_pombe',
    'aspergillus_nidulans',
    'cryptococcus_neoformans',
    'neurospora_crassa',
    'coprinopsis_cinerea',
    'rhizophagus_irregularis',
    'schizophyllum_commune',
]

plants_list = [
    'oryza_sativa',
    'arabidopsis_thaliana',
    'selaginella_moellendorffii',
    'brachypodium_distachyon',
    'setaria_viridis',
    'zea_mays',
    'hordeum_vulgare',
    'triticum_aestivum',
    'phoenix_dactylifera',
    'lotus_japonicus',
    'medicago_truncatula',
    'nicotiana_tabacum',
    'glycine_max',
    'solanum_lycopersicum',
]

invertebrates_list = [
    'trichoplax_adhaerens',
    'tribolium_castaneum',
    'manduca_sexta',
    'apis_mellifera',
    'strongylocentrotus_purpuratus',
    'daphnia_carinata',
    'drosophila_melanogaster',
    'anopheles_gambiae',
    'caenorhabditis_elegans',
]

vertebrates_list = [
    'gallus_gallus',
    'alligator_mississippiensis',
    'xenopus_laevis',
    'oreochromis_niloticus',
    'homo_sapiens',
    'bos_taurus',
    'mus_musculus',
    'ovis_aries',
    'canis_lupus_familiaris',
    'equus_caballus',
    'gorilla_gorilla',
    'pan_troglodytes',
    'rattus_norvegicus',
    'oryctolagus_cuniculus',
    'sus_scrofa',
    'danio_rerio',
    'oryzias_latipes',
    'taeniopygia_guttata',
    'columba_livia',
    'anolis_carolinensis',
]

## Load in processed predictions

In [8]:
# Directory holding the processed, gzip-compressed prediction tables. Kept in
# one place so that relocating the data only requires changing this line.
PREDS_DIR = '../../data/data_evaluation/preds_processed/transcripts/'


def _read_preds(file_name):
    """Read one gzip-compressed CSV of processed predictions from PREDS_DIR."""
    return pd.read_csv(PREDS_DIR + file_name, compression="gzip")


# NetStart predictions (overall and group-specific tables)
netstart_overall_df_full = _read_preds('netstart_overall_df_transcripts.csv.gz')
netstart_group_specific_df_full = _read_preds('netstart_group_specific_df_transcripts.csv.gz')

# Ablation tables
esm2_finetuned_ablation_df_full = _read_preds('esm2_finetuned_ablation_df_transcripts.csv.gz')
netstart1_ablation_df_full = _read_preds('netstart1_ablation_df_transcripts.csv.gz')

# AUGUSTUS tables (default and "no_softmask" variants)
augustus_df_full = _read_preds('augustus_df.csv.gz')
augustus_no_softmask_df_full = _read_preds('augustus_no_softmask_df.csv.gz')

# Tiberius tables (default and "no_softmask" variants)
tiberius_df_full = _read_preds('tiberius_df.csv.gz')
tiberius_no_softmask_df_full = _read_preds('tiberius_no_softmask_df.csv.gz')

# TIS Transformer table
TIS_transformer_df_full = _read_preds('TIS_transformer_df_transcripts.csv.gz')

In [28]:
# Sanity check: these four prediction tables must all have the same number of rows.
row_counts = {
    frame.shape[0]
    for frame in (
        netstart_overall_df_full,
        netstart_group_specific_df_full,
        TIS_transformer_df_full,
        esm2_finetuned_ablation_df_full,
    )
}
assert len(row_counts) == 1

## Extract transcript-level accuracies per species and per group

In [50]:
def get_species_accuracies(TIS_transformer_df_full,
                           netstart_overall_df_full,
                           netstart_group_specific_df_full,
                           esm2_finetuned_ablation_df_full,
                           netstart1_ablation_df_full,
                           augustus_df_full,
                           tiberius_df_full
                           ):
    """Compute per-species transcript-level accuracies for every model.

    The five classifier tables (TIS Transformer, both NetStart 2.0 tables and
    the two ablation tables) must provide the columns 'species', 'seq_number',
    'preds', 'label' and 'annotation_source'. For each species and each
    'seq_number' the row with the highest 'preds' is kept; accuracy is the
    share of kept rows whose 'label' equals 1.0.

    The two gene-finder tables (AUGUSTUS, Tiberius) must provide 'species',
    'seq_type', 'preds' and 'annotation_source'. Accuracy is the sum of
    'preds' over rows with seq_type == "TIS" divided by the number of such rows.

    Returns
    -------
    pandas.DataFrame
        One column per entry of the module-level ``species_list``; the rows
        are "<model>", "<model>_refseq" and "<model>_gnomon" for every model.
        Entries without data are missing (None/NaN).
    """
    def calculate_accuracy(df):
        # Share of rows labelled 1.0; None when there is nothing to score.
        if df.empty:
            return None
        return float((df['label'] == 1.0).sum() / len(df))

    def calculate_hit_rate(df):
        # sum(preds) / number of rows. An empty subset used to trigger a 0/0
        # division (RuntimeWarning, result NaN); return NaN explicitly so the
        # values are unchanged but no warning is raised.
        if df.empty:
            return float("nan")
        return df["preds"].sum() / df.shape[0]

    def process_dataframe(df, model_name):
        results = {}
        for species in tqdm(df['species'].unique(), desc=f"Processing {model_name}"):
            species_df = df[df['species'] == species]

            # Keep, for each sequence number, the row with the highest prediction
            filtered = species_df.loc[species_df.groupby('seq_number')['preds'].idxmax()]

            # Split by source: RefSeq/BestRefSeq versus everything else
            refseq_mask = filtered['annotation_source'].isin(["RefSeq", "BestRefSeq"])
            refseq_df = filtered[refseq_mask]
            gnomon_df = filtered[~refseq_mask]

            results[species] = {
                model_name: calculate_accuracy(filtered),
                f"{model_name}_refseq": calculate_accuracy(refseq_df),
                f"{model_name}_gnomon": calculate_accuracy(gnomon_df)
            }
        return results

    def process_dataframe_gene_finders(df, model_name):
        results = {}
        for species in tqdm(df['species'].unique(), desc=f"Processing {model_name}"):
            species_df = df[(df['species'] == species) & (df['seq_type'] == "TIS")]

            # Split by source.
            # NOTE(review): here "refseq" means every source other than "Gnomon",
            # whereas process_dataframe uses RefSeq/BestRefSeq explicitly. The two
            # agree only if no other annotation source occurs - confirm.
            refseq_mask = species_df['annotation_source'] != "Gnomon"
            refseq_df = species_df[refseq_mask]
            gnomon_df = species_df[~refseq_mask]

            # Species-specific accuracy
            results[species] = {
                model_name: calculate_hit_rate(species_df),
                f"{model_name}_refseq": calculate_hit_rate(refseq_df),
                f"{model_name}_gnomon": calculate_hit_rate(gnomon_df)
            }

        return results

    # Process each model's data
    tiberius_results = process_dataframe_gene_finders(tiberius_df_full, "Tiberius (Softmasked)")
    tt_results = process_dataframe(TIS_transformer_df_full, "TIS Transformer")
    netstart_overall_results = process_dataframe(netstart_overall_df_full, "NetStart 2.0")
    netstart_group_results = process_dataframe(netstart_group_specific_df_full, "NetStart 2.0 (Group-specific)")
    esm2_finetuned_ablation_results = process_dataframe(esm2_finetuned_ablation_df_full, "ESM2 Finetuned Ablation")
    netstart1_ablation_results = process_dataframe(netstart1_ablation_df_full, "NetStart 1.0 Ablation")
    augustus_results = process_dataframe_gene_finders(augustus_df_full, "AUGUSTUS (Softmasked)")

    # Merge results; a species missing from a model's results contributes no
    # entries for that model.
    accuracies = {}
    for species in species_list:
        accuracies[species] = {
            **augustus_results.get(species, {}),
            **tiberius_results.get(species, {}),
            **tt_results.get(species, {}),
            **netstart1_ablation_results.get(species, {}),
            **esm2_finetuned_ablation_results.get(species, {}),
            **netstart_overall_results.get(species, {}),
            **netstart_group_results.get(species, {})
        }

    return pd.DataFrame(accuracies)

# Per-species accuracies computed on the full (unfiltered) prediction tables.
accuracies_species = get_species_accuracies(
    TIS_transformer_df_full=TIS_transformer_df_full,
    netstart_overall_df_full=netstart_overall_df_full,
    netstart_group_specific_df_full=netstart_group_specific_df_full,
    esm2_finetuned_ablation_df_full=esm2_finetuned_ablation_df_full,
    netstart1_ablation_df_full=netstart1_ablation_df_full,
    augustus_df_full=augustus_df_full,
    tiberius_df_full=tiberius_df_full,
)

Processing TIS Transformer: 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]
Processing NetStart 2.0: 100%|██████████| 1/1 [00:01<00:00,  1.61s/it]


In [30]:
# The classifier tables must contain the same number of TSS-annotated rows.
tss_row_counts = {
    frame[frame["TSS_annotated"] == True].shape[0]
    for frame in (
        TIS_transformer_df_full,
        netstart_overall_df_full,
        netstart_group_specific_df_full,
        esm2_finetuned_ablation_df_full,
        netstart1_ablation_df_full,
    )
}
assert len(tss_row_counts) == 1

# The classifier tables and the AUGUSTUS table must contain the same number of
# rows labelled 1.0.
positive_row_counts = {
    frame[frame["label"] == 1.0].shape[0]
    for frame in (
        TIS_transformer_df_full,
        netstart_overall_df_full,
        netstart_group_specific_df_full,
        esm2_finetuned_ablation_df_full,
        augustus_df_full,
        netstart1_ablation_df_full,
    )
}
assert len(positive_row_counts) == 1

In [14]:
def _tss_annotated_rows(frame):
    """Return the rows of `frame` whose 'TSS_annotated' value equals True."""
    return frame.loc[frame["TSS_annotated"] == True]


# Restrict every table to rows with TSS_annotated set.
TIS_transformer_df_full_TSS = _tss_annotated_rows(TIS_transformer_df_full)
netstart_overall_df_full_TSS = _tss_annotated_rows(netstart_overall_df_full)
netstart_group_specific_df_full_TSS = _tss_annotated_rows(netstart_group_specific_df_full)
esm2_finetuned_ablation_df_full_TSS = _tss_annotated_rows(esm2_finetuned_ablation_df_full)
netstart1_ablation_df_full_TSS = _tss_annotated_rows(netstart1_ablation_df_full)
augustus_df_full_TSS = _tss_annotated_rows(augustus_df_full)
# The Tiberius table is matched against the strings "True" / "True\n" instead
# of the boolean True.
tiberius_df_full_TSS = tiberius_df_full.loc[tiberius_df_full["TSS_annotated"].isin(["True", "True\n"])]

# AUGUSTUS and TIS Transformer must agree on the number of rows labelled 1.0.
n_positive_augustus = augustus_df_full_TSS.loc[augustus_df_full_TSS["label"] == 1.0].shape[0]
n_positive_tis_transformer = TIS_transformer_df_full_TSS.loc[TIS_transformer_df_full_TSS["label"] == 1.0].shape[0]
assert n_positive_augustus == n_positive_tis_transformer


accuracies_species_TSS_true = get_species_accuracies(
    TIS_transformer_df_full=TIS_transformer_df_full_TSS,
    netstart_overall_df_full=netstart_overall_df_full_TSS,
    netstart_group_specific_df_full=netstart_group_specific_df_full_TSS,
    esm2_finetuned_ablation_df_full=esm2_finetuned_ablation_df_full_TSS,
    netstart1_ablation_df_full=netstart1_ablation_df_full_TSS,
    augustus_df_full=augustus_df_full_TSS,
    tiberius_df_full=tiberius_df_full_TSS,
)

  f"{model_name}_gnomon": gnomon_df["preds"].sum()/gnomon_df.shape[0]
Processing Tiberius (Softmasked): 100%|██████████| 5/5 [00:00<00:00, 206.11it/s]
Processing TIS Transformer: 100%|██████████| 52/52 [00:28<00:00,  1.85it/s]
Processing NetStart 2.0: 0it [00:00, ?it/s]
Processing NetStart 2.0 (Group-specific): 100%|██████████| 52/52 [00:28<00:00,  1.80it/s]
Processing ESM2 Finetuned Ablation: 100%|██████████| 52/52 [00:27<00:00,  1.86it/s]
Processing NetStart 1.0 Ablation: 100%|██████████| 52/52 [00:28<00:00,  1.83it/s]
  f"{model_name}_gnomon": gnomon_df["preds"].sum()/gnomon_df.shape[0]
  f"{model_name}_refseq": refseq_df["preds"].sum()/refseq_df.shape[0],
Processing AUGUSTUS (Softmasked): 100%|██████████| 52/52 [00:00<00:00, 61.61it/s]


In [23]:
def generate_accuracy_table_species(accuracies_dict, TSS):
    """Print a LaTeX table of per-species accuracies for five selected species.

    Parameters
    ----------
    accuracies_dict : pandas.DataFrame
        Accuracy table with one column per species and one row per
        "<model>", "<model>_refseq" and "<model>_gnomon" entry. It must
        contain a "homo_sapiens" column, whose index defines the model rows.
    TSS : bool
        If truthy, transcript counts are taken from the module-level
        ``TIS_transformer_df_full_TSS``; otherwise from
        ``TIS_transformer_df_full``.

    Each accuracy cell reads "<overall> (<refseq>)" in percent with two
    decimals. A missing (None/NaN) overall value is printed as "N/A" and a
    missing RefSeq value is left out. The function prints the table and
    returns None.
    """
    # Species shown as table columns
    species_list_tib = ["homo_sapiens", "drosophila_melanogaster", "arabidopsis_thaliana", "cryptococcus_neoformans", "toxoplasma_gondii"]
    refseq_sources = ["RefSeq", "BestRefSeq"]

    # Model names are the index entries without the _refseq / _gnomon suffix
    models = [model for model in accuracies_dict["homo_sapiens"].index if "_refseq" not in model and "_gnomon" not in model]

    def format_percent(value):
        # Missing values must be detected before multiplying: None * 100 raises
        # and NaN would otherwise be printed as "nan".
        if value is None or pd.isna(value):
            return None
        return f"{round(value * 100, 2):.2f}"

    # One "r" column per displayed species (not per entry of the full species list)
    latex_table = "\\begin{tabular}{l" + "r" * len(species_list_tib) + "}\n\\toprule\n"

    # Header row (species names as column names)
    latex_table += " & " + " & ".join(species_list_tib) + " \\\\\n\\midrule\n"

    # Transcript counts: rows labelled 1.0, overall and RefSeq-only. All masks
    # are built from the same frame so that their indexes line up.
    counts_df = TIS_transformer_df_full_TSS if TSS else TIS_transformer_df_full
    is_tis = counts_df["label"] == 1.0
    is_refseq = counts_df["annotation_source"].isin(refseq_sources)

    row = ["Transcript Count"]
    for species in species_list_tib:
        is_species_tis = is_tis & (counts_df["species"] == species)
        transcript_count = int(is_species_tis.sum())
        transcript_count_refseq = int((is_species_tis & is_refseq).sum())
        row.append(f"{transcript_count} ({transcript_count_refseq})")

    latex_table += " & ".join(row) + " \\\\\n"

    # One row per model, one cell per species
    for model in models:
        row = [model]

        for species in species_list_tib:
            if species not in accuracies_dict:
                row.append("N/A")
                continue

            overall_accuracy_str = format_percent(accuracies_dict[species].get(model, None))
            refseq_accuracy_str = format_percent(accuracies_dict[species].get(f"{model}_refseq", None))

            combined_accuracy = overall_accuracy_str if overall_accuracy_str is not None else "N/A"
            if refseq_accuracy_str is not None:
                combined_accuracy += f" ({refseq_accuracy_str})"
            row.append(combined_accuracy)

        latex_table += " & ".join(row) + " \\\\\n"

    # Close the table and print it
    latex_table += "\\bottomrule\n\\end{tabular}"
    print(latex_table)

In [24]:
# Print the per-species table for the TSS-annotated subset.
generate_accuracy_table_species(accuracies_species_TSS_true, TSS=True)

  transcript_count_refseq = TIS_transformer_df_full_TSS[(TIS_transformer_df_full_TSS["label"] == 1.0) & (TIS_transformer_df_full_TSS["species"] == species) & (TIS_transformer_df_full["annotation_source"].isin(["RefSeq", "BestRefSeq"]))].shape[0]


\begin{tabular}{lrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr}
\toprule
 & homo_sapiens \\
\midrule
Transcript Count & 9542 (5280) \\
AUGUSTUS (Softmasked) & 60.31 (63.66) \\
TIS Transformer & 89.39 (86.86) \\
NetStart 1.0 Ablation & 43.75 (49.47) \\
ESM2 Finetuned Ablation & 91.25 (90.57) \\
NetStart 2.0 (Group-specific) & 92.17 (91.44) \\
Tiberius (Softmasked) & 72.55 (74.49) \\
\bottomrule
\end{tabular}


In [17]:
def calculate_accuracies(filtered_df):
    """Count the rows labelled 1.0 and the total rows of a DataFrame.

    Returns a ``(true_preds, total_preds)`` tuple, where ``true_preds`` is the
    number of rows whose 'label' equals 1.0 and ``total_preds`` is the number
    of rows in ``filtered_df``.
    """
    is_positive = filtered_df["label"] == 1.0
    return len(filtered_df.loc[is_positive]), len(filtered_df)

def get_species_accuracies(preds_df, species):
    """Count correct and total top predictions for one species.

    For the RefSeq/BestRefSeq rows, the Gnomon rows and all rows of the
    species, the row with the highest 'preds' per 'seq_number' is selected and
    passed to ``calculate_accuracies``.

    Returns a dict with the keys "RefSeq", "Gnomon" and "Overall", each mapped
    to a ``(true_count, total_count)`` tuple.

    NOTE(review): this reuses the name of the seven-argument
    ``get_species_accuracies`` defined earlier in the notebook, so running
    this cell replaces that function.
    """
    species_rows = preds_df[preds_df["species"] == species]
    source = species_rows["annotation_source"]

    subsets = {
        "RefSeq": species_rows[source.isin(["RefSeq", "BestRefSeq"])],
        "Gnomon": species_rows[source == "Gnomon"],
        "Overall": species_rows,
    }

    counts = {}
    for subset_name, rows in subsets.items():
        # Highest-scoring row of every sequence number within this subset
        top_rows = rows.loc[rows.groupby('seq_number')['preds'].idxmax()]
        counts[subset_name] = calculate_accuracies(top_rows)
    return counts

def get_species_accuracies_gene_finder(preds_df, species):
    """Count gene-finder hits and totals for one species.

    Only rows of the species with seq_type == "TIS" are considered. For the
    RefSeq/BestRefSeq rows, the Gnomon rows and all of these rows, the sum of
    'preds' and the number of rows are reported.

    Returns a dict with the keys "RefSeq", "Gnomon" and "Overall", each mapped
    to a ``(sum_of_preds, row_count)`` tuple.
    """
    tis_rows = preds_df[(preds_df["species"] == species) & (preds_df["seq_type"] == "TIS")]
    source = tis_rows["annotation_source"]

    def hits_and_total(rows):
        return rows["preds"].sum(), rows.shape[0]

    return {
        "RefSeq": hits_and_total(tis_rows[source.isin(["RefSeq", "BestRefSeq"])]),
        "Gnomon": hits_and_total(tis_rows[source == "Gnomon"]),
        "Overall": hits_and_total(tis_rows),
    }

def get_group_accuracies(preds_df, species_group_list, gene_finder):
    """Pool per-species counts over a group of species and derive accuracies.

    ``gene_finder`` selects the per-species counter:
    ``get_species_accuracies_gene_finder`` when truthy, otherwise
    ``get_species_accuracies``.

    Returns a dict with the keys "RefSeq", "Gnomon" and "Overall". Each value
    is a dict holding "accuracy" (percent rounded to three decimals, or None
    when the total is zero), "true_count" and "total_count".
    """
    sources = ("RefSeq", "Gnomon", "Overall")
    hits = dict.fromkeys(sources, 0)
    totals = dict.fromkeys(sources, 0)

    # Sum the (true, total) pairs of every species in the group
    for species in species_group_list:
        if gene_finder:
            per_species = get_species_accuracies_gene_finder(preds_df, species)
        else:
            per_species = get_species_accuracies(preds_df, species)
        for source in sources:
            hits[source] = hits[source] + per_species[source][0]
            totals[source] = totals[source] + per_species[source][1]

    # Turn the pooled counts into a percentage, keeping the raw counts
    final_results = {}
    for source in sources:
        if totals[source] != 0:
            accuracy = round(hits[source] / totals[source] * 100, 3)
        else:
            accuracy = None
        final_results[source] = {
            "accuracy": accuracy,
            "true_count": int(hits[source]),
            "total_count": int(totals[source]),
        }
    return final_results

def get_group_accuracies_merged(preds_df, species_group_list, group_name, accuracies_dict, gene_finder):
    """Store the group's accuracies under ``group_name`` and return the dict.

    ``accuracies_dict`` is modified in place; the same object is returned.
    """
    group_result = get_group_accuracies(preds_df, species_group_list, gene_finder)
    accuracies_dict[group_name] = group_result
    return accuracies_dict

def get_group_accuracies_final(preds_df, gene_finder):
    """Compute group accuracies for all five systematic groups.

    Returns a dict keyed by "Vertebrates", "Invertebrates", "Plants", "Fungi"
    and "Protozoans" (in that order), each mapped to the result of
    ``get_group_accuracies`` for the matching module-level species list.
    """
    group_species = (
        ("Vertebrates", vertebrates_list),
        ("Invertebrates", invertebrates_list),
        ("Plants", plants_list),
        ("Fungi", fungi_list),
        ("Protozoans", protozoa_list),
    )

    accuracies_dict = {}
    for group_name, species_group in group_species:
        accuracies_dict = get_group_accuracies_merged(preds_df, species_group, group_name, accuracies_dict, gene_finder)
    return accuracies_dict


In [18]:
# Get group preds (only transcripts with an annotated transcription start site)
group_accuracies_dict_tis_transformer = get_group_accuracies_final(TIS_transformer_df_full[TIS_transformer_df_full["TSS_annotated"] == True], gene_finder=False)
group_accuracies_dict_netstart_overall = get_group_accuracies_final(netstart_overall_df_full[netstart_overall_df_full["TSS_annotated"] == True], gene_finder=False)
group_accuracies_dict_netstart_group = get_group_accuracies_final(netstart_group_specific_df_full[netstart_group_specific_df_full["TSS_annotated"] == True], gene_finder=False)
group_accuracies_dict_esm2_finetuned_ablation = get_group_accuracies_final(esm2_finetuned_ablation_df_full[esm2_finetuned_ablation_df_full["TSS_annotated"] == True], gene_finder=False)
group_accuracies_dict_netstart1_ablation = get_group_accuracies_final(netstart1_ablation_df_full[netstart1_ablation_df_full["TSS_annotated"] == True], gene_finder=False)
group_accuracies_dict_augustus = get_group_accuracies_final(augustus_df_full[augustus_df_full["TSS_annotated"] == True], gene_finder=True)
# The per-species TSS filtering earlier in the notebook matches the Tiberius
# 'TSS_annotated' column against the strings "True" / "True\n". A plain
# `== True` comparison selects nothing if the column holds strings, so accept
# both the boolean and the string spellings here.
group_accuracies_dict_tiberius = get_group_accuracies_final(tiberius_df_full[tiberius_df_full["TSS_annotated"].isin([True, "True", "True\n"])], gene_finder=True)


In [None]:

# Display name of every model paired with its group-level accuracy dictionary.
# A single mapping keeps names and results aligned (previously two parallel lists).
model_results = {
    'AUGUSTUS': group_accuracies_dict_augustus,
    'TIS Transformer': group_accuracies_dict_tis_transformer,
    'ESM-2 Finetuned': group_accuracies_dict_esm2_finetuned_ablation,
    'NetStart 1.0 ablation': group_accuracies_dict_netstart1_ablation,
    'NetStart 2.0': group_accuracies_dict_netstart_overall,
    'NetStart 2.0 (group-specific checkpoints)': group_accuracies_dict_netstart_group,
}
models = list(model_results)


def _format_accuracy(accuracy):
    """Format an accuracy with two decimals; None (group without rows) becomes "N/A"."""
    return "N/A" if accuracy is None else f"{accuracy:.2f}"


# Systematic groups become the table columns
systematic_groups = list(group_accuracies_dict_tis_transformer.keys())

# One "<overall> (<refseq>)" entry per model for every group
restructured_data = {
    "Model": models,
}
for group in systematic_groups:
    restructured_data[group] = [
        f"{_format_accuracy(data_dict[group]['Overall']['accuracy'])} ({_format_accuracy(data_dict[group]['RefSeq']['accuracy'])})"
        for data_dict in model_results.values()
    ]

# Accuracy table indexed by model name
df_restructured = pd.DataFrame(restructured_data).set_index("Model")

# "Total Count" row: overall and RefSeq totals taken from the TIS Transformer results
total_count_adjusted = {group: f"{group_accuracies_dict_tis_transformer[group]['Overall']['total_count']} ({group_accuracies_dict_tis_transformer[group]['RefSeq']['total_count']})"
                        for group in systematic_groups}
total_count_df = pd.DataFrame(total_count_adjusted, index=["Total Count"])

# Combine the accuracy rows with the total counts
df_final = pd.concat([df_restructured, total_count_df])

In [None]:
# Render the combined accuracy / total-count table as LaTeX source and print it.
latex_table = df_final.to_latex(float_format="%.2f")
print(latex_table)