In [39]:
import pandas as pd
import polars as pl

def summarize_failure_modes(
    file_path: str,
    elements_to_sumarize: list[str],
    use_polars: bool = False,
) -> list[tuple[str, str]]:
    """
    Count, for each requested column of a CSV file, the rows whose value equals 1.

    The file is read once with the selected backend; every requested column is
    then compared against ``1`` and the number of matching rows is reported.
    Missing values never compare equal to 1, so they are not counted.

    Args:
        file_path: Path to the CSV file.
        elements_to_sumarize: Column names to summarize. Order and duplicates
            are preserved in the result.
        use_polars: Whether to use Polars (True) or Pandas (False).

    Returns:
        A list of ``(column_name, count_as_string)`` tuples, one per entry of
        ``elements_to_sumarize``. An empty input list yields an empty list
        (the file is still read).

    Raises:
        Errors from the chosen backend (unreadable file, unknown column, ...)
        propagate unchanged.
    """
    # Pick the backend once; both branches expose the same tiny counting helper
    # so the result is assembled in a single place.
    if use_polars:
        df = pl.read_csv(file_path)

        def count_flagged(column: str) -> int:
            # Height of the filtered frame == number of rows where column == 1.
            return df.filter(pl.col(column) == 1).height
    else:
        df = pd.read_csv(file_path)

        def count_flagged(column: str) -> int:
            # Sum the boolean mask instead of materializing a filtered copy of
            # the frame just to read its row count.
            return int((df[column] == 1).sum())

    return [(column, str(count_flagged(column))) for column in elements_to_sumarize]

In [None]:
# Input CSV for the failure-mode taxonomy of the synthetic queries (relative path).
file = "../homeworks/my_hw2/synthetic_queries_for_analysis_es.csv"

In [None]:
# Failure Mode Taxonomy using Pandas
sumary_pd = summarize_failure_modes(
    file,
    ['Localization_Errors', 'Incorrect_Assumption_Number_People'],
    use_polars=False,
)
# Plain loop instead of a list comprehension: the comprehension was used only
# for its print() side effects and built a throwaway list of None values.
for failure_mode, occurrences in sumary_pd:
    print(f'{failure_mode}:{occurrences}')

In [None]:
# Failure Mode Taxonomy using Polars
sumary_pl = summarize_failure_modes(
    file,
    ['Localization_Errors', 'Incorrect_Assumption_Number_People'],
    use_polars=True,
)
# Plain loop instead of a list comprehension: the comprehension was used only
# for its print() side effects and built a throwaway list of None values.
for failure_mode, occurrences in sumary_pl:
    print(f'{failure_mode}:{occurrences}')

Localization_Errors:10
Incorrect_Assumption_Number_People:4
Localization_Errors:10
Incorrect_Assumption_Number_People:4
