In [2]:
import pandas as pd

def calculate_statistics_from_csv(csv_file, columns_of_interest=None):
    """Count occurrences of the codes 0, 1 and 2 in selected CSV columns.

    The CSV is loaded with ``pd.read_csv``. For every requested column that
    exists in the file, the number of cells equal to 0, 1 and 2 is reported
    under the labels 'Lower', 'Moderate' and 'Higher' respectively. Any other
    value in a column (including missing values) is ignored.

    Args:
        csv_file: Path or file-like object accepted by ``pd.read_csv``.
        columns_of_interest: Optional iterable of column names to summarise.
            Defaults to the five rating columns and their ``*_Benefit``
            counterparts.

    Returns:
        A DataFrame indexed by column name with the columns
        'Lower', 'Moderate' and 'Higher'. Columns missing from the CSV are
        skipped with a printed warning; if none are present the result is an
        empty DataFrame that still carries the three label columns.
    """
    # Position in this list is the numeric code stored in the CSV (0, 1, 2).
    level_labels = ['Lower', 'Moderate', 'Higher']

    if columns_of_interest is None:
        columns_of_interest = [
            'PR', 'SR', 'NR', 'WS', 'SFST',
            'PR_Benefit', 'SR_Benefit', 'NR_Benefit', 'WS_Benefit', 'SFST_Benefit',
        ]

    df = pd.read_csv(csv_file)

    # Maps column name -> [count of 0, count of 1, count of 2].
    statistics = {}
    for column in columns_of_interest:
        if column not in df.columns:
            print(f"Warning: Column {column} not found in the CSV file.")
            continue

        counts = df[column].value_counts().to_dict()
        # A code that never occurs is absent from value_counts; report it as 0.
        statistics[column] = [counts.get(code, 0) for code in range(len(level_labels))]

    # Passing index/columns explicitly keeps the frame well-formed even when
    # no requested column was found (assigning column labels to an empty,
    # zero-column frame would raise a length-mismatch ValueError).
    statistics_df = pd.DataFrame(
        list(statistics.values()),
        index=list(statistics),
        columns=level_labels,
    )

    print(statistics_df)

    return statistics_df

def main():
    """Run the 0/1/2 count report on the hard-coded CSV file.

    calculate_statistics_from_csv prints the resulting table itself, so the
    returned DataFrame is not kept here.
    """
    # Path to the CSV file, resolved relative to the current working directory.
    csv_file = '3_output_class_rem_extra.csv'

    calculate_statistics_from_csv(csv_file)

# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()


              Lower  Moderate  Higher
PR               12       102      96
SR               95        63      52
NR               65        67      78
WS               85        70      55
SFST             74        48      88
PR_Benefit      109         9      92
SR_Benefit      118        75      17
NR_Benefit       76        89      45
WS_Benefit      144        41      25
SFST_Benefit     80        48      82
