In [None]:
import pandas as pd

# --- Configuration ---
csv_file_path = './val_data.csv'  # <--- IMPORTANT: Replace with the actual path to your CSV file

# List of your label columns
label_columns = ['ac', 'air_panas', 'bau', 'general', 'kebersihan', 'linen', 'service', 'sunrise_meal', 'tv', 'wifi']

# How many of the most frequent combinations to print (the CSV always gets all of them)
top_n = 20
# --- End Configuration ---


def format_top_combinations(counts_df, limit=20):
    """Render the first *limit* rows of *counts_df* as plain text.

    ``DataFrame.to_string`` is used instead of ``print(df)`` so the result
    does not depend on pandas display options: no head/tail truncation with
    a ``...`` row and no wrapping of wide rows onto continuation lines.

    Args:
        counts_df: DataFrame already sorted so the rows of interest come first.
        limit: Maximum number of rows to include.

    Returns:
        The rendered table as a single string (header line plus data rows).
    """
    return counts_df.head(limit).to_string()


try:
    # Load the dataset from the CSV file
    df = pd.read_csv(csv_file_path)

    # Verify that all specified label columns exist in the DataFrame
    missing_cols = [col for col in label_columns if col not in df.columns]
    if missing_cols:
        print(f"Error: The following label columns were not found in the CSV: {missing_cols}")
        print(f"Available columns are: {list(df.columns)}")
    else:
        # Count rows sharing an identical combination of label values.
        # NOTE: groupby uses dropna=True by default, so rows with a missing
        # value in any label column are not counted.
        combination_counts = df.groupby(label_columns).size()

        # Turn the MultiIndex Series into a flat DataFrame with a 'Count'
        # column, most frequent combinations first.
        combination_counts_df = combination_counts.reset_index(name='Count')
        combination_counts_df = combination_counts_df.sort_values('Count', ascending=False)

        # Persist the complete table; only the console output is limited.
        combination_counts_df.to_csv('label_combination_counts_2.csv', index=False)

        # --- Display the results ---
        print(f"Found {len(combination_counts_df)} unique label combinations.")
        print(f"\nLabel Combination Occurrences (Top {top_n} shown if more exist):")
        # Print exactly the top rows promised by the header above.
        print(format_top_combinations(combination_counts_df, top_n))

except FileNotFoundError:
    print(f"Error: The file '{csv_file_path}' was not found.")
    print("Please make sure the file path is correct and the file exists.")
except Exception as e:
    # Top-level boundary of a notebook cell: report instead of raising a traceback.
    print(f"An unexpected error occurred: {e}")

Found 187 unique label combinations.

Label Combination Occurrences (Top 20 shown if more exist):
       ac air_panas   bau general kebersihan linen service sunrise_meal    tv  \
109  neut      neut  neut    neut        neg  neut    neut         neut  neut   
125  neut      neut  neut    neut       neut  neut     neg         neut  neut   
120  neut      neut  neut    neut       neut   neg    neut         neut  neut   
57   neut       neg  neut    neut       neut  neut    neut         neut  neut   
99   neut      neut  neut    neut        neg   neg    neut         neut  neut   
..    ...       ...   ...     ...        ...   ...     ...          ...   ...   
76   neut      neut   neg    neut        neg  neut     pos         neut  neut   
77   neut      neut   neg    neut       neut   neg     neg         neut  neut   
78   neut      neut   neg    neut       neut   neg    neut         neut  neut   
81   neut      neut   neg    neut       neut  neut    neut          neg  neut   
186   pos  