In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the laptop price dataset; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv('laptop_price - dataset.csv')

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,Company,Product,TypeName,Inches,ScreenResolution,CPU_Company,CPU_Type,CPU_Frequency (GHz),RAM (GB),Memory,GPU_Company,GPU_Type,OpSys,Weight (kg),Price (Euro)
0,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel,Core i5,2.3,8,128GB SSD,Intel,Iris Plus Graphics 640,macOS,1.37,1339.69
1,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel,Core i5,1.8,8,128GB Flash Storage,Intel,HD Graphics 6000,macOS,1.34,898.94
2,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel,Core i5 7200U,2.5,8,256GB SSD,Intel,HD Graphics 620,No OS,1.86,575.0
3,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel,Core i7,2.7,16,512GB SSD,AMD,Radeon Pro 455,macOS,1.83,2537.45
4,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel,Core i5,3.1,8,256GB SSD,Intel,Iris Plus Graphics 650,macOS,1.37,1803.6


In [4]:
import pandas as pd
import numpy as np

def apply_aggregation(df, col, agg_func):
    """
    Aggregate a single column of a DataFrame.

    Selects ``df[col]`` and forwards ``agg_func`` (for example the string
    'sum' or 'mean') to the ``.agg`` method of the selected column, returning
    whatever that call returns.
    """
    selected_column = df[col]
    return selected_column.agg(agg_func)

def _grand_total_row(pivot_df, aggregation):
    """Build a one-row DataFrame aggregating every numeric column of ``pivot_df``."""
    grand_totals = pd.DataFrame()
    total_label_placed = False
    # NOTE: the loop visits every column, so numeric index columns (already
    # moved into the frame by reset_index) are aggregated as well.
    for col in pivot_df.columns:
        if pd.api.types.is_numeric_dtype(pivot_df[col]):
            grand_totals[col] = [apply_aggregation(pivot_df, col, aggregation)]
        elif pd.api.types.is_string_dtype(pivot_df[col]):
            # Only the first string column carries the 'Total' label.
            grand_totals[col] = [''] if total_label_placed else ['Total']
            total_label_placed = True
    return grand_totals


def _with_subtotal_rows(frame, subtotal_col, aggregation):
    """Return (frame with a subtotal row after each ``subtotal_col`` group, original column order)."""
    frame = frame.astype({subtotal_col: str})

    # Remember the incoming column order; the caller restores it afterwards.
    original_columns_order = frame.columns.tolist()

    frame = frame.sort_index(axis=1)
    grouped = frame.groupby(subtotal_col)
    subtotals = grouped.agg(aggregation, numeric_only=True).reset_index()

    all_rows = []
    for group_name, group_df in grouped:
        # Detail rows of the group first, then its subtotal row.
        for _, row in group_df.iterrows():
            all_rows.append(row.to_dict())
        subtotal_row = {}
        for col in frame.columns:
            if (col == subtotal_col) or (subtotal_col in col):
                subtotal_row[col] = f"Subtotal {group_name}"
            elif pd.api.types.is_numeric_dtype(frame[col]):
                subtotal_row[col] = subtotals.loc[subtotals[subtotal_col] == group_name, col].iloc[0]
            else:
                subtotal_row[col] = ''
        all_rows.append(subtotal_row)

    return pd.DataFrame(all_rows), original_columns_order


def totals_subtotals(df, list_index, list_values, list_columns, list_subtotals, aggregation, totals_columns=False, totals_rows=False):
    """
    Create a pivot table with subtotals and optional grand totals.

    The input is pivoted with ``pd.pivot_table`` using ``list_index`` as rows,
    ``list_columns`` as columns and ``list_values`` as the aggregated values.
    A subtotal row is then inserted after each group of ``list_subtotals[0]``.

    Parameters:
    -----------
    df : DataFrame
         The input DataFrame. It is copied; the caller's frame is not modified.

    list_index : list
         Column names to use as the pivot table index (rows).

    list_values : list
         Column names to aggregate. They are converted to float before pivoting.

    list_columns : list
         Column names to use as the pivot table columns.

    list_subtotals : list
         A list with exactly one element: the column on which subtotals are
         computed. The column must be present in `list_index`. This argument is
         required; an invalid value raises an Exception.

    aggregation : str
         One of 'sum', 'min', 'max', 'mean' or 'median'. The same function is
         used for the pivot, the subtotals and the grand-total row.

    totals_columns : bool, default False
         If True, a final row labelled 'Total' is appended. Each numeric column
         of that row is the aggregation of the pivot rows of that column.

    totals_rows : bool, default False
         If True, the pivot is built with ``margins=True`` and
         ``margins_name='Total'``. The margin row (last row) is dropped, the
         'Total' column produced by the margins is kept, and the rows are sorted
         descending by the last column.

    Returns:
    --------
    df_with_subtotals : DataFrame
         The pivot table (index columns reset into regular columns, MultiIndex
         column labels) with subtotal rows and the requested totals. Missing
         values in the `list_values` columns are filled with 0 and the values
         are rounded to 2 decimals.

    Raises:
    -------
    Exception
         If `list_columns`, `list_values` or `list_index` is not a list, if
         `aggregation` is not supported, if `totals_rows` / `totals_columns` is
         not True or False, if `list_subtotals` is invalid, if the value columns
         cannot be converted to float, or if the subtotal computation fails.

    Notes:
    ------
    - After the arguments have been validated, the function calls
      `pd.set_option('future.no_silent_downcasting', True)`, which changes a
      global pandas option for the rest of the session.
    """
    known_aggregations = ('sum', 'min', 'max', 'mean', 'median')

    # Validate everything up front so bad input fails with a clear message
    # before any data is copied or any global option is changed.
    if not isinstance(list_columns, list):
        raise Exception('Provide a list for columns')
    if not isinstance(list_values, list):
        raise Exception('Provide a list for values')
    if not isinstance(list_index, list):
        raise Exception('Provide a list for indexing')
    if aggregation not in known_aggregations:
        raise Exception('Set an appropriate aggregation function. Check the doc.')
    # Membership uses ==, so 1/0 are still accepted exactly as before.
    if totals_rows not in (True, False):
        raise Exception('totals_rows must be True or False')
    if totals_columns not in (True, False):
        raise Exception('totals_columns must be True or False')
    if not (isinstance(list_subtotals, list) and len(list_subtotals) == 1 and list_subtotals[0] in list_index):
        raise Exception('Provide a valid subtotal (a list of length 1 with a column present in list_index)')

    pd.set_option('future.no_silent_downcasting', True)

    df1 = df.copy()
    try:
        # The value columns must be float for the aggregation.
        df1 = df1.astype({value_col: 'float' for value_col in list_values})
    except Exception as e:
        raise Exception('The list you provided cannot be converted to a float data type') from e

    pivot_kwargs = dict(values=list_values, columns=list_columns, index=list_index, aggfunc=aggregation)
    if totals_rows:
        pivot_kwargs.update(margins=True, margins_name='Total')
    pivot_df = pd.pivot_table(df1, **pivot_kwargs).reset_index()

    if totals_rows:
        # Drop the margin row; the grand-total row, if requested, is rebuilt below.
        pivot_df = pivot_df.iloc[:-1, :]
        if totals_columns:
            pivot_df = pd.concat([pivot_df, _grand_total_row(pivot_df, aggregation)], ignore_index=True)
            # Sort the detail rows by the last column and keep the total row last.
            last_column = pivot_df.columns[-1]
            detail_rows = pivot_df.iloc[:-1, :].sort_values(by=last_column, ascending=False)
            pivot_df_sorted = pd.concat([detail_rows, pivot_df.iloc[-1:, :]], ignore_index=True)
        else:
            last_column = pivot_df.columns[-1]
            pivot_df_sorted = pivot_df.sort_values(by=last_column, ascending=False)
    else:
        if totals_columns:
            pivot_df = pd.concat([pivot_df, _grand_total_row(pivot_df, aggregation)], ignore_index=True)
        pivot_df_sorted = pivot_df.copy()

    subtotal_col = list_subtotals[0]
    if totals_columns:
        try:
            # Keep the grand-total row out of the grouping and re-append it last.
            grand_total = pivot_df_sorted.iloc[-1]
            df_with_subtotals, original_columns_order = _with_subtotal_rows(
                pivot_df_sorted.iloc[:-1], subtotal_col, aggregation
            )
            df_with_subtotals = pd.concat([df_with_subtotals, pd.DataFrame(grand_total).T], ignore_index=True)
        except Exception as e:
            raise Exception("An error occurred while calculating subtotals with totals_columns=True") from e
    else:
        try:
            df_with_subtotals, original_columns_order = _with_subtotal_rows(
                pivot_df_sorted, subtotal_col, aggregation
            )
        except Exception as e:
            raise Exception('Provide a valid column containing string or categorical values') from e

    # Restore MultiIndex column labels and the column order seen before sorting.
    df_with_subtotals.columns = pd.MultiIndex.from_tuples(df_with_subtotals.columns)
    df_with_subtotals = df_with_subtotals[original_columns_order]
    df_with_subtotals.loc[:, list_values] = (df_with_subtotals.loc[:, list_values].fillna(0).infer_objects(copy=False).round(decimals=2))

    return df_with_subtotals


In [5]:
# Mean price per laptop type for every (Inches, Company, ScreenResolution)
# combination, with a subtotal row per Company and both total options enabled.
df1 = totals_subtotals(
    df,
    list_index=['Inches', 'Company', 'ScreenResolution'],
    list_values=['Price (Euro)'],
    list_columns=['TypeName'],
    list_subtotals=['Company'],
    aggregation='mean',
    totals_columns=True,
    totals_rows=True,
)

In [11]:
# Display the resulting pivot table with its subtotal rows.
df1

Unnamed: 0_level_0,Inches,Company,ScreenResolution,Price (Euro),Price (Euro),Price (Euro),Price (Euro),Price (Euro),Price (Euro),Price (Euro)
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,2 in 1 Convertible,Gaming,Netbook,Notebook,Ultrabook,Workstation,Total
0,17.3,Acer,IPS Panel Full HD 1920x1080,0.0,2277.67,0.0,754.0,0.0,0.0,1407.0
1,14.0,Acer,IPS Panel Full HD / Touchscreen 1920x1080,1149.0,0.0,0.0,0.0,0.0,0.0,1149.0
2,13.3,Acer,IPS Panel Full HD 1920x1080,0.0,0.0,0.0,0.0,989.0,0.0,989.0
3,13.3,Acer,IPS Panel Full HD / Touchscreen 1920x1080,831.0,0.0,0.0,0.0,0.0,0.0,831.0
4,14.0,Acer,IPS Panel Full HD 1920x1080,0.0,0.0,0.0,719.0,857.67,0.0,788.33
...,...,...,...,...,...,...,...,...,...,...
230,,Subtotal Vero,,0.0,0.0,0.0,217.42,0.0,0.0,217.42
231,15.6,Xiaomi,IPS Panel Full HD 1920x1080,0.0,0.0,0.0,1299.47,0.0,0.0,1299.47
232,13.3,Xiaomi,IPS Panel Full HD 1920x1080,0.0,0.0,0.0,0.0,967.45,0.0,967.45
233,,Subtotal Xiaomi,,0.0,0.0,0.0,1299.47,967.45,0.0,1133.46
