In [1]:
import pandas as pd
# Small in-memory sample: three equal-length columns (13 rows each) used by
# the grouping/binning experiments further down in the notebook.
data = dict(
    Age=[8, 12, 15, 22, 22, 30, 35, 55, 40, 45, 66, 44, 22],
    categorical_names=[
        'Tiny', 'Whiskers', 'Tiny', 'Shadow', 'Velvet', 'Luna', 'Sage',
        'Luna', 'Pepper', 'Sapphire', 'Sage', 'Luna', 'Pepper',
    ],
    Salary=[
        30000, 40000, 45000, 45000, 35000, 55000, 60000,
        65000, 70000, 85000, 60000, 65000, 70000,
    ],
)
df = pd.DataFrame(data)

In [2]:
# Display the sample DataFrame built above as this cell's output.
df

Unnamed: 0,Age,categorical_names,Salary
0,8,Tiny,30000
1,12,Whiskers,40000
2,15,Tiny,45000
3,22,Shadow,45000
4,22,Velvet,35000
5,30,Luna,55000
6,35,Sage,60000
7,55,Luna,65000
8,40,Pepper,70000
9,45,Sapphire,85000


## Range Logic

In [3]:
import pandas as pd
import numpy as np
import math


def calculate_num_bins(series):
    """Derive a bin count for ``pd.cut`` from the value range of ``series``.

    The range (``max - min``) is divided by ``sqrt(len(series))`` and rounded
    up. When the minimum is exactly 0 the range is widened by 1 before the
    division.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to be binned.

    Returns
    -------
    int
        Number of bins, always at least 1.
    """
    n = len(series)
    if n == 0:
        # Nothing to measure; a single bin is the only sensible answer.
        return 1

    scale = math.sqrt(n)
    lowest = series.min()
    value_range = series.max() - lowest

    # All-NaN input yields a NaN range, which int() cannot convert.
    if not np.isfinite(value_range):
        return 1

    if lowest == 0:
        # Widen the range by one *before* dividing. The previous expression,
        # `(max - min) + 1 / bin_width`, only divided the 1 because of operator
        # precedence, which made the bin count roughly equal to `max`.
        value_range = value_range + 1

    num_bins = int(np.ceil(value_range / scale))

    # A constant series has zero range; pd.cut rejects bins=0.
    return max(num_bins, 1)

def group_aggregates_resultant_df(df, chart_config, round_column=None):
    """Group ``df`` by the configured x-axis column and aggregate the y-axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain the columns named in ``chart_config``.
    chart_config : dict
        Reads ``"x-axis"`` and ``"y-axis"`` (column names), and optionally
        ``"operation"`` (aggregation passed to pandas, default ``"mean"``)
        and ``"num_bins"`` (bin count for numeric x-axes).
        NOTE: when x-axis and y-axis name the same column, this function
        overwrites ``chart_config["y-axis"]`` with ``"count"`` in place.
    round_column : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per group. The first column is the x-axis column (interval
        bins when the x-axis was binned); the second is either ``"count"``
        (x-axis == y-axis) or the aggregated y-axis column.
    """
    x_axis = chart_config["x-axis"]
    y_axis = chart_config["y-axis"]
    operation = chart_config.get("operation", "mean")

    # Numeric x-axes (int/uint/float/complex) with more than 10 distinct
    # values are cut into equal-width bins; everything else is grouped on
    # its raw values.
    use_bins = df[x_axis].dtype.kind in 'iufc' and df[x_axis].nunique() > 10
    if use_bins:
        num_bins = chart_config.get("num_bins")
        if num_bins is None:
            num_bins = calculate_num_bins(df[x_axis])
        group_key = pd.cut(df[x_axis], bins=num_bins, precision=0)
    else:
        group_key = x_axis

    # pd.cut yields a categorical grouper. Passing observed=False explicitly
    # keeps empty bins in the result (the behaviour this code has always
    # relied on) and silences the pandas FutureWarning about the changing
    # default. It has no effect on non-categorical groupers.
    grouped = df.groupby(group_key, observed=False)

    if x_axis == y_axis:
        # Same column on both axes: report how many rows fall in each group.
        resultant_df = grouped.size().reset_index(name="count")
        chart_config["y-axis"] = "count"
    elif use_bins:
        resultant_df = grouped.agg({y_axis: operation}).reset_index()
    else:
        resultant_df = grouped[y_axis].agg(operation).reset_index()

    return resultant_df

# Numeric x-axis: Age against mean Salary.
chart_config_num = {"x-axis": "Age", "y-axis": "Salary"}
result_df_num = group_aggregates_resultant_df(df, chart_config_num)
print("Result with numerical x-axis:", result_df_num, sep="\n")

# Categorical x-axis: one group per distinct name.
chart_config_cat = {"x-axis": "categorical_names", "y-axis": "Salary"}
result_df_cat = group_aggregates_resultant_df(df, chart_config_cat)
print("\nResult with categorical x-axis:", result_df_cat, sep="\n")


Result with numerical x-axis:
             Age   Salary
0    (8.0, 11.0]  30000.0
1   (11.0, 15.0]  40000.0
2   (15.0, 18.0]  45000.0
3   (18.0, 22.0]      NaN
4   (22.0, 25.0]  50000.0
5   (25.0, 28.0]      NaN
6   (28.0, 32.0]  55000.0
7   (32.0, 35.0]  60000.0
8   (35.0, 39.0]      NaN
9   (39.0, 42.0]  70000.0
10  (42.0, 46.0]  75000.0
11  (46.0, 49.0]      NaN
12  (49.0, 52.0]      NaN
13  (52.0, 56.0]  65000.0
14  (56.0, 59.0]      NaN
15  (59.0, 63.0]      NaN
16  (63.0, 66.0]  60000.0

Result with categorical x-axis:
  categorical_names        Salary
0              Luna  61666.666667
1            Pepper  70000.000000
2              Sage  60000.000000
3          Sapphire  85000.000000
4            Shadow  45000.000000
5              Tiny  37500.000000
6            Velvet  35000.000000
7          Whiskers  40000.000000


  resultant_df = df.groupby(pd.cut(df[x_axis], bins=num_bins, precision=0)).agg({y_axis: operation}).reset_index()


## Quartile Logic

In [4]:
import pandas as pd
import numpy as np
import math


def calculate_num_bins(series):
    """Derive a bin count for ``pd.cut`` from the interquartile range.

    The value range (``max - min``) is divided by the IQR (75th minus 25th
    percentile) and truncated to an integer.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to be binned. NaN entries are ignored, matching how
        ``series.max()`` / ``series.min()`` treat them.

    Returns
    -------
    int
        Number of bins, always at least 1. Falls back to 1 when the IQR is
        zero or undefined (constant, heavily tied, empty or all-NaN data).
    """
    # nanpercentile keeps the quartiles consistent with max()/min(), which
    # skip NaN; plain np.percentile would return NaN for the whole series.
    q1, q3 = np.nanpercentile(series, [25, 75])
    iqr = q3 - q1

    # A zero IQR (at least half the values identical) would divide by zero
    # and then fail in int(); a NaN IQR means there was no usable data.
    if not np.isfinite(iqr) or iqr <= 0:
        return 1

    num_bins = int((series.max() - series.min()) / iqr)
    return max(num_bins, 1)
    
def group_aggregates_resultant_df(df, chart_config, round_column=None):
    """Group ``df`` by the configured x-axis column and aggregate the y-axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain the columns named in ``chart_config``.
    chart_config : dict
        Reads ``"x-axis"`` and ``"y-axis"`` (column names), and optionally
        ``"operation"`` (aggregation passed to pandas, default ``"mean"``)
        and ``"num_bins"`` (bin count for numeric x-axes).
        NOTE: when x-axis and y-axis name the same column, this function
        overwrites ``chart_config["y-axis"]`` with ``"count"`` in place.
    round_column : optional
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    pandas.DataFrame
        One row per group. The first column is the x-axis column (interval
        bins when the x-axis was binned); the second is either ``"count"``
        (x-axis == y-axis) or the aggregated y-axis column.
    """
    x_axis = chart_config["x-axis"]
    y_axis = chart_config["y-axis"]
    operation = chart_config.get("operation", "mean")

    # Numeric x-axes (int/uint/float/complex) with more than 10 distinct
    # values are cut into equal-width bins; everything else is grouped on
    # its raw values.
    use_bins = df[x_axis].dtype.kind in 'iufc' and df[x_axis].nunique() > 10
    if use_bins:
        num_bins = chart_config.get("num_bins")
        if num_bins is None:
            num_bins = calculate_num_bins(df[x_axis])
        group_key = pd.cut(df[x_axis], bins=num_bins, precision=0)
    else:
        group_key = x_axis

    # pd.cut yields a categorical grouper. Passing observed=False explicitly
    # keeps empty bins in the result (the behaviour this code has always
    # relied on) and silences the pandas FutureWarning about the changing
    # default. It has no effect on non-categorical groupers.
    grouped = df.groupby(group_key, observed=False)

    if x_axis == y_axis:
        # Same column on both axes: report how many rows fall in each group.
        resultant_df = grouped.size().reset_index(name="count")
        chart_config["y-axis"] = "count"
    elif use_bins:
        resultant_df = grouped.agg({y_axis: operation}).reset_index()
    else:
        resultant_df = grouped[y_axis].agg(operation).reset_index()

    return resultant_df
# Numeric x-axis: Age against mean Salary.
chart_config_num = {"x-axis": "Age", "y-axis": "Salary"}
result_df_num = group_aggregates_resultant_df(df, chart_config_num)
print("Result with numerical x-axis:", result_df_num, sep="\n")

# Categorical x-axis: one group per distinct name.
chart_config_cat = {"x-axis": "categorical_names", "y-axis": "Salary"}
result_df_cat = group_aggregates_resultant_df(df, chart_config_cat)
print("\nResult with categorical x-axis:", result_df_cat, sep="\n")

Result with numerical x-axis:
            Age   Salary
0   (8.0, 37.0]  47500.0
1  (37.0, 66.0]  69000.0

Result with categorical x-axis:
  categorical_names        Salary
0              Luna  61666.666667
1            Pepper  70000.000000
2              Sage  60000.000000
3          Sapphire  85000.000000
4            Shadow  45000.000000
5              Tiny  37500.000000
6            Velvet  35000.000000
7          Whiskers  40000.000000


  resultant_df = df.groupby(pd.cut(df[x_axis], bins=num_bins, precision=0)).agg({y_axis: operation}).reset_index()
