In [10]:
import pandas as pd
import numpy as np

df = pd.read_csv('./data/retail_price.csv') \
    .assign(
        month_year=lambda d: pd.to_datetime(d['month_year']),
        year=lambda d: d['month_year'].dt.year,
        month=lambda d: d['month_year'].dt.month,
        weekend=lambda d: d['weekday'].apply(lambda x: 1 if x >= 5 else 0),
        text=lambda d: d['product_category_name'].apply(lambda s: ' '.join(s.split('_')))
    )
df.shape

(676, 31)

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 676 entries, 0 to 675
Data columns (total 31 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   product_id                  676 non-null    object        
 1   product_category_name       676 non-null    object        
 2   month_year                  676 non-null    datetime64[ns]
 3   qty                         676 non-null    int64         
 4   total_price                 676 non-null    float64       
 5   freight_price               676 non-null    float64       
 6   unit_price                  676 non-null    float64       
 7   product_name_lenght         676 non-null    int64         
 8   product_description_lenght  676 non-null    int64         
 9   product_photos_qty          676 non-null    int64         
 10  product_weight_g            676 non-null    int64         
 11  product_score               676 non-null    float64       

In [29]:
df \
    .groupby(['product_category_name', 'unit_price']) \
    .size() \
    .reset_index() \
    .groupby(['product_category_name']) \
    .size()

product_category_name
bed_bath_table           26
computers_accessories    48
consoles_games           10
cool_stuff               18
furniture_decor          13
garden_tools             56
health_beauty            33
perfumery                14
watches_gifts            78
dtype: int64

In [30]:
pq_df = df.groupby(['product_category_name', 'unit_price']) \
    .agg(
        q=pd.NamedAgg('qty', 'sum')
    ) \
    .reset_index() \
    .rename(columns={'unit_price': 'p', 'product_category_name': 'category'}) \
    .set_index(['category']) \
    .join(df \
        .rename(columns={'product_category_name': 'category'}) \
        .groupby(['category']) \
        .agg(c=pd.NamedAgg('freight_price', 'mean'))
    )
pq_df.shape

(296, 3)

In [31]:
pq_df

Unnamed: 0_level_0,p,q,c
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bed_bath_table,39.240000,8,16.139718
bed_bath_table,39.990000,105,16.139718
bed_bath_table,40.531818,11,16.139718
bed_bath_table,44.154444,32,16.139718
bed_bath_table,45.900000,76,16.139718
...,...,...,...
watches_gifts,339.900000,1,16.492840
watches_gifts,345.706667,9,16.492840
watches_gifts,346.158000,10,16.492840
watches_gifts,347.000000,5,16.492840


In [55]:
from sklearn.linear_model import LinearRegression

def get_mc(category):
    return pq_df[pq_df.index==category].iloc[0].c

def get_Xy(category):
    Xy = pq_df[pq_df.index==category]
    X = np.log(Xy[['p']])
    y = np.log(Xy['q'])
    
    return X, y

def get_model(X, y):
    m = LinearRegression()
    m.fit(X, y)
    
    return m
    
def get_params(model):
    return pd.Series([model.intercept_, model.coef_[0]], ['b_0', 'b_1'])

def get_pq(b_0, b_1):
    return lambda q: (-b_0 / b_1) + (1 / b_1) * q

def get_qp(b_0, b_1):
    return lambda p: b_0 + (b_1 * p)

def get_mr(b_0, b_1):
    return lambda q: (-b_0 / b_1) + (2 * (1 / b_1) * q)

def get_qo(b_0, b_1):
    z_0 = (-b_0 / b_1)
    z_1 = 2 * (1 / b_1)
    return lambda mc: (mc - z_0) / z_1

def get_funcs(b_0, b_1):
    pq = get_pq(b_0, b_1)
    qp = get_qp(b_0, b_1)
    mr = get_mr(b_0, b_1)
    qo = get_qo(b_0, b_1)
    r = lambda p, q: p * q
    t = lambda p, q, mc: (p * q) - (mc * q)
    
    return pq, qp, mr, qo, r, t

def get_opt(f, mc):
    pq_f, qp_f, mr_f, qo_f, r_f, t_f = f
    
    q_opt = qo_f(mc)
    p_opt = pq_f(q_opt)
    mr_opt = mr_f(q_opt)
    r_opt = r_f(p_opt, q_opt)
    t_opt = t_f(p_opt, q_opt, mc)

    return pd.Series({
        'mc': mc,
        'q_opt': q_opt,
        'p_opt': p_opt,
        'mr_opt': mr_opt,
        'r_opt': r_opt,
        't_opt': t_opt
    })

def optimize(category):
    X, y = get_Xy(category)
    m = get_model(X, y)
    p = get_params(m)
    f = get_funcs(p.b_0, p.b_1)
    mc = np.log(get_mc(category))
    
    return {**{'category': category}, **get_opt(f, mc).to_dict()}

opt_df = pd.DataFrame([optimize(c) for c in pq_df.index.unique()])
opt_df.shape

(9, 7)

In [57]:
np.exp(opt_df.set_index(['category']))

Unnamed: 0_level_0,mc,q_opt,p_opt,mr_opt,r_opt,t_opt
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bed_bath_table,16.139718,5.993082,3469.87,16.139718,2184266.0,15012.05
computers_accessories,25.103741,3.54577,2.368603e-11,25.103741,3.55392e-14,6.01148e-16
consoles_games,14.809415,10.203546,31.00986,14.809415,2913.127,5.565571
cool_stuff,18.975096,5.301944,802.2958,18.975096,69926.75,515.868
furniture_decor,16.944617,11.426505,173.3073,16.944617,284193.2,288.2514
garden_tools,28.45831,7.659429,205.1773,28.45831,50974.24,55.80485
health_beauty,18.607448,6.315087,1810042000.0,18.607448,1.151803e+17,526526100000000.0
perfumery,14.336311,12.563646,90.99016,14.336311,90748.14,107.428
watches_gifts,16.49284,13.434794,139.6856,16.49284,373919.0,257.2936


In [58]:
df.groupby(['product_category_name'])['unit_price'].mean()

product_category_name
bed_bath_table            78.629278
computers_accessories    119.482323
consoles_games            27.033766
cool_stuff               107.857512
furniture_decor           60.154262
garden_tools              80.094699
health_beauty            132.309870
perfumery                 89.348813
watches_gifts            164.880007
Name: unit_price, dtype: float64

In [59]:
df.groupby(['product_category_name'])['unit_price'].max()

product_category_name
bed_bath_table           215.000000
computers_accessories    178.057143
consoles_games            36.200000
cool_stuff               176.990000
furniture_decor          103.233333
garden_tools             187.854286
health_beauty            364.000000
perfumery                142.500000
watches_gifts            348.800000
Name: unit_price, dtype: float64