In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
raw_data = pd.read_csv("DS_test_reformulation.csv", sep=";").drop(columns=["Unnamed: 0"])

In [3]:
def str_to_float(value):
    # Replace ',' with '.' and convert to float
    return float(value.replace(',', '.'))

In [4]:
dictionary_to_number = {"mobile": 1, "laptop": 2}
raw_data['device'] = raw_data['device'].map(dictionary_to_number)
raw_data["percentual_price_increment"] = raw_data.percentual_price_increment.apply(str_to_float)
raw_data["profit_per_order"] = raw_data.profit_per_order.apply(str_to_float)
raw_data["profit"] = raw_data.profit.apply(str_to_float)
raw_data['date'] = pd.to_datetime(raw_data['date'])
raw_data

Unnamed: 0,date,weekday,device,percentual_price_increment,traffic,orders,profit,profit_per_order
0,2022-01-01,6,1,0.150,2225,8,357.50,44.687500
1,2022-01-02,7,1,0.075,2348,15,712.50,47.500000
2,2022-01-03,1,1,-0.025,2473,49,1653.75,33.750000
3,2022-01-04,2,1,-0.050,2320,37,1435.00,38.783784
4,2022-01-05,3,1,-0.025,2762,56,2135.00,38.125000
...,...,...,...,...,...,...,...,...
995,2023-05-11,4,2,-0.100,2526,71,2080.00,29.295775
996,2023-05-12,5,2,0.100,1637,47,2480.00,52.765957
997,2023-05-13,6,2,0.025,2162,20,778.75,38.937500
998,2023-05-14,7,2,-0.200,2833,52,1580.00,30.384615


In [5]:
raw_data

Unnamed: 0,date,weekday,device,percentual_price_increment,traffic,orders,profit,profit_per_order
0,2022-01-01,6,1,0.150,2225,8,357.50,44.687500
1,2022-01-02,7,1,0.075,2348,15,712.50,47.500000
2,2022-01-03,1,1,-0.025,2473,49,1653.75,33.750000
3,2022-01-04,2,1,-0.050,2320,37,1435.00,38.783784
4,2022-01-05,3,1,-0.025,2762,56,2135.00,38.125000
...,...,...,...,...,...,...,...,...
995,2023-05-11,4,2,-0.100,2526,71,2080.00,29.295775
996,2023-05-12,5,2,0.100,1637,47,2480.00,52.765957
997,2023-05-13,6,2,0.025,2162,20,778.75,38.937500
998,2023-05-14,7,2,-0.200,2833,52,1580.00,30.384615


In [6]:
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    percentual_price_increment_filter = df["percentual_price_increment"] != 0
    df_first_cleaned = df[percentual_price_increment_filter]
    return df_first_cleaned.copy(deep=True)

raw_data_cleaned = clean_data(raw_data)
raw_data_cleaned

Unnamed: 0,date,weekday,device,percentual_price_increment,traffic,orders,profit,profit_per_order
0,2022-01-01,6,1,0.150,2225,8,357.50,44.687500
1,2022-01-02,7,1,0.075,2348,15,712.50,47.500000
2,2022-01-03,1,1,-0.025,2473,49,1653.75,33.750000
3,2022-01-04,2,1,-0.050,2320,37,1435.00,38.783784
4,2022-01-05,3,1,-0.025,2762,56,2135.00,38.125000
...,...,...,...,...,...,...,...,...
995,2023-05-11,4,2,-0.100,2526,71,2080.00,29.295775
996,2023-05-12,5,2,0.100,1637,47,2480.00,52.765957
997,2023-05-13,6,2,0.025,2162,20,778.75,38.937500
998,2023-05-14,7,2,-0.200,2833,52,1580.00,30.384615


In [13]:
import statsmodels.api as sm
from matplotlib import pyplot as plt

In [27]:
def get_price_elasticity_mean(group: pd.DataFrame) -> float:
    price_elasticity_grouped_df = group["response"] / group["percentual_price_increment"]
    return price_elasticity_grouped_df.mean()

def get_price_elasticity_slope(group: pd.DataFrame) -> dict:
    X = group[['percentual_price_increment']]
    y = group["response"]
    model = sm.OLS(y, sm.add_constant(X)).fit()
    intercept = model.params[0]
    slope = model.params[1]
    
    return {"slope": slope, "intercept": intercept}

def plotter(group: pd.DataFrame, price_elasticity_slope: dict) -> None:
    fig = plt.figure()
    x = np.linspace(group['percentual_price_increment'].min(), group['percentual_price_increment'].max(), 50)
    intercept = price_elasticity_slope["intercept"]
    slope = price_elasticity_slope["slope"]
    y = intercept + slope * x
    plt.scatter(group['percentual_price_increment'], group['response'], label='Data')
    plt.plot(x, y, color='red', label='Linear Price Response')

    plt.xlabel('Price')
    plt.ylabel('Response')
    plt.title('Linear Price Response Function')

    plt.legend()
    plt.savefig('linear_response.png')
    
def analysis(group):
    group_sorted = group.sort_values(by="date")
    group_sorted["response"] = group_sorted["orders"].pct_change() 
    group_sorted["price_elasticity"] = group_sorted["response"] / group_sorted["percentual_price_increment"] 
    group_sorted.dropna(inplace=True) 
    price_elasticity_slope_dict = get_price_elasticity_slope(group_sorted)
    data = pd.DataFrame([{
        "price_elasticity_mean": get_price_elasticity_mean(group_sorted),
        "price_elasticity_slope": price_elasticity_slope_dict["slope"],
        "sum_profit": group_sorted.profit.sum(), "traffic": group_sorted.traffic.sum()}])
#     plotter(group_sorted, price_elasticity_slope_dict)
    return group_sorted



raw_data_clusterized = raw_data_cleaned.groupby(["weekday", "device"]).apply(analysis)
# raw_data_clusterized.sort_values(by="price_elasticity_mean", ascending=False)
raw_data_clusterized

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,weekday,device,percentual_price_increment,traffic,orders,profit,profit_per_order,response,price_elasticity
weekday,device,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,1,9,2022-01-10,1,1,0.075,2242,44,1838.75,41.789773,-0.102041,-1.360544
1,1,16,2022-01-17,1,1,0.025,2301,49,1963.75,40.076531,0.113636,4.545455
1,1,23,2022-01-24,1,1,0.025,2221,43,1771.25,41.191860,-0.122449,-4.897959
1,1,30,2022-01-31,1,1,-0.100,2594,55,1365.00,24.818182,0.279070,-2.790698
1,1,37,2022-02-07,1,1,0.200,2402,43,2290.00,53.255814,-0.218182,-1.090909
...,...,...,...,...,...,...,...,...,...,...,...,...
7,2,970,2023-04-16,7,2,-0.175,2488,49,1213.75,24.770408,0.750000,-4.285714
7,2,977,2023-04-23,7,2,0.125,1951,9,471.25,52.361111,-0.816327,-6.530612
7,2,984,2023-04-30,7,2,-0.050,2901,34,1170.00,34.411765,2.777778,-55.555556
7,2,991,2023-05-07,7,2,-0.100,2757,37,1495.00,40.405405,0.088235,-0.882353


In [None]:
def build_groups_of_clients(df: pd.DataFrame) -> pd.DataFrame:
    elastic   = df[df.percentual_price_increment > 8]
    medium = df[df.percentual_price_increment > 1]
    inelastic = df[df.percentual_price_increment < 1]
    df["groups_of_clients"] = pd.concat() 
    
    return df

In [11]:
from jenkspy import JenksNaturalBreaks


def jenks_natural_breaks(df: pd.DataFrame, final_grouped_clusters: int) -> dict:
    label_dict = {}
    jnb = JenksNaturalBreaks(final_grouped_clusters)
    jnb.fit(df.price_elasticity)
    for cluster_id, label in enumerate(jnb.labels_):
        label_dict[cluster_id] = label
    return label_dict

In [13]:
jenks_natural_breaks(raw_data_clusterized)

TypeError: Number of class have to be a positive integer: expected an instance of 'int' but found <class 'pandas.core.frame.DataFrame'>