In [1]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the session: no category, message or
# module restriction is passed, so the 'ignore' action applies to all of them.
# NOTE(review): this also hides library deprecation notices — consider narrowing it.
warnings.filterwarnings('ignore')

In [17]:
# Read the semicolon-separated CSV and discard its "Unnamed: 0" column.
raw_data = (
    pd.read_csv("DS_test_reformulation.csv", sep=";")
    .drop("Unnamed: 0", axis=1)
)

In [18]:
def str_to_float(value):
    """Convert a decimal-comma string such as ``"0,15"`` to a float.

    Parameters
    ----------
    value : str or number
        A string whose decimal separator may be ``,``, or a value that is
        already numeric (including NaN), which is passed through ``float``.

    Returns
    -------
    float
        The parsed number.

    Raises
    ------
    ValueError
        If ``value`` is a string that is not a number after the substitution.
    TypeError
        If ``value`` is neither a string nor convertible by ``float``.
    """
    # Only strings carry a decimal comma; numeric cells (e.g. NaN for an empty
    # field, or values that were already converted) have no ``.replace``.
    if isinstance(value, str):
        value = value.replace(',', '.')
    return float(value)

In [19]:
# Encode the device labels as integers, parse the decimal-comma columns and the
# date column. Each conversion is skipped when the column is already numeric,
# so re-running this cell on the converted frame leaves it unchanged instead of
# mapping `device` to NaN and failing inside `str_to_float`.
dictionary_to_number = {"mobile": 1, "laptop": 2}
if not pd.api.types.is_numeric_dtype(raw_data['device']):
    raw_data['device'] = raw_data['device'].map(dictionary_to_number)

for decimal_column in ("percentual_price_increment", "profit_per_order", "profit"):
    if not pd.api.types.is_numeric_dtype(raw_data[decimal_column]):
        raw_data[decimal_column] = raw_data[decimal_column].apply(str_to_float)

# Parsing a column that already holds datetimes is a no-op, so no guard is needed.
raw_data['date'] = pd.to_datetime(raw_data['date'])
raw_data

Unnamed: 0,date,weekday,device,percentual_price_increment,traffic,orders,profit,profit_per_order
0,2022-01-01,6,1,0.150,2225,8,357.50,44.687500
1,2022-01-02,7,1,0.075,2348,15,712.50,47.500000
2,2022-01-03,1,1,-0.025,2473,49,1653.75,33.750000
3,2022-01-04,2,1,-0.050,2320,37,1435.00,38.783784
4,2022-01-05,3,1,-0.025,2762,56,2135.00,38.125000
...,...,...,...,...,...,...,...,...
995,2023-05-11,4,2,-0.100,2526,71,2080.00,29.295775
996,2023-05-12,5,2,0.100,1637,47,2480.00,52.765957
997,2023-05-13,6,2,0.025,2162,20,778.75,38.937500
998,2023-05-14,7,2,-0.200,2833,52,1580.00,30.384615


In [20]:
# Relative change in `orders` between consecutive rows of the
# weekday 1 / device 1 segment.
raw_data.loc[
    raw_data['weekday'].eq(1) & raw_data['device'].eq(1),
    'orders',
].pct_change()

2           NaN
9     -0.102041
16     0.113636
23    -0.122449
30     0.279070
         ...   
471    0.676471
478   -0.175439
485    0.191489
492   -0.160714
499   -0.170213
Name: orders, Length: 72, dtype: float64

In [28]:
# Mean of (relative change in orders / price increment) for the
# weekday 1 / device 1 segment; infinite and missing ratios are discarded first.
(
    raw_data.loc[raw_data['weekday'].eq(1) & raw_data['device'].eq(1)]
    .pipe(lambda segment: segment['orders'].pct_change()
                          .div(segment['percentual_price_increment']))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .mean()
)

-0.2618013553583861

In [29]:
# Mean of (relative change in orders / price increment) for the
# weekday 2 / device 1 segment; infinite and missing ratios are discarded first.
(
    raw_data.loc[raw_data['weekday'].eq(2) & raw_data['device'].eq(1)]
    .pipe(lambda segment: segment['orders'].pct_change()
                          .div(segment['percentual_price_increment']))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .mean()
)

0.2975240591055196

In [22]:
# Mean price elasticity per (weekday, device) segment.
def _segment_mean_elasticity(weekday, device):
    """Return the mean orders-change / price-increment ratio of one segment.

    Rows of ``raw_data`` matching ``weekday`` and ``device`` are selected; the
    ratio of the row-to-row relative change in ``orders`` to
    ``percentual_price_increment`` is averaged after infinite and missing
    ratios have been removed.
    """
    segment = raw_data[(raw_data['weekday'] == weekday) & (raw_data['device'] == device)]
    ratio = segment['orders'].pct_change() / segment['percentual_price_increment']
    finite_ratio = ratio.replace([np.inf, -np.inf], np.nan).dropna()
    return finite_ratio.mean()


# One record per segment: weekdays 1..7 in the outer position, devices 1..2 inner.
elasticity_data = [
    {
        'weekday': weekday,
        'device': device,
        'price_elasticity': _segment_mean_elasticity(weekday, device),
    }
    for weekday in range(1, 8)
    for device in range(1, 3)
]

# Tabulate the records and order them from highest to lowest elasticity.
elasticity_df = pd.DataFrame(elasticity_data).sort_values('price_elasticity', ascending=False)

elasticity_df

Unnamed: 0,weekday,device,price_elasticity
2,2,1,0.297524
9,5,2,-0.147073
0,1,1,-0.261801
3,2,2,-0.780976
1,1,2,-0.881467
8,5,1,-1.065893
6,4,1,-1.208157
4,3,1,-1.217803
5,3,2,-1.786841
7,4,2,-2.388769


In [32]:
# Inspect which class `elasticity_df` is currently an instance of.
# NOTE(review): the output recorded for this cell may come from out-of-order
# execution — confirm with Restart & Run All.
elasticity_df.__class__

pandas.core.series.Series

In [33]:
# Per-row elasticity inside each (weekday, device) segment, then its segment mean.
def _row_elasticity(subset):
    """Return relative change in orders divided by the price increment, row by row."""
    return pd.Series(subset['orders'].pct_change() / subset['percentual_price_increment'])


# Flatten the grouped result, label the value column (named 0 after
# reset_index) and discard rows whose ratio is infinite or missing.
elasticity_df = (
    raw_data.groupby(['weekday', 'device'])
    .apply(_row_elasticity)
    .reset_index()
    .rename(columns={0: 'price_elasticity'})
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

# Average the remaining ratios within each segment.
mean_elasticity_df = (
    elasticity_df
    .groupby(['weekday', 'device'])['price_elasticity']
    .mean()
    .reset_index()
)

# Rank segments from highest to lowest mean elasticity with a fresh 0..n-1 index.
elasticity_df_sorted = (
    mean_elasticity_df
    .sort_values('price_elasticity', ascending=False)
    .reset_index(drop=True)
)

elasticity_df_sorted

Unnamed: 0,weekday,device,price_elasticity
0,2,1,0.297524
1,5,2,-0.147073
2,1,1,-0.261801
3,2,2,-0.780976
4,1,2,-0.881467
5,5,1,-1.065893
6,4,1,-1.208157
7,3,1,-1.217803
8,3,2,-1.786841
9,4,2,-2.388769
