In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Use the "plotly_white" template as the default for every figure below.
pio.templates.default = "plotly_white"

# Read the retail pricing dataset and preview its first rows.
data = pd.read_csv(filepath_or_buffer="retail_price.csv")
print(data.head())

In [7]:
# List every column name available in the dataset.
print(data.columns)

Index(['product_id', 'product_category_name', 'month_year', 'qty',
       'total_price', 'freight_price', 'unit_price', 'product_name_lenght',
       'product_description_lenght', 'product_photos_qty', 'product_weight_g',
       'product_score', 'customers', 'weekday', 'weekend', 'holiday', 'month',
       'year', 's', 'volume', 'comp_1', 'ps1', 'fp1', 'comp_2', 'ps2', 'fp2',
       'comp_3', 'ps3', 'fp3', 'lag_price'],
      dtype='object')


In [None]:
# Count the missing values in each column before going any further.
print(data.isna().sum())

In [None]:
# Print pandas' descriptive statistics for the dataset.
print(data.describe())

In [None]:
# Distribution of the products' total prices, drawn as a histogram (nbins=20).
fig = px.histogram(
    data,
    x="total_price",
    nbins=20,
    title="Distribution of total price",
)
fig.show()
# Tip: hover the cursor over the chart to see the details of each bar.

# If fig.show() fails with
# "Mime type rendering requires nbformat>=4.2.0 but it is not installed":
#   1. Install the package: %pip install nbformat
#   2. Restart the kernel.

In [13]:
# Box plot showing how the unit prices are distributed.
fig = px.box(
    data,
    y="unit_price",
    title="Box plot of the unit price",
)
fig.show()

In [15]:
# Relationship between quantity and total price, with an OLS trendline overlaid.
# The trendline relies on the `statsmodels` package; if plotly raises
# "No module named 'statsmodels'", install it with: pip install statsmodels
fig = px.scatter(
    data,
    x="qty",
    y="total_price",
    trendline="ols",
    title="Quantity vs Total Price",
)
fig.show()

# CONCLUSION: the relationship between quantity and total price is linear.
# This indicates that the price structure is based on a fixed unit price,
# where the total price is the quantity multiplied by the unit price.

In [16]:
# Average total price per product category.
# px.bar draws one bar segment per input row and stacks the segments that share
# an x value, so plotting the raw rows would show per-category *sums* rather
# than the averages the title promises. Aggregate to the mean first.
avg_total_price_by_category = (
    data.groupby("product_category_name")["total_price"].mean().reset_index()
)
fig = px.bar(
    avg_total_price_by_category,
    x="product_category_name",
    y="total_price",
    title="Average total price by product category",
)
fig.show()

In [19]:
# Box plots of total price, one box per value of the `weekday` column.
fig = px.box(
    data,
    x="weekday",
    y="total_price",
    title="Box plot of total price by weekday",
)
fig.show()


In [20]:
# Box plots of total price, one box per value of the `holiday` column.
fig = px.box(
    data,
    x="holiday",
    y="total_price",
    title="Box plot of total price by holiday",
)
fig.show()

In [None]:
# Correlation between the numerical features.
# Restrict the frame to numeric columns first: it also holds text columns
# (e.g. product_id values such as 'bed1'), and calling data.corr() on those
# raises "ValueError: could not convert string to float".
correlation_matrix = data.select_dtypes(include="number").corr()
fig = go.Figure(
    go.Heatmap(
        x=correlation_matrix.columns,
        y=correlation_matrix.columns,
        z=correlation_matrix.values,
    )
)
fig.update_layout(title="Correlation Heatmap of Numerical Features")
fig.show()

In [24]:
# Competitor price analysis.
# Monitoring and benchmarking against competitors' prices helps identify
# opportunities to price competitively -- below or above the competition,
# depending on the retailer's positioning and strategy.
# Here: the average competitor price difference per product category.

# Difference between our unit price and the `comp_1` column
# (presumably competitor 1's price -- confirm against the dataset description).
data["comp_price_diff"] = data["unit_price"] - data["comp_1"]
avg_price_diff_by_category = (
    data
    .groupby("product_category_name")["comp_price_diff"]
    .mean()
    .reset_index()
)

fig = px.bar(
    avg_price_diff_by_category,
    x="product_category_name",
    y="comp_price_diff",
    title="Average competitor price difference by product category",
)
fig.update_layout(
    xaxis_title="Product category",
    yaxis_title="Average competitor price difference",
)
fig.show()

Retail Price Optimization Model with Machine Learning

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Features and target for the price model. `comp_price_diff` is the column
# derived in the competitor-analysis cell, so that cell must run first.
# NOTE(review): `qty` and `unit_price` together look like they determine
# `total_price` (see the quantity-vs-total-price conclusion) -- confirm this
# is not target leakage before relying on the model's accuracy.
X = data[['qty', 'unit_price', 'comp_1',
          'product_score', 'comp_price_diff']]
y = data['total_price']

# Hold out 20% of the rows for evaluation; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)

# Train a decision tree regressor (not a linear regression).
# random_state is fixed because the tree permutes features at each split, so
# an unseeded fit may differ between runs.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

In [27]:
# Predict on the held-out test set and compare predicted vs. actual prices.
y_pred = model.predict(X_test)

# Endpoints of the y = x reference line, computed once with the vectorised
# Series methods instead of calling the builtin min()/max() twice each.
actual_min, actual_max = y_test.min(), y_test.max()

fig = go.Figure()
# One marker per test row: x is the actual price, y the model's prediction.
fig.add_trace(go.Scatter(x=y_test, y=y_pred, mode='markers',
                         marker=dict(color='blue'),
                         name='Predicted vs. Actual Retail Price'))
# Diagonal where prediction == actual. A lines-only trace is styled through
# `line`, not `marker`.
fig.add_trace(go.Scatter(x=[actual_min, actual_max], y=[actual_min, actual_max],
                         mode='lines',
                         line=dict(color='red'),
                         name='Ideal Prediction'))
fig.update_layout(
    title='Predicted vs. Actual Retail Price',
    xaxis_title='Actual Retail Price',
    yaxis_title='Predicted Retail Price'
)
fig.show()

So this is how you can optimize retail prices with Machine Learning using Python.

Summary
The ultimate aim of optimizing retail prices is to charge a price that helps you make the most money and attracts enough customers to buy your products. It involves using data and pricing strategies to find the right price that maximizes your sales and profits while keeping customers happy. I hope you liked this article on optimizing retail prices with Machine Learning using Python. Feel free to ask valuable questions in the comments section below.

AUTHOR: Aman Kharwal
I'm a writer and data scientist on a mission to educate others about the incredible power of data.