In [1]:
import pandas as pd
import numpy as np

In [2]:
# Import the CSV file and assign it to df_dataset
df_dataset = pd.read_csv(link to the file path)

In [3]:
# Verify the data by printing the first few rows
print(df_dataset.head())

  Product ID        Date  Cost of Goods Sold  Current Price  Competitor Price  \
0       P001  2025-01-01                  48          53.86             55.24   
1       P001  2025-01-02                  48          52.19             54.17   
2       P001  2025-01-03                  48          53.70             55.28   
3       P001  2025-01-04                  48          53.67             54.49   
4       P001  2025-01-05                  48          52.29             55.38   

   Sales Volume  
0            15  
1             8  
2            15  
3            13  
4             1  


In [4]:
# Step 1: Create the Classification Label ("Action")
# We define a rule:
# - If the competitor price is more than 2 units higher than the current price, label "increase".
# - If the competitor price is more than 2 units lower than the current price AND sales volume is below the median, label "decrease".
# - Otherwise, label "maintain".

# Compute the median sales volume (for our rule)
median_sales_volume = df_dataset['Sales Volume'].median()

def pricing_action(row, threshold=2):
    price_diff = row['Competitor Price'] - row['Current Price']
    if price_diff > threshold:
        return 'increase'
    elif price_diff < -threshold and row['Sales Volume'] < median_sales_volume:
        return 'decrease'
    else:
        return 'maintain'

# Apply the rule to create a new column "Action"
df_dataset['Action'] = df_dataset.apply(pricing_action, axis=1)

# Check the distribution of the action labels
print("Pricing Action Distribution:")
print(df_dataset['Action'].value_counts())

Pricing Action Distribution:
Action
maintain    544
increase     31
decrease     25
Name: count, dtype: int64


In [5]:
# Step 2: Define Features and Target for the Classification Model
# We will use "Cost of Goods Sold", "Competitor Price", and "Sales Volume" as features
# and "Action" as the target variable.

features = ['Cost of Goods Sold', 'Competitor Price', 'Sales Volume']
target = 'Action'

X = df_dataset[features]
y = df_dataset[target]

In [6]:
# Step 3: Split the Data into Training and Testing Sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print("Training set size:", X_train.shape[0])
print("Testing set size:", X_test.shape[0])

Training set size: 480
Testing set size: 120


In [7]:
# Step 4: Build and Train the Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
print("Decision Tree Classifier trained successfully.")

Decision Tree Classifier trained successfully.


In [8]:
# Step 5: Evaluate the Classification Model
from sklearn.metrics import accuracy_score, classification_report

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Classification Model Evaluation:")
print("-------------------------------")
print("Accuracy:", accuracy)
print("\nClassification Report:")
print(report)

Classification Model Evaluation:
-------------------------------
Accuracy: 0.875

Classification Report:
              precision    recall  f1-score   support

    decrease       0.20      0.33      0.25         6
    increase       0.60      0.75      0.67         4
    maintain       0.95      0.91      0.93       110

    accuracy                           0.88       120
   macro avg       0.58      0.66      0.62       120
weighted avg       0.90      0.88      0.89       120



In [16]:
# Step 6: Use the Classification Model to Predict the Pricing Action for a New Product
# Example: For a product with:
# - Cost of Goods Sold = 30
# - Competitor Price = 45
# - Sales Volume = 10

new_product = pd.DataFrame({
    'Cost of Goods Sold': [30],
    'Competitor Price': [45],
    'Sales Volume': [10]
})

predicted_action = clf.predict(new_product)[0]
print("The predicted pricing action for the new product is:", predicted_action)

The predicted pricing action for the new product is: maintain
