<a href="https://colab.research.google.com/github/nilupulmadhawa/price-optimization-model/blob/main/model1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


In [4]:

def simulate_environment(action, cost):
    """
    Simulate the environment by applying the chosen action (price or profit margin) to the product.

    Parameters:
    - action: Chosen action (price or profit margin)
    - cost: Cost associated with the product

    Returns:
    - revenue: Revenue generated from the chosen action
    """
    # In this simple example, revenue is calculated based on the product of action and cost.
    # You should replace this with your specific revenue calculation based on your business logic.
    revenue = cost * action
    return revenue

def calculate_reward(action, cost, maximum_profit_margin, minimum_profit_margin,, day_of_week, week_of_month):
    """
    Calculate the reward based on the chosen action (profit margin), cost, and revenue.

    Parameters:
    - action: Chosen action (profit margin)
    - cost: Cost associated with the product
    - maximum_profit_margin: Maximum allowed profit margin
    - minimum_profit_margin: Minimum allowed profit margin

    Returns:
    - reward: Calculated reward (profit)
    """
    # Simulate the environment by calculating revenue
    revenue = simulate_environment(action, cost)
    minimum_profit_margin = cost *minimum_profit_margin
    maximum_profit_margin = cost *maximum_profit_margin
    # Clip the action to ensure it's within the allowed profit margin range
    action = np.clip(action, minimum_profit_margin, maximum_profit_margin)

    day_factor = 1.0 + 0.1 * (day_of_week + 1)  # Example: Increase profit on weekends
    week_factor = 1.0 + 0.05 * week_of_month

    # Calculate profit based on the chosen profit margin
    # profit = revenue - cost
    profit = revenue - cost + (action * cost * day_factor * week_factor)

    return profit



In [7]:
# Load your dataset
df = pd.read_csv('sales_data2.csv')

# Data preprocessing
df['created_at'] = pd.to_datetime(df['created_at'])
df['maximum_profit_margin'] = 20
df['minimum_profit_margin'] = 10

# Extract relevant features
X = df[['cost', 'maximum_profit_margin', 'minimum_profit_margin', 'sku']]

# Feature scaling for numerical features
numerical_features = ['cost', 'maximum_profit_margin', 'minimum_profit_margin']
numerical_transformer = StandardScaler()

# One-hot encoding for categorical features
categorical_features = ['sku']
categorical_transformer = OneHotEncoder(drop='first', sparse=False)

# Combine transformers using ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Transform the features
X_transformed = preprocessor.fit_transform(X)

# Define the Q-learning model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_transformed.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='linear'))

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Q-learning parameters
gamma = 0.9  # discount factor
epsilon = 1.0  # exploration-exploitation trade-off
epsilon_decay = 0.995  # decay rate for exploration

# Training the Q-learning model
for epoch in range(1):
    total_reward = 0
    for i in range(len(X_transformed)-1):
        state = X_transformed[i:i+1, :]
        if np.random.rand() < epsilon:
            action = np.random.uniform(low=0, high=1)
        else:
            action = model.predict(state)[0][0]

        # Simulate the environment (you need to define this part based on your business logic)
        # For example, calculate profit or revenue based on the chosen action (price)

        # Calculate the reward (you need to define this based on your business logic)
        cost = df.loc[i, 'cost']
        minimum_profit_margin = df.loc[i, 'minimum_profit_margin']
        maximum_profit_margin = df.loc[i, 'maximum_profit_margin']

        # Calculate the reward
        reward = calculate_reward(action, cost, maximum_profit_margin, minimum_profit_margin)
        # Update the Q-value
        next_state = X_transformed[i+1:i+2, :]
        target = reward + gamma * np.max(model.predict(next_state)[0])
        target_f = model.predict(state)
        target_f[0] = reward if i == len(X_transformed) - 1 else target
        model.fit(state, target_f, epochs=1, verbose=0)
        print(f'Epoch: {epoch}, Total range: {i}')

        total_reward += reward

    print(f'Epoch: {epoch+1}, Total Reward: {total_reward}')

    # Decay exploration rate
    epsilon *= epsilon_decay



-10.624347480433123




Epoch: 0, Total range: 0
-14.575755848872651
Epoch: 0, Total range: 1
-20.44751881989975
Epoch: 0, Total range: 2
-0.804843228762234
Epoch: 0, Total range: 3
-2.3692450227452397
Epoch: 0, Total range: 4
-4.788529296236451
Epoch: 0, Total range: 5
-8.081917288633106
Epoch: 0, Total range: 6
-35.582871690950675
Epoch: 0, Total range: 7
-4.708320005688659
Epoch: 0, Total range: 8
-14.475810384593121
Epoch: 0, Total range: 9
-14.859017320864604
Epoch: 0, Total range: 10
-1.3945260890644064
Epoch: 0, Total range: 11
-7.334430730094489
Epoch: 0, Total range: 12
-12.40016603166687
Epoch: 0, Total range: 13
-16.054307772572084
Epoch: 0, Total range: 14
-24.94556233697684
Epoch: 0, Total range: 15
-22.271508186214945
Epoch: 0, Total range: 16
-11.562525242907242
Epoch: 0, Total range: 17
-0.7851871132370078
Epoch: 0, Total range: 18
-3.169804886282215
Epoch: 0, Total range: 19
-24.63429258742363
Epoch: 0, Total range: 20
-73.41159824780343
Epoch: 0, Total range: 21
-14.994515418105431
Epoch: 0,

KeyboardInterrupt: ignored

In [26]:


# # Q-learning parameters
# gamma = 0.9  # discount factor
# epsilon = 1.0  # exploration-exploitation trade-off
# epsilon_decay = 0.995  # decay rate for exploration
# X_transformed = preprocessor.fit_transform(X)

# # Training the Q-learning model
# for epoch in range(1):
#     total_reward = 0
#     for i in range(len(X)):
#         state = X_transformed[i:i+1]

#         if np.random.rand() < epsilon:
#             action = np.random.uniform(low=0, high=1)
#         else:
#             action = model.predict(state)[0][0]

#         # Simulate the environment (you need to define this part based on your business logic)
#         # For example, calculate profit or revenue based on the chosen action (price)

#         # Calculate the reward (you need to define this based on your business logic)
#         cost = df.loc[i, 'cost']
#         minimum_profit_margin = df.loc[i, 'minimum_profit_margin']
#         maximum_profit_margin = df.loc[i, 'maximum_profit_margin']

#         # Calculate the reward
#         reward = calculate_reward(action, cost, maximum_profit_margin, minimum_profit_margin)

#         next_state = preprocessor.fit_transform(X[i+1:i+2])
#         target = reward + gamma * np.max(model.predict(next_state)[0])
#         target_f = model.predict(state)
#         target_f[0] = reward if i == len(X) - 1 else target
#         model.fit(state, target_f, epochs=1, verbose=0)
#         print(f'Epoch: {epoch}, Total range: {i}')

#         total_reward += reward

#     print(f'Epoch: {epoch+1}, Total Reward: {total_reward}')

#     # Decay exploration rate
#     epsilon *= epsilon_decay


In [29]:

# Now, you can use the trained Q-learning model to set prices in a real environment
new_data = pd.read_csv('new_data.csv')  # Replace 'new_data.csv' with the actual path or dataset name
# df['created_at'] = pd.to_datetime(df['created_at'])
new_data_scaled = preprocessor.transform(new_data[['cost', 'maximum_profit_margin', 'minimum_profit_margin', 'sku']])
predicted_prices = model.predict(new_data_scaled)
predicted_prices
costs = new_data['cost'].values
predicted_prices = costs + predicted_prices
predicted_prices



array([[ -99.53500168, -120.30246984, -107.31551053, -112.92759934,
        -103.29174922, -111.03504974, -118.66054929,  -92.51454968,
        -122.03954976, -125.03854975],
       [ -93.72497361, -114.49244177, -101.50548246, -107.11757127,
         -97.48172115, -105.22502167, -112.85052122,  -86.70452161,
        -116.22952169, -119.22852168],
       [ -98.35215562, -119.11962378, -106.13266447, -111.74475328,
        -102.10890316, -109.85220368, -117.47770323,  -91.33170362,
        -120.8567037 , -123.85570369],
       [-101.32316392, -122.09063208, -109.10367277, -114.71576158,
        -105.07991146, -112.82321198, -120.44871153,  -94.30271192,
        -123.827712  , -126.82671199],
       [-109.34358018, -130.11104834, -117.12408903, -122.73617784,
        -113.10032772, -120.84362824, -128.46912779, -102.32312818,
        -131.84812826, -134.84712825],
       [ -98.57754319, -119.34501135, -106.35805204, -111.97014085,
        -102.33429073, -110.07759125, -117.7030908 ,  -91