<a href="https://colab.research.google.com/github/nilupulmadhawa/price-optimization-model/blob/main/dl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the dataset
data = pd.read_csv('sales_data.csv')
data['created_at'] = pd.to_datetime(data['created_at'], errors='coerce')
data['brand'] = data['brand'].fillna('unbrand')

data['maximum_profit_margin'] = '20'
data['minimum_profit_margin'] = '10'

data['day_of_week'] = data['created_at'].dt.day_name()
data['month'] = data['created_at'].dt.month_name()

data['order_date'] = data['created_at'].dt.date

data['order_count'] = data.groupby(['order_date','sku'])['id'].transform('count')
data['total_revenue_per_day'] =data['order_count'] * data['sale_price']


# Sort the DataFrame by 'created_at'
data.sort_values(by='created_at', inplace=True)


In [6]:
data['average_sales_per_day'] = data.groupby(['month','day_of_week'])['order_count'].transform('mean')
# Convert the result to a DataFrame and reset the index
# result_df = average_sales_per_day.reset_index()

# Display the result
# print(result_df)
data


Unnamed: 0,id,order_id,sku,brand,category,name,cost,retail_price,sale_price,created_at,maximum_profit_margin,minimum_profit_margin,day_of_week,month,order_date,order_count,total_revenue_per_day,average_sales_per_day
0,171680,118467,4AEC9B9C38FE30D8411F2A18E6324C9D,Tommy Hilfiger,Outerwear & Coats,Tommy Hilfiger Men's Zip Front Jacket,40.315519,89.989998,89.989998,2020-01-02 00:02:00+00:00,20,10,Thursday,January,2020-01-02,1,89.989998,1.009331
1,49137,34024,4ACBEDBE977480D19B7B682D4878CAE2,Scarf_tradinginc,Accessories,Elegant PASHMINA SCARF WRAP SHAWL STOLE,1.107500,2.500000,2.500000,2020-01-02 00:06:00+00:00,20,10,Thursday,January,2020-01-02,1,2.500000,1.009331
2,131963,90971,141FACDACB30C9B2E86172C3CFDBECC1,Quiksilver,Swim,Quiksilver Waterman Men's Cammofin Boardshort,40.796500,69.500000,69.500000,2020-01-02 00:14:00+00:00,20,10,Thursday,January,2020-01-02,1,69.500000,1.009331
3,131244,90487,0F29370D9DA664C1E143182F37301063,NEFF,Fashion Hoodies & Sweatshirts,neff Men's Griffin Fashion Hoodie,32.065000,55.000000,55.000000,2020-01-02 00:23:00+00:00,20,10,Thursday,January,2020-01-02,1,55.000000,1.009331
4,163950,113094,FF97E486DA08BFEC774688CA3EF6AC42,Kenneth Cole REACTION,Outerwear & Coats,Kenneth Cole Reaction Men's Trench Coat Jacket,25.375771,59.990002,59.990002,2020-01-02 00:35:00+00:00,20,10,Thursday,January,2020-01-02,1,59.990002,1.009331
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,35149,24355,F3A4FF4839C56A5F460C88CCE3666A2B,Back to Nature,Socks & Hosiery,Bulldog Cotton Puppy Dog Breed Animal Socks 9-11,5.344180,13.990000,13.990000,2020-06-06 06:46:00+00:00,20,10,Saturday,June,2020-06-06,1,13.990000,1.000000
19996,32048,22224,E3D11D85DEAF57A720D252CD069BCD37,Patty,Outerwear & Coats,Patty Women Black Motorcycle Biker Zip Front J...,28.183141,57.990002,57.990002,2020-06-06 06:49:00+00:00,20,10,Saturday,June,2020-06-06,1,57.990002,1.000000
19997,74826,51719,C5FB84A736219077B64E92BA7D5C58E4,Port & Company,Active,Port & Company Sweatpants with Pockets,13.651380,30.540001,30.540001,2020-06-06 06:53:00+00:00,20,10,Saturday,June,2020-06-06,1,30.540001,1.000000
19998,57660,39933,CCBD8CA962B80445DF1F7F38C57759F0,District Threads,Fashion Hoodies & Sweatshirts,District Threads DT292 - Juniors Marled Full-Z...,16.989549,35.029999,35.029999,2020-06-06 06:55:00+00:00,20,10,Saturday,June,2020-06-06,1,35.029999,1.000000


In [7]:



# Select relevant columns for features and target
features = ['sku', 'brand', 'category', 'name', 'cost', 'day_of_week','demand','total_revenue_per_day']
target = ['sale_price']

X = data[features]
y = data[target]

# Convert categorical features to one-hot encoding
X = pd.get_dummies(X)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the deep learning model
model = keras.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    layers.Dropout(0.2),  # Add dropout for regularization
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),  # Add dropout for regularization
    layers.Dense(1)
])

# Adjust the learning rate or use a different optimizer
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='mean_squared_error')

# Train the model
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=64, validation_split=0.2)

# Evaluate the model on the test set
y_pred = model.predict(X_test_scaled)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error on Test Set: {mae}')



KeyError: ignored

In [None]:

# Plot training history
import matplotlib.pyplot as plt

plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Mean Squared Error')
plt.legend()
plt.show()

In [None]:
plt.plot(data['created_at'], data['demand'], label='Demand')
plt.xlabel('Date')
plt.ylabel('Demand')
plt.title('Demand Chart')
plt.legend()
plt.show()


In [None]:
plt.scatter(y_test, y_pred)
plt.xlabel('Actual Sale Price')
plt.ylabel('Predicted Sale Price')
plt.title('Actual vs. Predicted Sale Price')
plt.show()

In [None]:
residuals = y_test - y_pred[:, 0]
plt.hist(residuals, bins=30)
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.title('Distribution of Residuals')
plt.show()