In [None]:
import pandas as pd
import numpy  as np
import matplotlib.pyplot as plt
import statistics as st
import seaborn as sns
import datetime 
import pandas as pd
import os
import sqlite3
from sqlite3 import Error
from sqlalchemy import create_engine

In [24]:
# Re-load the datasets after execution state reset
superstore_file_path = "Sample-Superstore.csv"

# Read the datasets
superstore_df = pd.read_csv(superstore_file_path, encoding="ISO-8859-1")


# Ensure numerical columns are correctly interpreted for superstore data
numeric_columns = ["Sales", "Profit", "Discount", "Quantity"]
for col in numeric_columns:
    if col in superstore_df.columns:
        superstore_df[col] = pd.to_numeric(superstore_df[col], errors="coerce")

# 1) Add calculated columns to Sample-Superstore
superstore_df["Estimated Cost"] = superstore_df["Sales"] - (superstore_df["Profit"] + (superstore_df["Sales"] * superstore_df["Discount"]))
superstore_df["Estimated Cost p/unit"] = superstore_df["Estimated Cost"] / superstore_df["Quantity"]
superstore_df["Estimated Sale Price p/unit"] = superstore_df["Sales"] / superstore_df["Quantity"]
superstore_df["Estimated Profit p/unit"] = superstore_df["Estimated Sale Price p/unit"] - superstore_df["Estimated Cost p/unit"]
superstore_df["Estimated Profit Margin p/unit"] = superstore_df["Estimated Profit p/unit"] / superstore_df["Estimated Sale Price p/unit"]

# 2) Add projected profit adjustments and pricing corrections
# Define the minimum projected profit margin requirement
minimum_profit_margin = 0.10  # 10%

# Initialize adjusted discount column
superstore_df["Adjusted Discount"] = superstore_df["Discount"]

# Cases where applying a discount reduction can achieve 10% profit margin
profit_target_sales = superstore_df["Estimated Cost"] / (1 - minimum_profit_margin)
superstore_df["Adjusted Discount"] = 1 - (profit_target_sales / superstore_df["Sales"])

# Ensure the adjusted discount is not negative (meaning no discount applied if it's already profitable)
superstore_df["Adjusted Discount"] = superstore_df["Adjusted Discount"].clip(lower=0)

# Identify cases where discount adjustment alone is not enough (price adjustment needed)
needs_price_adjustment = superstore_df["Estimated Profit Margin p/unit"] < 0.1

# Assign actions based on whether discount or price needs adjusting
superstore_df["Action"] = "Adjust Discount"
superstore_df.loc[needs_price_adjustment, "Action"] = "Adjust Price"

# Adjust Sale Price Per Unit to meet the 10% profit margin requirement
superstore_df.loc[needs_price_adjustment, "Adjusted Sale Price p/unit"] = superstore_df["Estimated Cost p/unit"] / (1 - minimum_profit_margin)

# Ensure adjusted sale price is only changed for "Adjust Price" cases
superstore_df["Adjusted Sale Price p/unit"].fillna(superstore_df["Estimated Sale Price p/unit"], inplace=True)

# Recalculate projected profit per unit for adjusted prices
superstore_df["Projected Profit p/unit"] = superstore_df["Adjusted Sale Price p/unit"] - superstore_df["Estimated Cost p/unit"]

# Calculate the required discount to achieve the minimum projected profit margin
superstore_df["Projected Profit Margin"] = superstore_df["Projected Profit p/unit"] / superstore_df["Adjusted Sale Price p/unit"]

# Recalculate projected total profit for adjusted prices
superstore_df["Projected total profit"] = (superstore_df["Adjusted Sale Price p/unit"] * superstore_df["Quantity"]) - superstore_df["Estimated Cost"]

# Calculate projected sales based on adjusted pricing
superstore_df["Projected Sales"] = superstore_df["Adjusted Sale Price p/unit"] * superstore_df["Quantity"]




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  superstore_df["Adjusted Sale Price p/unit"].fillna(superstore_df["Estimated Sale Price p/unit"], inplace=True)


In [None]:
negative_profit_transactions = superstore_df[superstore_df["Profit"]<0]
negative_profit_transactions

In [25]:
# Recalculate the "Action" column to correctly account for cases where reducing the discount achieves a 10% total profit margin

# Define the minimum projected profit margin requirement
minimum_profit_margin = 0.10  # 10%

# Recalculate profit target sales to achieve a 10% **total** profit margin
profit_target_sales_total = superstore_df["Estimated Cost"] / (1 - minimum_profit_margin)

# Determine cases where reducing discount would be enough to meet the required total profit margin
adjust_discount_needed = (1 - (profit_target_sales_total / superstore_df["Sales"])) < superstore_df["Discount"]

# Identify cases where price adjustment is still needed
needs_price_adjustment = ~adjust_discount_needed & (superstore_df["Estimated Profit Margin p/unit"] < 0.1)

# Assign "Adjust Discount" where reducing the discount is sufficient
superstore_df["Action"] = "No Action Needed"
superstore_df.loc[adjust_discount_needed, "Action"] = "Adjust Discount"
superstore_df.loc[needs_price_adjustment, "Action"] = "Adjust Price"

# Adjust the discount for the cases where discount reduction is needed
superstore_df.loc[adjust_discount_needed, "Adjusted Discount"] = 1 - (profit_target_sales_total / superstore_df["Sales"])

# Ensure the adjusted discount is not negative
superstore_df["Adjusted Discount"] = superstore_df["Adjusted Discount"].clip(lower=0)

# Adjust Sale Price Per Unit only where necessary
superstore_df.loc[needs_price_adjustment, "Adjusted Sale Price p/unit"] = superstore_df["Estimated Cost p/unit"] / (1 - minimum_profit_margin)

# Ensure adjusted sale price is only changed for "Adjust Price" cases
superstore_df["Adjusted Sale Price p/unit"].fillna(superstore_df["Estimated Sale Price p/unit"], inplace=True)

# Recalculate projected profit per unit for adjusted prices
superstore_df["Projected Profit p/unit"] = superstore_df["Adjusted Sale Price p/unit"] - superstore_df["Estimated Cost p/unit"]

# Calculate the required discount to achieve the minimum projected profit margin
superstore_df["Projected Profit Margin"] = superstore_df["Projected Profit p/unit"] / superstore_df["Adjusted Sale Price p/unit"]

# Recalculate projected total profit for adjusted prices
superstore_df["Projected total profit"] = (superstore_df["Adjusted Sale Price p/unit"] * superstore_df["Quantity"]) - superstore_df["Estimated Cost"]

# Calculate projected sales based on adjusted pricing
superstore_df["Projected Sales"] = superstore_df["Adjusted Sale Price p/unit"] * superstore_df["Quantity"]



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  superstore_df["Adjusted Sale Price p/unit"].fillna(superstore_df["Estimated Sale Price p/unit"], inplace=True)


In [32]:
superstore_df["Transaction Profit Margin"] = superstore_df ["Profit"]/ superstore_df["Sales"]

In [33]:
superstore_df.columns

Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
       'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category',
       'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit',
       'Estimated Cost', 'Estimated Cost p/unit',
       'Estimated Sale Price p/unit', 'Estimated Profit p/unit',
       'Estimated Profit Margin p/unit', 'Adjusted Discount', 'Action',
       'Adjusted Sale Price p/unit', 'Projected Profit p/unit',
       'Projected Profit Margin', 'Projected total profit', 'Projected Sales',
       'Transaction Profit Margin'],
      dtype='object')

In [34]:
superstore_df

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Estimated Profit p/unit,Estimated Profit Margin p/unit,Adjusted Discount,Action,Adjusted Sale Price p/unit,Projected Profit p/unit,Projected Profit Margin,Projected total profit,Projected Sales,Transaction Profit Margin
0,1.0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,20.956800,0.1600,0.066667,No Action Needed,130.98000,20.956800,0.1600,41.913600,261.96000,0.1600
1,2.0,CA-2016-152156,11/8/2016,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,73.194000,0.3000,0.222222,No Action Needed,243.98000,73.194000,0.3000,219.582000,731.94000,0.3000
2,3.0,CA-2016-138688,6/12/2016,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,3.435700,0.4700,0.411111,No Action Needed,7.31000,3.435700,0.4700,6.871400,14.62000,0.4700
3,4.0,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,9.575775,0.0500,0.000000,Adjust Discount,202.15525,20.215525,0.1000,101.077625,1010.77625,-0.4000
4,5.0,US-2015-108966,10/11/2015,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,3.495000,0.3125,0.236111,No Action Needed,11.18400,3.495000,0.3125,6.990000,22.36800,0.1125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9990,9991.0,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,7.816600,0.1700,0.077778,No Action Needed,45.98000,7.816600,0.1700,15.633200,91.96000,0.1700
9991,9992.0,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,35.554200,0.2750,0.194444,Adjust Discount,129.28800,35.554200,0.2750,71.108400,258.57600,0.0750
9992,9993.0,CA-2017-121258,2/26/2017,3/3/2017,Standard Class,DB-13060,Dave Brooks,Consumer,United States,Costa Mesa,...,3.330000,0.4500,0.388889,No Action Needed,7.40000,3.330000,0.4500,13.320000,29.60000,0.4500
9993,9994.0,CA-2017-119914,5/4/2017,5/9/2017,Second Class,CC-12220,Chris Cortes,Consumer,United States,Westminster,...,36.474000,0.3000,0.222222,No Action Needed,121.58000,36.474000,0.3000,72.948000,243.16000,0.3000


In [35]:
superstore_df = superstore_df[['Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
       'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
       'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category',
       'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit','Transaction Profit Margin',
       'Estimated Cost', 'Estimated Cost p/unit',
       'Estimated Sale Price p/unit', 'Estimated Profit p/unit',
       'Estimated Profit Margin p/unit', 'Adjusted Discount', 'Action',
       'Adjusted Sale Price p/unit', 'Projected Profit p/unit',
       'Projected Profit Margin', 'Projected total profit', 'Projected Sales',
       ]]

In [39]:
superstore_df["Transaction Profit Margin"]

0                            0.1600
1                            0.3000
2                            0.4700
3                           -0.4000
4                            0.1125
                              ...  
9990                         0.1700
9991                         0.0750
9992                         0.4500
9993                         0.3000
Transaction Profit Margin       NaN
Name: Transaction Profit Margin, Length: 9995, dtype: float64

In [None]:
superstore_df.to_csv("superstore_update.csv")


In [None]:
#TODO - check discount of same items in different states
# do another kpi for sale price

In [None]:
price_per_unit_kpi = superstore_df.groupby[]

In [41]:


# # Ensure numerical columns are correctly interpreted
# numeric_columns = ["Sales", "Quantity"]
# for col in numeric_columns:
#     if col in superstore_df.columns:
#         superstore_df[col] = pd.to_numeric(superstore_df[col], errors="coerce")

# Convert "Order Date" to datetime format if available
if "Order Date" in superstore_df.columns:
    superstore_df["Order Date"] = pd.to_datetime(superstore_df["Order Date"], errors="coerce")
    superstore_df["Year-Month"] = superstore_df["Order Date"].dt.to_period("M")

# Group data by Year-Month and calculate Price per Unit
price_per_unit_kpi = superstore_df.groupby("Year-Month").agg(
    Total_Sales=("Sales", "sum"),
    Total_Quantity=("Quantity", "sum")
).reset_index()

# Calculate Price per Unit
price_per_unit_kpi["Price per Unit"] = price_per_unit_kpi["Total_Sales"] / price_per_unit_kpi["Total_Quantity"]




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  superstore_df["Order Date"] = pd.to_datetime(superstore_df["Order Date"], errors="coerce")


In [42]:
price_per_unit_kpi

Unnamed: 0,Year-Month,Total_Sales,Total_Quantity,Price per Unit
0,2014-01,14236.895,284.0,50.129912
1,2014-02,4519.892,159.0,28.426994
2,2014-03,55691.009,585.0,95.198306
3,2014-04,28295.345,536.0,52.789823
4,2014-05,23648.287,466.0,50.747397
5,2014-06,34595.1276,521.0,66.401397
6,2014-07,33946.393,550.0,61.720715
7,2014-08,27909.4685,609.0,45.828356
8,2014-09,81777.3508,1000.0,81.777351
9,2014-10,31453.393,573.0,54.892483
