In [None]:
import pandas as pd
import numpy as np
import random
import sqlite3
# Seed BOTH random generators. NumPy's generator drives the per-row price
# jitter, promotions, stock multipliers and competitor draws, while the stdlib
# `random` module drives the per-SKU metadata (base price, base demand,
# elasticity). Seeding only NumPy leaves the metadata different on every run.
np.random.seed(42)
random.seed(42)

# --- PRODUCT SETUP ---
# Product names per category. A name may repeat across categories (e.g.
# 't-shirt'), so every (category, product) pair receives its own numeric id.
core_data = {'men': ['shirt','t-shirt','jacket','Jeans','Trackpants'],
             'women':['Dress','Kurtas','Tops','t-shirt','Jeans','Trackpants'],
             'kids':['shirt','t-shirt','jacket','Jeans','Trackpants','Dress','Kurtas','Tops']}

# Number the pairs 0..N-1 in dict/list order. enumerate() replaces a manual
# counter that used to live in a module-level variable called `id`, which
# shadowed the builtin of the same name for the rest of the session.
data = [
    [sku_id, product, category]
    for sku_id, (category, product) in enumerate(
        (category, product)
        for category, products in core_data.items()
        for product in products
    )
]

data_product_category = pd.DataFrame(data, columns=['id','product','category'])
# Restrict the dataset to SKUs 1 and 8 (the 't-shirt' rows of 'men' and 'women').
data_product_category = data_product_category.loc[data_product_category['id'].isin([1,8]), :]

# --- DATE SETUP ---
# Month-start timestamps ("MS") from 2023-06-01 through 2026-02-01.
dates = pd.date_range(start="2023-06-01", end="2026-02-01", freq="MS")
dates_df = pd.DataFrame({"date": dates})
# Cross join: one row for every (SKU, month) combination.
df_expanded = pd.merge(data_product_category, dates_df, how="cross")

# --- METADATA ---
# Per-SKU pricing parameters, keyed by the SKU id as a plain Python int.
# Every draw uses the stdlib `random` generator (not NumPy's), so the values
# are reproducible only when `random` itself is seeded.
metadata = {}
for sku_id in df_expanded['id'].unique():
    # Dict entries are evaluated top to bottom, so the draw order is
    # price -> demand -> elasticity.
    sku_meta = {
        'base_price': random.randint(100, 200)*10,              # 1000..2000, step 10
        'base_demand': (random.randint(100, 1000)//100) * 100,  # 100..1000, step 100
        'elasticity': -1 * random.randint(80,120)/100           # -1.20..-0.80
    }
    # margin = -1 / elasticity, so it is always positive here.
    sku_meta['margin'] = -1.0 / sku_meta['elasticity']
    # .item() converts the NumPy integer to a built-in int for use as the key.
    metadata[sku_id.item()] = sku_meta

# --- SALES GENERATOR ---
def generate_sales(row):
    """Simulate one month of pricing and sales figures for a single SKU.

    Reads the SKU's ``base_price``, ``margin`` and ``elasticity`` from the
    module-level ``metadata`` dict (looked up by ``row["id"]``) and the
    calendar month from ``row["date"].month``.

    Two values are drawn from NumPy's global generator, in this order: an
    integer price jitter in [-10, 10) and a promotion percentage from
    {0, 10, 20, 30}. Elasticity is only rounded and passed through; it does
    not enter the sales formula, and ``base_demand`` is not read at all.

    Returns:
        A pandas Series of nine values, in order: price, cost, promotion,
        seasonality, sales, sales_value, profit, elasticity (2 decimals),
        profit_margin.
    """
    sku_meta = metadata[row["id"]]
    list_price = sku_meta['base_price']
    markup = sku_meta['margin']
    price_elasticity = sku_meta['elasticity']

    unit_cost = round(list_price / (1 + markup))

    # Keep the draw order (jitter, then promotion) so seeded runs are stable.
    jittered_price = list_price + np.random.randint(-10, 10)
    discount = np.random.choice([0, 10, 20, 30], p=[0.5, 0.2, 0.2, 0.1])
    unit_price = round(jittered_price * (1 - discount / 100))

    # Nov/Dec get the largest uplift, Jun-Aug a smaller one, other months a dip.
    month = row["date"].month
    if month in [11, 12]:
        season_factor = 1.3
    elif month in [6, 7, 8]:
        season_factor = 1.1
    else:
        season_factor = 0.9

    # Any promotion adds a flat 20% volume lift.
    promo_lift = 1.2 if discount > 0 else 1.0
    units = round(100 * (500 / unit_price) * season_factor * promo_lift)

    revenue = unit_price * units
    unit_profit = unit_price - unit_cost
    total_profit = unit_profit * units
    if unit_price != 0:
        margin_ratio = round(unit_profit / unit_price, 2)
    else:
        margin_ratio = 0.0

    return pd.Series([
        unit_price,
        unit_cost,
        discount,
        season_factor,
        units,
        round(revenue),
        round(total_profit),
        round(price_elasticity, 2),
        margin_ratio,
    ])

# Column names for the nine values produced by generate_sales, listed in the
# same order as the Series it returns.
generated_columns = [
    "price", "cost", "promotion", "seasonality", "sales",
    "sales_value", "profit", "price_elasticity", "profit_margin",
]
# Row-wise apply: generate_sales runs once per (SKU, month) row.
df_expanded[generated_columns] = df_expanded.apply(generate_sales, axis=1)

# --- CURRENT PRODUCT INFORMATION (one row per SKU) ---
# Index label of the most recent dated row for each SKU.
latest_ids = df_expanded.groupby('id')['date'].idxmax()
latest_records = df_expanded.loc[latest_ids]

# Only the SKU id is taken from the latest record; every other field is
# copied from the per-SKU metadata dict.
product_rows = []
for sku in latest_records['id'].tolist():
    sku_meta = metadata[sku]
    product_rows.append({
        'sku_id': sku,
        'base_price': sku_meta['base_price'],
        'base_demand': sku_meta['base_demand'],
        'elasticity': sku_meta['elasticity'],
        'margin': sku_meta['margin'],
    })
current_product_information = pd.DataFrame(product_rows)


# --- SPLIT DATASETS ---
# Rows dated up to and including the split date are history; later rows are
# the forecast horizon.
split_date = '2025-08-01'
historical_data = df_expanded[df_expanded['date'] <= split_date]
forecast_data = df_expanded[df_expanded['date'] > split_date]

# --- FORECAST ---
# The forecast is the SKU's mean historical monthly sales (rounded), repeated
# for every month in the forecast horizon.
sku_keys = ['id', 'product', 'category']
forecast_data = forecast_data[sku_keys + ['date']]
historical_average = (
    historical_data
    .groupby(sku_keys)['sales']
    .mean()
    .round()
    .reset_index()
)
forecast_data = forecast_data.merge(historical_average, on=sku_keys, how='left')

# --- INVENTORY ---
# Stock is the average monthly sales scaled by a random integer factor of
# 2, 3 or 4 (randint's upper bound is exclusive), drawn once per SKU.
stock_multiplier = np.random.randint(2, 5, size=len(historical_average))
inventory_data = historical_average.rename(columns={'sales':'stock'})
inventory_data['stock'] = inventory_data['stock'].mul(stock_multiplier)

# --- COMPETITOR PRICING ---
def _competitor_list_price(our_avg_price):
    """Scale our average price by a random factor from {0.8, 0.9, 1.1, 1.2} and round."""
    return round(our_avg_price*np.random.choice([0.8,0.9,1.1,1.2]))


def _competitor_promotion(_row):
    """Draw one promotion label; the row contents are ignored."""
    return np.random.choice(['0','0.1','0.2','BOGO','BTGO'], p=[0.2,0.1,0.1,0.3,0.3])


# Start from our own average historical price per SKU.
our_price = historical_data.groupby(['id', 'product', 'category'])['price'].mean().round().reset_index()
competitior_price = our_price.copy()
# All price factors are drawn first, then all promotion labels.
competitior_price['price'] = competitior_price['price'].map(_competitor_list_price)
competitior_price['promotion'] = competitior_price.apply(_competitor_promotion, axis=1)

# Multiplier applied to the competitor's list price for each promotion label.
promotion_to_price_mapping = {'0':1,'0.1':0.9,'0.2':0.8,'BOGO':0.5,'BTGO':0.7}
competitior_price['price_change'] = competitior_price['promotion'].map(promotion_to_price_mapping)
competitior_price['price'] = competitior_price['price'].mul(competitior_price['price_change'])
competitior_price = competitior_price.rename(
    columns={"price": "competitor_price", "promotion": "competitor_promotion"}
)

# --- MERGE INTO HISTORICAL DATA ---
# historical_data = historical_data.merge(
#     competitior_price[['id', 'product', 'category', 'competitor_price', 'competitor_promotion']],
#     on=['id', 'product', 'category'],
#     how='left'
# )

# --- SAVE TO SQLITE ---
# Table name -> DataFrame; each entry becomes one table in the database file.
dataframes = {
    "historical_data": historical_data,
    "forecast_data": forecast_data,
    "inventory_data": inventory_data,
    "competitior_price": competitior_price,
    "current_product_information":current_product_information
}

conn = sqlite3.connect("enhanced_retail_data.db")
try:
    for table_name, df in dataframes.items():
        # if_exists="replace" drops and recreates the table, so re-running the
        # cell overwrites earlier output instead of appending to it.
        df.to_sql(table_name, conn, if_exists="replace", index=False)
        print(f"✅ Saved {table_name} to SQLite")
finally:
    # Close the connection even when a write fails, so the database file
    # handle is not left open for the rest of the notebook session.
    conn.close()


✅ Saved historical_data to SQLite
✅ Saved forecast_data to SQLite
✅ Saved inventory_data to SQLite
✅ Saved competitior_price to SQLite


In [6]:
# Print each table name followed by the first rows of its DataFrame.
for table_name, frame in dataframes.items():
    print(table_name)
    print(frame.head())

historical_data
   id  product category       date   price   cost  promotion  seasonality  \
0   1  t-shirt      men 2023-06-01  1061.0  703.0       20.0          1.1   
1   1  t-shirt      men 2023-07-01  1067.0  703.0       20.0          1.1   
2   1  t-shirt      men 2023-08-01  1326.0  703.0        0.0          1.1   
3   1  t-shirt      men 2023-09-01  1330.0  703.0        0.0          0.9   
4   1  t-shirt      men 2023-10-01  1323.0  703.0        0.0          0.9   

   sales  sales_value   profit  price_elasticity  profit_margin  
0   62.0      65782.0  22196.0             -1.12           0.34  
1   62.0      66154.0  22568.0             -1.12           0.34  
2   41.0      54366.0  25543.0             -1.12           0.47  
3   34.0      45220.0  21318.0             -1.12           0.47  
4   34.0      44982.0  21080.0             -1.12           0.47  
forecast_data
   id  product category       date  sales
0   1  t-shirt      men 2025-09-01   45.0
1   1  t-shirt      men 202

In [None]:
# Column renames (generated name -> schema name) for each table.
# Every table renames its two key columns the same way.
_SKU_KEY_RENAMES = {'id': 'sku_id', 'product': 'product_name'}

historical_data_renaming_dict = {
    **_SKU_KEY_RENAMES,
    'price': 'unit_price',
    'cost': 'unit_cost',
    'promotion': 'discount_pct',
    'seasonality': 'seasonality_factor',
    'sales': 'units_sold',
    'sales_value': 'revenue',
}

forecast_data_rename_dict = {**_SKU_KEY_RENAMES, 'sales': 'units_sale'}

# `rename_dict` and `inventory_data_rename_dict` are two names for the same
# dict object.
rename_dict = dict(_SKU_KEY_RENAMES)
inventory_data_rename_dict = rename_dict

competitor_data_rename_dict = {
    **_SKU_KEY_RENAMES,
    'competitor_price': 'unit_price',
    'competitor_promotion': 'promotion',
    'price_change': 'discount_pct',
}








In [None]:
[sku_id, product_name, category, date, unit_price, unit_cost, discount_pct, seasonality_factor, units_sold, revenue, profit]
['id', 'product', 'category', 'date', 'price', 'cost', 'promotion',
       'seasonality', 'sales', 'sales_value', 'profit', 'price_elasticity',
       'profit_margin']

In [None]:
TABLE: historical_data - Contains historical monthly-level sales, pricing, and profit data for each SKU.

COLUMN: sku_id - Unique identifier for each product-category combination.

COLUMN: product_name - Name of the specific product.

COLUMN: category - Category or department the product belongs to.

COLUMN: date - Month (or month-start date) representing the sales period.

COLUMN: unit_price - Actual selling price per unit after applying discounts.

COLUMN: unit_cost - Cost to produce or acquire one unit of the product.

COLUMN: discount_pct - Percentage discount applied to the base price.

COLUMN: seasonality_factor - Seasonal adjustment factor reflecting demand fluctuations.

COLUMN: units_sold - Total quantity sold for the SKU during the given period.

COLUMN: revenue - Total revenue generated = unit_price * units_sold.

COLUMN: profit - Total profit = (unit_price - unit_cost) * units_sold.

#########################################################################################

TABLE: current_product_information - Stores reference or metadata information about each SKU used for pricing and forecasting.

COLUMN: sku_id - Unique identifier for each product-category combination.

COLUMN: base_price - Reference or standard list price of the product.

COLUMN: base_demand - Baseline expected demand level for the product.

COLUMN: elasticity - Price elasticity coefficient indicating sensitivity of demand to price changes.

COLUMN: margin - Target margin ratio derived from elasticity (margin = -1 / elasticity); unit cost is computed as base_price / (1 + margin).

#########################################################################################
TABLE: forecast_data - Contains forecasted monthly unit sales for each SKU; every forecast month carries the SKU's average historical monthly units sold (a simple baseline, not a fitted model).

COLUMN: sku_id - Unique identifier for each product-category combination.

COLUMN: product_name - Name of the specific product.

COLUMN: category - Category or department the product belongs to.

COLUMN: date - Forecast month or future period.

COLUMN: units_sale - Forecasted number of units expected to be sold.

#########################################################################################

TABLE: inventory_data - Tracks current stock levels for each SKU in the inventory.

COLUMN: sku_id - Unique identifier for each product-category combination.

COLUMN: product_name - Name of the specific product.

COLUMN: category - Category or department the product belongs to.

COLUMN: stock - Current quantity of the SKU available in inventory.

#########################################################################################

TABLE: competitior_information - Captures competitor pricing and promotion details for comparative analysis.

COLUMN: sku_id - Unique identifier representing the same or equivalent SKU.

COLUMN: product_name - Name of the product for cross-reference with competitors.

COLUMN: category - Product category for comparison.

COLUMN: unit_price - Competitor’s selling price for the product.

COLUMN: promotion - Competitor’s promotion or offer label: "BOGO", "BTGO", or a fractional discount stored as text ("0.1", "0.2"); "0" means no promotion.

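COLUMN: discount_pct - Price multiplier implied by the competitor’s promotion (1 = no promotion, 0.9 = 10% off, 0.8 = 20% off, 0.5 = BOGO, 0.7 = BTGO); this is the fraction of the list price paid, not the percentage taken off.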
"""
  