In [1]:
# EDA PROJECT
# The goal of this project is to simulate a realistic dataset using the Faker library and perform Exploratory Data Analysis (EDA) to uncover insights, patterns, and anomalies. 
# This approach allows practicing data analysis techniques on synthetic data that mimics real-world scenarios 


# Step1: Import libraries
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from faker import Faker


In [2]:
!pip install faker




[notice] A new release of pip is available: 23.3.1 -> 25.3
[notice] To update, run: C:\Users\Micheal\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip


In [3]:
# INITIATE FAKER FOR GENERATING FAKE DATA
fake = Faker()

In [7]:
# Defining the base limit

Categories = {
    "Furniture": ["Office Chair", "Study Table", "Sofa", "Bookshelf", "Dining Table"],
    "Office Supplies": ["Pen", "Notebook", "Stapler", "File Folder", "Calculator"],
    "Electronics": ["Laptop", "Keyboard", "Mouse", "Headphones", "Monitor"],
    "Grocery": ["Rice Bag", "Cooking Oil", "Sugar", "Snacks", "Juice Pack"]
}

Regions = ["North", "West", "South","East"]
Payment_modes = ["Cash", "Credit Card", "Transfer", "Net Banking"]
Delivery_status = ["Delivered", "Pending", "Returned", "Cancelled"]
Customer_segments = ["Consumer","Corporate", "Home Office"]

In [8]:
# Generating the datasets
records = []  # Empty list to store all rows

for i in range(10000):  # 10000 fake orders
    order_id = f"ORD{1000 + i}"
    order_date = fake.date_between(start_date='-3y', end_date='today')
    ship_date = order_date + timedelta(days=random.randint(1, 7))

    # Creation of customer details
    customer_name = fake.name()
    customer_id = f"CUST{random.randint(100, 999)}"
    customer_segment = random.choice(Customer_segments)

    # Creation product category we listed above
    category = random.choice(list(Categories.keys()))
    product_name = random.choice(Categories[category])
    product_id = f"PROD{random.randint(100, 999)}"

    region = random.choice(Regions)
    state = fake.state()
    city = fake.city()

    quantity = random.randint(1, 10)
    unit_price = random.randint(100, 5000)
    discount = random.choice([0, 5, 10, 15, 20])

    sales_amount = quantity * unit_price * (1 - discount / 100)
    cost_price = sales_amount * random.uniform(0.6, 0.9)
    profit = sales_amount - cost_price

    # Creation of an inventory record to track stock and products level  
    stock_left = random.randint(0, 50)
    if stock_left < 10:
        auto_reorder = 'Yes'
        reorder_quantity = random.randint(20, 50)
    else:
        auto_reorder = 'No'
        reorder_quantity = 0
        
    # Creation of a fake supplier company names 
    supplier_name = fake.company()
    supplier_email = fake.company_email()
    payment_mode = random.choice(Payment_modes)
    delivery = random.choice(Delivery_status)

    # ✅ Append the row here
    records.append({
        "order_id": order_id,
        "order_date": order_date,
        "ship_date": ship_date,
        "customer_name": customer_name,
        "customer_id": customer_id,
        "customer_segment": customer_segment,
        "category": category,
        "product_name": product_name,
        "product_id": product_id,
        "region": region,
        "state": state,
        "city": city,
        "quantity": quantity,
        "unit_price": unit_price,
        "discount": discount,
        "sales_amount": sales_amount,
        "cost_price": cost_price,
        "profit": profit,
        "stock_left": stock_left,
        "auto_reorder": auto_reorder,
        "reorder_quantity": reorder_quantity,
        "supplier_name": supplier_name,
        "supplier_email": supplier_email,
        "payment_mode": payment_mode,
        "delivery": delivery
    })


In [11]:
# Creation of the DataFrame 

data = pd.DataFrame(records)

In [12]:
data.head(20)

Unnamed: 0,order_id,order_date,ship_date,customer_name,customer_id,customer_segment,category,product_name,product_id,region,...,sales_amount,cost_price,profit,stock_left,auto_reorder,reorder_quantity,supplier_name,supplier_email,payment_mode,delivery
0,ORD1000,2024-01-20,2024-01-21,Christopher Allen,CUST225,Home Office,Electronics,Keyboard,PROD441,West,...,18335.0,11272.868667,7062.131333,43,No,0,"Lee, Roberts and Jenkins",alexander75@tate.com,Cash,Cancelled
1,ORD1001,2025-08-04,2025-08-06,Christopher Fernandez,CUST794,Consumer,Grocery,Snacks,PROD759,North,...,30763.8,23967.284001,6796.515999,20,No,0,Lopez-Grant,mitchelldavid@vega-howe.net,Net Banking,Pending
2,ORD1002,2024-07-09,2024-07-11,Jonathan Carter,CUST413,Home Office,Electronics,Mouse,PROD372,North,...,6679.8,5113.834279,1565.965721,8,Yes,36,Horn-Medina,xhenry@bush.com,Credit Card,Cancelled
3,ORD1003,2024-11-30,2024-12-02,Amanda Wilson,CUST406,Consumer,Furniture,Dining Table,PROD290,West,...,7879.5,5373.609872,2505.890128,37,No,0,Williams Inc,websterstephanie@vargas.com,Transfer,Pending
4,ORD1004,2025-07-04,2025-07-09,Megan Jimenez,CUST907,Home Office,Furniture,Bookshelf,PROD299,South,...,20345.2,15686.444701,4658.755299,12,No,0,Hopkins Group,jamesrangel@wright.com,Transfer,Pending
5,ORD1005,2022-12-08,2022-12-15,Carly Stein,CUST681,Home Office,Grocery,Juice Pack,PROD216,South,...,37728.0,32331.868395,5396.131605,47,No,0,Evans Inc,kathleenguerrero@montoya.com,Transfer,Cancelled
6,ORD1006,2023-03-03,2023-03-05,William Sanchez,CUST374,Corporate,Office Supplies,Stapler,PROD977,North,...,9184.6,5839.97827,3344.62173,2,Yes,24,Aguilar-Martinez,sking@ortega.com,Transfer,Pending
7,ORD1007,2024-09-14,2024-09-21,Melissa Kramer,CUST708,Home Office,Office Supplies,Stapler,PROD431,East,...,49220.0,41906.721099,7313.278901,21,No,0,"Ruiz, Walter and Campbell",samantha00@rivera-carter.com,Credit Card,Cancelled
8,ORD1008,2025-01-15,2025-01-22,Michael Phillips,CUST129,Corporate,Electronics,Keyboard,PROD538,East,...,21416.4,18291.098941,3125.301059,4,Yes,47,"Alexander, Russell and Diaz",bobbycastro@rice-navarro.info,Credit Card,Cancelled
9,ORD1009,2023-02-22,2023-02-26,Charles Lambert,CUST304,Corporate,Office Supplies,File Folder,PROD977,South,...,14448.0,12847.143475,1600.856525,22,No,0,"Smith, Chandler and Rice",bmacias@jones.net,Credit Card,Cancelled


In [13]:
data = pd.DataFrame(records)
try:
    data.to_csv("Superstore_Management_System.csv", index=False)
    print("Dataset generated successfully")
except PermissionError:
    print("Please close the file")


Dataset generated successfully
