In [1]:
import random
import pandas as pd
from faker import Faker
from datetime import timedelta, datetime  # Import datetime

In [2]:
data_path = "./data/"

In [3]:
fake = Faker()

In [4]:
# Configuration
NUM_CLIENTS = 50  # Example value
NUM_COMPANIES = 20
NUM_SUPPLIERS = 30
NUM_SUBCONTRACTORS = 40
NUM_MATERIALS = 100
NUM_PROJECTS = 60
NUM_TASKS = 150
NUM_PURCHASEORDERS = 100
NUM_INVOICES = 80

In [5]:
# 1. Generate Clients
clients = []
for i in range(NUM_CLIENTS):
    clients.append({
        "id": f"client_{i}",
        "name": fake.company() + " (Client)",
        "contactInfo": fake.phone_number()
    })

# 2. Generate Construction Companies
companies = []
for i in range(NUM_COMPANIES):
    companies.append({
        "id": f"company_{i}",
        "companyName": fake.company() + " Construction",
        "licenseNumber": f"LIC-{random.randint(10000, 99999)}",
        "location": fake.city()
    })

# 3. Generate Suppliers
suppliers = []
for i in range(NUM_SUPPLIERS):
    suppliers.append({
        "id": f"supplier_{i}",
        "supplierName": fake.company() + " Supplies",
        "location": fake.city(),
        "rating": round(random.uniform(1.0, 5.0), 1)
    })

# 4. Generate SubContractors
specialties = ["Electrical", "Plumbing", "HVAC", "Roofing", "Carpentry", "Masonry", "Painting", "Flooring"]
subContractors = []
for i in range(NUM_SUBCONTRACTORS):
    subContractors.append({
        "id": f"subcontractor_{i}",
        "subContractorName": fake.company() + " " + random.choice(specialties),
        "specialty": random.choice(specialties),
        "licenseNumber": f"SUB-{random.randint(10000, 99999)}"
    })

# 5. Generate Materials
material_types = ["Lumber", "Cement", "Steel Rebar", "Bricks", "Tiles", "Glass", "Insulation", "Pipe", "Wiring", "Drywall", "Concrete Blocks"]
materials = []
for i in range(NUM_MATERIALS):
    mat_type = random.choice(material_types)
    materials.append({
        "id": f"material_{i}",
        "materialName": mat_type,
        "unitCost": round(random.uniform(1.0, 500.0), 2)
    })

# 6. Generate Projects
project_statuses = ["Planning", "In Progress", "On Hold", "Completed", "Cancelled"]
projects = []
for i in range(NUM_PROJECTS):
    p_client = random.choice(clients)
    p_company = random.choice(companies)
    start_date = fake.date_between(start_date='-3y', end_date='today')
    end_date = start_date + timedelta(days=random.randint(60, 730))  # up to 2 years from start
    proj_status = random.choice(project_statuses)
    projects.append({
        "id": f"project_{i}",
        "projectName": f"Project_{i}_{fake.word()}",
        "location": fake.city(),
        "startDate": start_date.isoformat(),
        "endDate": end_date.isoformat(),
        "totalBudget": round(random.uniform(100000, 3000000), 2),
        "status": proj_status,
        "clientID": p_client["id"],
        "companyID": p_company["id"]
    })

# 7. Generate Tasks
tasks = []
for i in range(NUM_TASKS):
    t_project = random.choice(projects)
    # Corrected: Convert start and end dates to datetime objects
    t_start = fake.date_between(start_date=datetime.fromisoformat(t_project["startDate"]), end_date=datetime.fromisoformat(t_project["endDate"]))
    # end date is after start date, but not beyond project end date
    t_end = t_start + timedelta(days=random.randint(1, 90))
    if t_end > datetime.fromisoformat(t_project["endDate"]).date(): #Corrected Comparison with task end date and project end date
        t_end = fake.date_between(start_date=t_start, end_date=datetime.fromisoformat(t_project["endDate"]))

    t_status = random.choice(["Not Started", "In Progress", "Completed", "Blocked"])
    cost_est = round(random.uniform(1000, 50000), 2)

    # assigned to either a sub or the main construction company
    assigned_entity_type = random.choice(["sub", "main"])
    if assigned_entity_type == "sub":
        assignedID = random.choice(subContractors)["id"]
    else:
        assignedID = t_project["companyID"]

    # random sample of 0-3 materials used
    num_mats = random.randint(0, 3)
    used_material_ids = []
    if num_mats > 0:
        used_material_ids = [random.choice(materials)["id"] for _ in range(num_mats)]

    tasks.append({
        "id": f"task_{i}",
        "taskName": f"Task_{i}_{fake.word()}",
        "startDate": t_start.isoformat(),
        "endDate": t_end.isoformat(),
        "status": t_status,
        "costEstimate": cost_est,
        "projectID": t_project["id"],
        "assignedTo": assignedID,
        "materialIDs": used_material_ids
    })

# 8. Generate PurchaseOrders
purchaseOrders = []
for i in range(NUM_PURCHASEORDERS):
    po_num = f"PO-{random.randint(10000, 99999)}"
    po_project = random.choice(projects)
    po_supplier = random.choice(suppliers)
    # Corrected: Convert start and end dates to datetime objects
    po_date = fake.date_between(start_date=datetime.fromisoformat(po_project["startDate"]), end_date=datetime.fromisoformat(po_project["endDate"]))
    total_cost = round(random.uniform(500.0, 50000.0), 2)
    purchaseOrders.append({
        "id": f"po_{i}",
        "orderNumber": po_num,
        "orderDate": po_date.isoformat(),
        "totalCost": total_cost,
        "supplierID": po_supplier["id"],
        "projectID": po_project["id"]
    })

# 9. Generate Invoices
invoices = []
for i in range(NUM_INVOICES):
    inv_num = f"INV-{random.randint(10000, 99999)}"
    inv_date = fake.date_between(start_date='-2y', end_date='today')
    inv_amount = round(random.uniform(1000.0, 50000.0), 2)
    inv_status = random.choice(["Open", "Paid", "Overdue", "Cancelled"])
    # invoicer can be a SubContractor or a Supplier
    invoicer_type = random.choice(["sub", "sup"])
    if invoicer_type == "sub":
        invoicerID = random.choice(subContractors)["id"]
    else:
        invoicerID = random.choice(suppliers)["id"]
    # invoicedTo: pick random construction company
    to_company = random.choice(companies)["id"]

    invoices.append({
        "id": f"invoice_{i}",
        "invoiceNumber": inv_num,
        "invoiceDate": inv_date.isoformat(),
        "amount": inv_amount,
        "status": inv_status,
        "invoicedByID": invoicerID,
        "invoicedToID": to_company
    })

# Summaries
print("Number of Clients:", len(clients))
print("Number of Construction Companies:", len(companies))
print("Number of Suppliers:", len(suppliers))
print("Number of SubContractors:", len(subContractors))
print("Number of Materials:", len(materials))
print("Number of Projects:", len(projects))
print("Number of Tasks:", len(tasks))
print("Number of PurchaseOrders:", len(purchaseOrders))
print("Number of Invoices:", len(invoices), "\n")

# Print a few samples
print("Sample Client:", clients[0])
print("Sample ConstructionCompany:", companies[0])
print("Sample Supplier:", suppliers[0])
print("Sample SubContractor:", subContractors[0])
print("Sample Material:", materials[0])
print("Sample Project:", projects[0])
print("Sample Task:", tasks[0])
print("Sample PurchaseOrder:", purchaseOrders[0] if purchaseOrders else None)
print("Sample Invoice:", invoices[0] if invoices else None)

Number of Clients: 50
Number of Construction Companies: 20
Number of Suppliers: 30
Number of SubContractors: 40
Number of Materials: 100
Number of Projects: 60
Number of Tasks: 150
Number of PurchaseOrders: 100
Number of Invoices: 80 

Sample Client: {'id': 'client_0', 'name': 'Hall, Gregory and Hart (Client)', 'contactInfo': '836-860-6391'}
Sample ConstructionCompany: {'id': 'company_0', 'companyName': 'Forbes and Sons Construction', 'licenseNumber': 'LIC-83636', 'location': 'Portershire'}
Sample Supplier: {'id': 'supplier_0', 'supplierName': 'Miller, Martinez and Gonzalez Supplies', 'location': 'Holmeshaven', 'rating': 2.8}
Sample SubContractor: {'id': 'subcontractor_0', 'subContractorName': 'Johnston PLC Plumbing', 'specialty': 'Carpentry', 'licenseNumber': 'SUB-23949'}
Sample Material: {'id': 'material_0', 'materialName': 'Bricks', 'unitCost': 232.05}
Sample Project: {'id': 'project_0', 'projectName': 'Project_0_should', 'location': 'Lake Daniel', 'startDate': '2024-10-16', 'endDat

In [6]:
# persist the data
pd.DataFrame(clients).to_csv(data_path+"clients.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(companies).to_csv(data_path+"companies.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(suppliers).to_csv(data_path+"suppliers.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(subContractors).to_csv(data_path+"subContractors.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(materials).to_csv(data_path+"materials.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(projects).to_csv(data_path+"projects.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(purchaseOrders).to_csv(data_path+"purchaseOrders.csv", encoding = "utf-8", escapechar = "\\", index=False)
pd.DataFrame(invoices).to_csv(data_path+"invoices.csv", encoding = "utf-8", escapechar = "\\", index=False)