# In [1]:
import random
from pathlib import Path

import pandas as pd
from faker import Faker

# Initialize Faker
# Single shared generator used by every table below for names, dates,
# e-mails, cities, phone numbers and pattern-based serial numbers.
# It is constructed with default arguments; no seed is set in this script.
fake = Faker()

# Parameters
# Row counts that size each generated table.
n_staff: int = 1_000
n_models: int = 50
n_parts_per_model: int = 20
n_sales: int = 1_500
n_services: int = 1_500
n_feedback: int = 1_000
n_dealerships: int = 50
n_suppliers: int = 200
n_schedule: int = 100
# Derived: one inventory record for each of the n_models * n_parts_per_model parts.
n_inventory: int = n_models * n_parts_per_model

# Generate Staff Details
def _build_staff_table(count):
    """Return a DataFrame with `count` synthetic staff rows.

    Columns are filled one after another; position and department are
    drawn independently of each other.
    """
    positions = ["Salesperson", "Technician", "Manager", "Clerk"]
    departments = ["Sales", "Service", "Administration", "Management"]

    columns = {}
    # Sequential IDs: S0001, S0002, ...
    columns["staff_id"] = [f"S{number:04}" for number in range(1, count + 1)]
    columns["staff_name"] = [fake.name() for _ in range(count)]
    columns["position"] = [random.choice(positions) for _ in range(count)]
    columns["department"] = [random.choice(departments) for _ in range(count)]
    # Start dates fall within the last five years.
    columns["start_date"] = [
        fake.date_between(start_date="-5y", end_date="today") for _ in range(count)
    ]
    columns["email"] = [fake.email() for _ in range(count)]
    return pd.DataFrame(columns)


staff_df = _build_staff_table(n_staff)

# Generate Car Models
# The base name and the year are drawn once per model and reused, so the year
# embedded in "model_name" always equals "manufacturing_year". Previously the
# two were sampled independently and could disagree within the same row.
model_base_names = [
    random.choice(["Corolla", "Camry", "RAV4", "Highlander", "Yaris", "Prius", "Hilux", "Fortuner"])
    for _ in range(n_models)
]
model_years = [random.choice(["2020", "2021", "2022", "2023"]) for _ in range(n_models)]

model_df = pd.DataFrame({
    # Sequential IDs: M001, M002, ...
    "model_id": [f"M{i:03}" for i in range(1, n_models + 1)],
    # Same "<name> <year>" format as before, now built from the shared year.
    "model_name": [f"{name} {year}" for name, year in zip(model_base_names, model_years)],
    "manufacturing_year": model_years,
    # Body type is still sampled independently of the model name.
    "type": [random.choice(["Sedan", "SUV", "Truck", "Hybrid"]) for _ in range(n_models)],
    "market": ["Thailand" for _ in range(n_models)]
})

# Generate Spare Parts
# Total number of parts; every model gets exactly n_parts_per_model of them.
n_parts = n_models * n_parts_per_model

parts_df = pd.DataFrame({
    # Sequential IDs: P00001, P00002, ...
    "part_id": [f"P{i:05}" for i in range(1, n_parts + 1)],
    # Parts are assigned to models in consecutive groups of n_parts_per_model
    # (P00001..P00020 -> M001, and so on). Random assignment did not honour
    # the "parts per model" parameter and could leave a model with no parts.
    "model_id": [
        f"M{model_number:03}"
        for model_number in range(1, n_models + 1)
        for _ in range(n_parts_per_model)
    ],
    # fake.unique is used so that serial numbers do not repeat.
    "serial_number": [fake.unique.bothify(text='SN-????-#####') for _ in range(n_parts)],
    "part_name": [random.choice(["Engine", "Transmission", "Brake Pad", "Steering Wheel", "Air Filter", "Oil Filter", "Headlight", "Battery"]) for _ in range(n_parts)],
    "part_type": [random.choice(["OEM", "Aftermarket"]) for _ in range(n_parts)],
    "stock_quantity": [random.randint(10, 100) for _ in range(n_parts)],
    "price": [round(random.uniform(50, 1000), 2) for _ in range(n_parts)]
})

# Generate Sales Records
def _make_sales_frame(n_rows):
    """Assemble `n_rows` synthetic sales rows.

    Each sale references a randomly numbered model ID and staff ID and is
    dated within the last two years.
    """
    payment_options = ["Cash", "Credit Card", "Bank Transfer"]
    rows = range(n_rows)
    return pd.DataFrame(dict(
        sale_id=["SL{:05}".format(seq) for seq in range(1, n_rows + 1)],
        model_id=["M{:03}".format(random.randint(1, n_models)) for _ in rows],
        staff_id=["S{:04}".format(random.randint(1, n_staff)) for _ in rows],
        sale_date=[fake.date_between(start_date="-2y", end_date="today") for _ in rows],
        # Prices are rounded to two decimal places.
        sale_price=[round(random.uniform(20000, 100000), 2) for _ in rows],
        customer_name=[fake.name() for _ in rows],
        payment_method=[random.choice(payment_options) for _ in rows],
    ))


sales_df = _make_sales_frame(n_sales)

# Generate Service Records
def _make_service_frame(total):
    """Build `total` synthetic service-visit rows.

    Car serial numbers follow the 'CSN-????-#####' pattern and are not
    forced to be unique; model and staff IDs are picked at random.
    """
    service_kinds = [
        "Routine Maintenance",
        "Repair",
        "Inspection",
        "Tire Change",
        "Battery Replacement",
    ]

    table = {}
    table["service_id"] = [f"SV{position:05}" for position in range(1, total + 1)]
    table["car_serial_number"] = [fake.bothify(text='CSN-????-#####') for _ in range(total)]
    table["model_id"] = [f"M{random.randint(1, n_models):03}" for _ in range(total)]
    table["staff_id"] = [f"S{random.randint(1, n_staff):04}" for _ in range(total)]
    # Service dates fall within the last two years.
    table["service_date"] = [
        fake.date_between(start_date="-2y", end_date="today") for _ in range(total)
    ]
    table["service_type"] = [random.choice(service_kinds) for _ in range(total)]
    table["cost"] = [round(random.uniform(50, 2000), 2) for _ in range(total)]
    return pd.DataFrame(table)


service_df = _make_service_frame(n_services)

# Generate Customer Feedback
def _make_feedback_frame(entries):
    """Create `entries` synthetic customer-feedback rows.

    Each row has a random model ID, a date within the last two years,
    a rating picked from 1 to 5, and a generated comment sentence.
    """
    rating_scale = [1, 2, 3, 4, 5]
    each = range(entries)
    return pd.DataFrame(dict(
        feedback_id=["FB{:04}".format(serial) for serial in range(1, entries + 1)],
        customer_name=[fake.name() for _ in each],
        model_id=["M{:03}".format(random.randint(1, n_models)) for _ in each],
        feedback_date=[fake.date_between(start_date="-2y", end_date="today") for _ in each],
        rating=[random.choice(rating_scale) for _ in each],
        comments=[fake.sentence(nb_words=10) for _ in each],
    ))


feedback_df = _make_feedback_frame(n_feedback)

# Generate Dealerships and Showrooms
# manager_id is drawn from staff whose position is "Manager", so every
# dealership manager refers to a row in staff_df that actually holds that
# position. Previously any staff ID could be picked, including Clerks or
# Technicians. If staff_df has no managers, fall back to the full roster.
manager_pool = staff_df.loc[staff_df["position"] == "Manager", "staff_id"].tolist()
if not manager_pool:
    manager_pool = staff_df["staff_id"].tolist()

dealership_df = pd.DataFrame({
    # Sequential IDs: D001, D002, ...
    "dealership_id": [f"D{i:03}" for i in range(1, n_dealerships + 1)],
    "name": [fake.company() + " Toyota" for _ in range(n_dealerships)],
    "location": [fake.city() for _ in range(n_dealerships)],
    "contact_number": [fake.phone_number() for _ in range(n_dealerships)],
    "email": [fake.email() for _ in range(n_dealerships)],
    # Sampling is with replacement: one manager may run several dealerships.
    "manager_id": [random.choice(manager_pool) for _ in range(n_dealerships)]
})

# Generate Supply Chain Data
def _make_supplier_frame(total):
    """Build `total` synthetic supplier rows.

    Each supplier is linked to one part ID chosen at random from the
    range 1..n_models * n_parts_per_model.
    """
    highest_part_number = n_models * n_parts_per_model

    table = {}
    table["supplier_id"] = [f"SP{index:04}" for index in range(1, total + 1)]
    table["part_id"] = [
        f"P{random.randint(1, highest_part_number):05}" for _ in range(total)
    ]
    table["supplier_name"] = [fake.company() for _ in range(total)]
    table["location"] = [fake.city() for _ in range(total)]
    # Delivery time is stored as text, e.g. "7 days".
    table["delivery_times"] = [f"{random.randint(1, 14)} days" for _ in range(total)]
    table["contact_info"] = [fake.phone_number() for _ in range(total)]
    return pd.DataFrame(table)


supplier_df = _make_supplier_frame(n_suppliers)

# Generate Inventory Records
def _make_inventory_frame(size):
    """Return `size` inventory rows, one per sequential part ID.

    inventory_id and part_id share the same running number
    (I00001 <-> P00001, ...); stock figures and warehouse city are random.
    """
    numbering = range(1, size + 1)
    each = range(size)
    return pd.DataFrame(dict(
        inventory_id=["I%05d" % number for number in numbering],
        part_id=["P%05d" % number for number in numbering],
        current_stock=[random.randint(0, 100) for _ in each],
        minimum_stock_level=[random.randint(10, 20) for _ in each],
        warehouse_location=[fake.city() for _ in each],
    ))


inventory_df = _make_inventory_frame(n_inventory)

# Generate Manufacturing Schedule
def _schedule_dates(status):
    """Return a (start_date, end_date) pair that is consistent with `status`.

    - "Planned":     starts today or later, ends on or after the start.
    - "Completed":   starts within the last year, ends between start and today.
    - anything else: treated as "In Progress" -- started within the last
      year and ends between today and one year from now.

    In every case start_date <= end_date.
    """
    if status == "Planned":
        start = fake.date_between(start_date="today", end_date="+1y")
        # "+2y" keeps the upper bound after any possible start date.
        end = fake.date_between(start_date=start, end_date="+2y")
    elif status == "Completed":
        start = fake.date_between(start_date="-1y", end_date="today")
        end = fake.date_between(start_date=start, end_date="today")
    else:
        start = fake.date_between(start_date="-1y", end_date="today")
        end = fake.date_between(start_date="today", end_date="+1y")
    return start, end


# Status is drawn first and the dates are derived from it. Previously the two
# were independent, so "Completed" runs always ended in the future and
# "Planned" runs always started in the past.
schedule_status = [random.choice(["Planned", "In Progress", "Completed"]) for _ in range(n_schedule)]
schedule_dates = [_schedule_dates(status) for status in schedule_status]

schedule_df = pd.DataFrame({
    # Sequential IDs: MS0001, MS0002, ...
    "schedule_id": [f"MS{i:04}" for i in range(1, n_schedule + 1)],
    "model_id": [f"M{random.randint(1, n_models):03}" for _ in range(n_schedule)],
    "start_date": [start for start, _ in schedule_dates],
    "end_date": [end for _, end in schedule_dates],
    "quantity": [random.randint(100, 1000) for _ in range(n_schedule)],
    "status": schedule_status
})

# Save CSV Files
# Resolved relative to the current working directory, as before.
output_dir = Path("../data")
# to_csv does not create missing directories, so make sure the target exists.
output_dir.mkdir(parents=True, exist_ok=True)

# Output file name -> table to write; each file is written without the index.
csv_outputs = {
    "toyota_staff_details.csv": staff_df,
    "toyota_car_models.csv": model_df,
    "toyota_spare_parts.csv": parts_df,
    "toyota_sales_records.csv": sales_df,
    "toyota_service_records.csv": service_df,
    "toyota_customer_feedback.csv": feedback_df,
    "toyota_dealerships_showrooms.csv": dealership_df,
    "toyota_supply_chain_data.csv": supplier_df,
    "toyota_inventory_records.csv": inventory_df,
    "toyota_manufacturing_schedule.csv": schedule_df,
}
for file_name, frame in csv_outputs.items():
    frame.to_csv(output_dir / file_name, index=False)
