This script creates a clean dataset of products. Each product is created with a random number of randomly chosen parts, each of which comes from a processing plant.

1. Get a dictionary of the Processing Plants and the part created
2. Get a dictionary of the Products and the parts used.
3. Randomly generate parts for a product, a name, and a price.

In [1]:
import arcpy
import pandas as pd
import arcgis
from pprint import pprint
import random
import os
from faker import Faker

In [2]:
# File geodatabase holding the supply-chain demo data, and the names of the
# two datasets inside it that this notebook works with.
gdb = r"C:\DemoData\KNOW\SupplyChain\SupplyChain.gdb"
pplants, products = "ProcessingPlants", "Products"

# Full paths to both datasets, built from the geodatabase path and the names.
pplantPath, productsPath = (
    os.path.join(gdb, datasetName) for datasetName in (pplants, products)
)

In [3]:
# Read the processing-plant feature class into a DataFrame through the
# `spatial` accessor (presumably registered by the `import arcgis` in the
# first cell -- TODO confirm).
sdf = pd.DataFrame.spatial.from_featureclass(pplantPath)
# Last expression of the cell: the first five rows become the cell output.
sdf.head(5)

Unnamed: 0,OBJECTID,X_WGS1984,Y_WGS1984,Type,Name,Material,Part,Distributors,PlantID,SHAPE
0,1,-117.125496,32.761458,Processing Plant,Plant 1,"Metals, Plastics",Part 1,"Distributor 2, Distributor 4, Distributor 9, D...",1,"{""x"": -117.12549565567934, ""y"": 32.76145804719..."
1,2,29.006001,41.066001,Processing Plant,Plant 2,"Aluminum, Plastics, Steel, Metals",Part 2,"Distributor 6, Distributor 10, Distributor 12,...",2,"{""x"": 29.00600140272212, ""y"": 41.0660009628414..."
2,3,72.826002,19.077003,Processing Plant,Plant 3,"Plastics, Steel, Metals",Part 3,"Distributor 6, Distributor 10, Distributor 12,...",3,"{""x"": 72.82600233462313, ""y"": 19.0770029830576..."
3,4,90.407142,23.709915,Processing Plant,Plant 4,"Steel, Plastics",Part 4,"Distributor 2, Distributor 4, Distributor 9, D...",4,"{""x"": 90.40714232325718, ""y"": 23.7099149438166..."
4,5,120.165003,30.252996,Processing Plant,Plant 5,"Plastics, Aluminum",Part 5,"Distributor 1, Distributor 3, Distributor 7, D...",5,"{""x"": 120.16500334735883, ""y"": 30.252996010603..."


In [4]:
# Build {plant name: part} with one entry per distinct part.
#
# Rows without a Part value are skipped: a null part never compares equal to
# anything, so looking up "the plant for that part" would find no rows.
# drop_duplicates keeps the first row seen for each part, so every part is
# paired with the first plant listed for it, in a single pass over the frame
# instead of one filter per part.
plantPerPart = sdf.dropna(subset=['Part']).drop_duplicates(subset=['Part'])
parts = list(plantPerPart['Part'])
partsFromPlant = dict(zip(plantPerPart['Name'], parts))
print(partsFromPlant)

{'Plant 1': 'Part 1', 'Plant 2': 'Part 2', 'Plant 3': 'Part 3', 'Plant 4': 'Part 4', 'Plant 5': 'Part 5', 'Plant 6': 'Part 6', 'Plant 7': 'Part 7'}


In [5]:
from faker import Faker

# Module-level Faker instance used by generate_company_name() below.
fake = Faker()

def generate_company_name():
    """Return one company name produced by the module-level ``fake`` instance."""
    companyName = fake.company()
    return companyName

# Preview the generator: print 10 random company names, one per line.
print(*(generate_company_name() for _ in range(10)), sep="\n")


Brennan and Sons
Mercer-Wade
Pearson PLC
Anderson, Ewing and Ortiz
Williams-Willis
Vasquez Inc
Carter Inc
Armstrong Ltd
Santana-Norris
Campbell-Clark


In [6]:
# Generate a list of fake company names

def generate_company_name_list(count=10):
    """Return a list of randomly generated company names.

    Args:
        count: Number of names to generate. Defaults to 10. Faker is not asked
            for unique values here, so the list may contain duplicates.

    Returns:
        list: ``count`` values returned by ``Faker().company()``.
    """
    fakeCompany = Faker()
    return [fakeCompany.company() for _ in range(count)]

# Generate a fake price
def generate_msrp(min_price=10, max_price=1000):
    """Return a random whole-dollar price formatted like ``"$412"``.

    The integer is drawn with the standard-library ``random`` module, which
    this notebook already uses for its other random choices, so no ``Faker``
    object has to be constructed on every call just to pick one number.

    Args:
        min_price: Lowest price that can be returned (inclusive).
        max_price: Highest price that can be returned (inclusive).

    Returns:
        str: A dollar sign followed by an integer between ``min_price`` and
        ``max_price``.

    Raises:
        ValueError: If ``min_price`` is greater than ``max_price``.
    """
    return f"${random.randint(min_price, max_price)}"

# Helper function to remove unwanted characters
def removeChars(arg1):
    """Render a list as comma-separated text without brackets or quotes.

    A list is joined item by item, so the text of each string item is kept
    exactly: an apostrophe or bracket that is part of an item survives instead
    of being stripped together with the list punctuation. Non-string items
    (including nested lists) are rendered by calling this function on them.

    Any value that is not a list is converted with ``str`` and has every
    ``[``, ``]`` and ``'`` character removed.

    Args:
        arg1: Usually a list of part names; any object is accepted.

    Returns:
        str: For example ``['Part 3', 'Part 7']`` gives ``'Part 3, Part 7'``.
    """
    if isinstance(arg1, list):
        return ", ".join(
            item if isinstance(item, str) else removeChars(item)
            for item in arg1
        )
    # One pass that deletes the three list-punctuation characters.
    return str(arg1).translate(str.maketrans("", "", "[]'"))

In [7]:
# Pool of company names; each product is assigned one of them at random.
randCompanyList = generate_company_name_list()
# Every part produced by a plant; each product samples its parts from here.
availableParts = list(partsFromPlant.values())
productCount = 16  # arbitrary size for the demo dataset

# The with-block closes the insert cursor even when an insert fails part-way
# through, so the cursor is not left open on the Products dataset.
with arcpy.da.InsertCursor(productsPath, ["ProductName", "Parts", "Company", "MSRP"]) as iCur:
    for i in range(1, productCount + 1):
        num = random.choice([2, 3, 4])  # each product uses 2, 3 or 4 distinct parts
        # Values are listed in the same order as the cursor's field list.
        row = [
            f"Product {i}",
            removeChars(random.sample(availableParts, num)),
            random.choice(randCompanyList),
            generate_msrp(),
        ]
        print(row)
        iCur.insertRow(row)
print("Data added")

['Product 1', 'Part 3, Part 7', 'Wood, Gonzalez and Carter', '$41']
['Product 2', 'Part 3, Part 7, Part 4', 'Kim PLC', '$830']
['Product 3', 'Part 7, Part 3, Part 1, Part 4', 'Kennedy, Hanson and Aguilar', '$976']
['Product 4', 'Part 1, Part 7, Part 2', 'David-Baker', '$412']
['Product 5', 'Part 1, Part 5, Part 6, Part 3', 'Rivera-Thomas', '$380']
['Product 6', 'Part 7, Part 2, Part 5', 'Kim PLC', '$579']
['Product 7', 'Part 3, Part 5', 'Kim PLC', '$567']
['Product 8', 'Part 4, Part 1, Part 7', 'Wood, Gonzalez and Carter', '$255']
['Product 9', 'Part 1, Part 6, Part 2, Part 5', 'Kim PLC', '$258']
['Product 10', 'Part 5, Part 1', 'Kim PLC', '$837']
['Product 11', 'Part 6, Part 7, Part 4', 'Rivera-Thomas', '$410']
['Product 12', 'Part 6, Part 2', 'Wood, Gonzalez and Carter', '$667']
['Product 13', 'Part 4, Part 7, Part 5', 'Moody-Martin', '$129']
['Product 14', 'Part 3, Part 6', 'Wood, Gonzalez and Carter', '$926']
['Product 15', 'Part 6, Part 1', 'Bell Ltd', '$922']
['Product 16', 'Part