In [1]:
import pandas as pd

In [2]:
# Read the product-component and supplier-component CSV files.
# Both files use ';' as the field separator.
r_product_component = pd.read_csv(
    'cleaned_data/r_product_component.csv',
    sep=';',
)
r_supplier_component = pd.read_csv(
    'cleaned_data/r_supplier_component.csv',
    sep=';',
)

In [None]:
# Supplier side: one row per component_id holding the distinct supplier_id
# values (in order of first appearance) and the mean of the 'price' column.
supplier_group = r_supplier_component.groupby('component_id').agg(
    suppliers=('supplier_id', lambda ids: list(ids.unique())),
    avg_supplier_price=('price', 'mean'),
)

# Product side: one row per component_id holding the distinct product_id
# values and the mean of the 'expected_price' column.
product_group = r_product_component.groupby('component_id').agg(
    products=('product_id', lambda ids: list(ids.unique())),
    bom_price=('expected_price', 'mean'),
)

# Build the combined table in a single chain:
#   1. outer-join on the component_id index so a component present on only
#      one side is still kept (the other side's columns become NaN);
#   2. turn component_id back into a regular column;
#   3. replace any non-list 'products' cell (i.e. the NaN left by the join)
#      with an empty list -- 'suppliers' is intentionally left untouched, so
#      components without supplier rows keep NaN there;
#   4. put the columns in their final display order.
matrix = (
    supplier_group
    .join(product_group, how='outer')
    .reset_index()
    .assign(
        products=lambda frame: frame['products'].map(
            lambda cell: cell if isinstance(cell, list) else []
        )
    )
    .loc[:, ['component_id', 'suppliers', 'products', 'avg_supplier_price', 'bom_price']]
)
matrix

Unnamed: 0,component_id,suppliers,products,avg_supplier_price,bom_price
0,A.01,[Supplier 2],"[A, B, C, D]",3.898074,4.392500
1,A.02,"[Supplier 3, Supplier 7, Supplier 9]","[A, B, C, D]",6.107868,6.100000
2,A.03,[Supplier 1],"[A, B, D]",11.669604,11.670000
3,A.04,"[Supplier 2, Supplier 14, Supplier 17]","[A, B, D]",17.850343,17.850000
4,A.05,"[Supplier 11, Supplier 12]","[A, B, D]",3.885458,3.881667
...,...,...,...,...,...
56,D.10,[Supplier 6],[D],13.671246,13.670000
57,D.11,"[Supplier 3, Supplier 10, Supplier 19, Supplie...",[D],5.540362,5.470000
58,D.12,"[Supplier 4, Supplier 6, Supplier 7, Supplier ...",[D],7.244711,7.360000
59,D.13,,[D],,8.240000


In [12]:
# Keep only the components whose 'suppliers' cell is NaN, i.e. rows of
# `matrix` for which no supplier list is present.
empty_suppliers = matrix.loc[lambda frame: frame['suppliers'].isna()]
empty_suppliers

Unnamed: 0,component_id,suppliers,products,avg_supplier_price,bom_price
17,A.18,,[A],,7.0
18,A.19,,[A],,73.0
20,B.07,,"[B, C]",,7.16
49,C.16,,"[C, D]",,1.375
50,C.17,,[D],,0.02
51,C.18,,[D],,0.01
52,C.19,,[D],,0.01
53,C.20,,[D],,78.0
54,C.21,,[D],,54.0
59,D.13,,[D],,8.24
