### **Problem 3: Material Flow Trace and Cleanup**

You're investigating if specific material types are being mixed up or mislabeled across batches.

**Your tasks:**

1. Standardize `Material Type` and `Color` fields (e.g., title case, strip spaces).
2. Extract the numeric part of the `Machine Barcode` and validate it’s 13 digits.
3. Remove or flag any rows with invalid barcodes.
4. Create a composite key (`Material` + `Color` + `Lot Number`) and check for duplicates.
5. Group by `Material Type` and compute average batch weight and scrap.
6. Identify if any `Material Type` shows high scrap and low weight simultaneously.
7. Recommend whether to isolate any material for QA recheck based on findings.

*Hint: Combine logical conditions in your flagging logic.*

In [196]:
import pandas as pd
import numpy as np
import re

In [197]:
data = pd.read_csv('Spool_Manufacturing_Batch_Log.csv')

In [198]:
df = pd.DataFrame(data)

In [199]:
df.head(3)

Unnamed: 0,Batch ID,Date Produced,Material Type,Color,Production Line,Weight (g),Scrap Rate (%),Pass/Fail,Operator,Phone,Email,Shift,Machine Barcode,Lot Number
0,eb6221c8-f45a-49f6-8c0c-ee28f5a29fc0,2025-05-01,PLA,Black,Line 2,1024.84,1.79,Pass,Jacqueline Bass,001-988-061-3911x7775,haynesdavid@yahoo.com,Shift C,MCH-001,L9935
1,9748d109-45e1-4bb0-98af-53396946b791,2025-05-01,PLA,Red,Line 1,1032.38,4.28,Pass,Kristen Cole,300-905-2906x4997,theodore63@yahoo.com,Shift A,MCH-001,L4257
2,35de154c-67d6-4144-a9e2-8afe65353fb2,2025-05-01,ABS,Blue,Line 4,988.29,1.65,Pass,Sherry Bryant,001-741-699-1830x254,timothy04@knox.net,Shift C,MCH-001,L3615


In [200]:
# 1. Standardize `Material Type` and `Color` fields (e.g., title case, strip spaces).
df['Standardized Material'] = df['Material Type'].str.title().str.strip()

In [201]:
df['Standardized Color'] = df['Color'].str.title().str.strip()

In [202]:
df[['Material Type', 'Standardized Material', 'Color', 'Standardized Color']].head(3)

Unnamed: 0,Material Type,Standardized Material,Color,Standardized Color
0,PLA,Pla,Black,Black
1,PLA,Pla,Red,Red
2,ABS,Abs,Blue,Blue


In [203]:
# 2. Extract the numeric part of the `Machine Barcode` and validate it’s 3 digits.
pattern_to_check = r'^[A-Z+]{3}-(\d{3}+)$'

In [204]:
df['Barcode Numeric Part'] = df['Machine Barcode'].str.extract(pattern_to_check)

In [205]:
df[['Machine Barcode', 'Barcode Numeric Part']].head()

Unnamed: 0,Machine Barcode,Barcode Numeric Part
0,MCH-001,1
1,MCH-001,1
2,MCH-001,1
3,MCH-003,3
4,MCH-001,1


In [206]:
df['Is last 3 Barcode Digits?'] = df['Barcode Numeric Part'].apply(lambda x: 'TRUE' if re.fullmatch(r'\d{3}', str(x)) else 'FALSE')

In [207]:
df[['Machine Barcode', 'Barcode Numeric Part', 'Is last 3 Barcode Digits?']].head()

Unnamed: 0,Machine Barcode,Barcode Numeric Part,Is last 3 Barcode Digits?
0,MCH-001,1,True
1,MCH-001,1,True
2,MCH-001,1,True
3,MCH-003,3,True
4,MCH-001,1,True


In [208]:
# 3. Remove or flag any rows with invalid barcodes.
valid_barcode_pattern = r'^([MCH]{3}-\d{3}+)$'

In [209]:
df['Is Barcode Valid?'] = df['Machine Barcode'].apply(lambda x: 'Valid' if re.fullmatch(valid_barcode_pattern, str(x)) else 'Invalid')

In [210]:
df[['Machine Barcode', 'Barcode Numeric Part', 'Is last 3 Barcode Digits?', 'Is Barcode Valid?']].head()

Unnamed: 0,Machine Barcode,Barcode Numeric Part,Is last 3 Barcode Digits?,Is Barcode Valid?
0,MCH-001,1,True,Valid
1,MCH-001,1,True,Valid
2,MCH-001,1,True,Valid
3,MCH-003,3,True,Valid
4,MCH-001,1,True,Valid


In [211]:
# 4. Create a composite key (`Material` + `Color` + `Lot Number`) and check for duplicates.
df['Composite Key'] = df['Material Type'] + df['Color'] + df['Lot Number']

In [212]:
df['Composite Key'] = df['Composite Key'].str.strip()

In [213]:
df['Is Composite Key Duplicated'] = df['Composite Key'].duplicated()

In [214]:
df[df['Is Composite Key Duplicated'] == True]['Composite Key']

460    PLAWhiteL1920
Name: Composite Key, dtype: object

In [215]:
duplicate_keys = df[df['Composite Key'].duplicated(keep=False)]  # to see al rows with duplicated keys

In [216]:
duplicate_keys

Unnamed: 0,Batch ID,Date Produced,Material Type,Color,Production Line,Weight (g),Scrap Rate (%),Pass/Fail,Operator,Phone,...,Shift,Machine Barcode,Lot Number,Standardized Material,Standardized Color,Barcode Numeric Part,Is last 3 Barcode Digits?,Is Barcode Valid?,Composite Key,Is Composite Key Duplicated
181,f6da2d6e-ad58-411f-b93e-50d01c229539,2025-05-11,PLA,White,Line 4,994.56,2.6,Pass,Paul Macdonald,493-813-3210x19538,...,Shift A,MCH-003,L1920,Pla,White,3,True,Valid,PLAWhiteL1920,False
460,335a6dd7-33be-4e4c-8785-4c3171607e7d,2025-05-26,PLA,White,Line 6,1053.68,0.46,Pass,Paul Macdonald,493-813-3210x19538,...,Shift A,MCH-003,L1920,Pla,White,3,True,Valid,PLAWhiteL1920,True


In [217]:
# 5. Group by `Material Type` and compute average batch weight and scrap.
average_weight_per_material = round(df.groupby('Material Type')['Weight (g)'].mean())

In [218]:
average_weight_per_material

Material Type
ABS     998.0
PLA    1006.0
TPU    1000.0
Name: Weight (g), dtype: float64

In [219]:
df['Average Weight (g) per Material'] = df['Material Type'].map(average_weight_per_material)

In [220]:
average_scrap_per_material = round(df.groupby('Material Type')['Scrap Rate (%)'].mean(), 2)

In [221]:
average_weight_per_material

Material Type
ABS     998.0
PLA    1006.0
TPU    1000.0
Name: Weight (g), dtype: float64

In [222]:
df['Average Scrap Rate (%) per Material'] = df['Material Type'].map(average_scrap_per_material)

In [223]:
df[['Material Type','Average Weight (g) per Material', 'Average Scrap Rate (%) per Material']].head()

Unnamed: 0,Material Type,Average Weight (g) per Material,Average Scrap Rate (%) per Material
0,PLA,1006.0,2.06
1,PLA,1006.0,2.06
2,ABS,998.0,2.27
3,ABS,998.0,2.27
4,PLA,1006.0,2.06


In [224]:
# Alternative solution
summary = df.groupby('Material Type')[['Weight (g)', 'Scrap Rate (%)']].sum()

In [225]:
summary

Unnamed: 0_level_0,Weight (g),Scrap Rate (%)
Material Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ABS,181644.27,413.92
PLA,192119.41,394.36
TPU,167081.99,347.35


In [226]:
# 6. Identify if any `Material Type` shows high scrap and low weight simultaneously.
scrap_threshold = average_scrap_per_material.mean() - average_scrap_per_material.std()

In [227]:
weight_threshold = average_weight_per_material.mean() - average_weight_per_material

In [228]:
high_scarp_low_weight = (average_scrap_per_material > scrap_threshold) & (average_weight_per_material < weight_threshold)

In [229]:
result = pd.DataFrame(
    {
        'Average Weight (g)': average_weight_per_material,
        'Average Scrap Rate (%)': average_scrap_per_material,
        'High Scrap and Low Weight?': high_scarp_low_weight
    }
)

In [230]:
result.head()

Unnamed: 0_level_0,Average Weight (g),Average Scrap Rate (%),High Scrap and Low Weight?
Material Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ABS,998.0,2.27,False
PLA,1006.0,2.06,False
TPU,1000.0,2.08,False


In [231]:
# display flagged materials
result[result['High Scrap and Low Weight?'] == True]

Unnamed: 0_level_0,Average Weight (g),Average Scrap Rate (%),High Scrap and Low Weight?
Material Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1


In [234]:
# 7. Recommend whether to isolate any material for QA recheck based on findings.
result['Quality Recommendation'] = result['High Scrap and Low Weight?'].apply(lambda x: 'Isolate' if x else 'No Problem')

In [235]:
result[['High Scrap and Low Weight?', 'Quality Recommendation']]

Unnamed: 0_level_0,High Scrap and Low Weight?,Quality Recommendation
Material Type,Unnamed: 1_level_1,Unnamed: 2_level_1
ABS,False,No Problem
PLA,False,No Problem
TPU,False,No Problem
