# AI Pipeline for Predictive Quality Assurance in Manufacturing

**Copyright (c) 2026 Shrikara Kaudambady. All rights reserved.**

This notebook implements a multi-modal AI pipeline for quality assurance. The system analyzes both **image data** (a picture of a part) and **sensor data** (from the manufacturing process) to detect defects. It then automatically generates a natural language diagnostic report for the repair team.

### 1. Setup and Library Imports

In [None]:
from types import SimpleNamespace

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

sns.set_theme(style="whitegrid")

### 2. Data Simulation
We'll generate a synthetic dataset. Each 'part' will have an ID, sensor readings, an image, and a binary label (0 = 'OK', 1 = 'Defective'). Defective parts will have anomalies in both their sensor data and their image.

In [None]:
# A simple NumPy-only line drawing function, so the notebook needs neither
# OpenCV nor scikit-image just to paint a synthetic crack.
def cv2_line_mock(img, pt1, pt2, color, thickness):
    """Draw a one-pixel-wide straight line on ``img`` in place.

    Args:
        img: Array indexed as ``img[row, col, ...]``; it is modified in place.
        pt1: Start point as ``(x, y)``, i.e. (column, row).
        pt2: End point as ``(x, y)``, i.e. (column, row).
        color: Sequence whose first element is written to every line pixel.
        thickness: Accepted so the call keeps its cv2-style shape; ignored.

    Returns:
        The same ``img`` object that was passed in.
    """
    x1, y1 = int(pt1[0]), int(pt1[1])
    x2, y2 = int(pt2[0]), int(pt2[1])

    # One sample per step along the longer axis gives a gap-free line.
    n_points = max(abs(x2 - x1), abs(y2 - y1)) + 1
    cols = np.rint(np.linspace(x1, x2, n_points)).astype(int)
    rows = np.rint(np.linspace(y1, y2, n_points)).astype(int)

    # Drop pixels outside the image: negative indices would silently wrap
    # around and indices past the edge would raise IndexError.
    inside = (
        (rows >= 0) & (rows < img.shape[0])
        & (cols >= 0) & (cols < img.shape[1])
    )
    img[rows[inside], cols[inside]] = color[0]
    return img


# Lightweight namespace so cv2-style calls (``cv2.line(...)``) resolve without
# OpenCV being installed.
cv2 = SimpleNamespace(line=cv2_line_mock)


def generate_qa_data(n_samples=1000, img_size=64, seed=42):
    """Simulate sensor readings, grayscale images and labels for parts.

    Roughly 20% of the parts are defective. A defective part gets shifted
    sensor readings and a dark line (a "crack") drawn on its image.

    Args:
        n_samples: Number of parts to simulate.
        img_size: Side length, in pixels, of each square image.
        seed: Seed for the private random generator. The default reproduces
            the random stream of ``np.random.seed(42)``.

    Returns:
        A tuple ``(df, images, labels)``: ``df`` is a DataFrame with columns
        ``part_id``, ``temperature``, ``pressure`` and ``vibration``;
        ``images`` is an array of shape ``(n_samples, img_size, img_size, 1)``
        with values clipped to ``[0, 1]``; ``labels`` holds 1 for defective
        parts and 0 otherwise.

    Raises:
        ValueError: If ``img_size`` is smaller than 1.
    """
    if img_size < 1:
        raise ValueError("img_size must be at least 1")

    # A private generator keeps the global NumPy RNG state untouched while
    # producing the same values the legacy seeded global calls would.
    rng = np.random.RandomState(seed)

    # Keep the crack start away from the border; shrink the margin for small
    # images so the sampling range never becomes empty (10 for img_size >= 21).
    margin = min(10, (img_size - 1) // 2)

    data = []
    images = []
    labels = []

    for i in range(n_samples):
        part_id = f"PART-{i:04d}"
        is_defective = rng.rand() > 0.8  # 20% defective rate

        # Generate Sensor Data
        if is_defective:
            temp = rng.normal(250, 15)  # Higher temp for defects
            pressure = rng.normal(105, 5)
            vibration = rng.normal(40, 8)  # Higher vibration for defects
        else:
            temp = rng.normal(220, 5)
            pressure = rng.normal(100, 2)
            vibration = rng.normal(20, 2)

        data.append({
            'part_id': part_id,
            'temperature': temp,
            'pressure': pressure,
            'vibration': vibration,
        })
        labels.append(int(is_defective))

        # Generate Image Data: flat mid-gray background with mild noise
        image = (
            np.ones((img_size, img_size, 1)) * 0.5
            + rng.normal(0, 0.02, (img_size, img_size, 1))
        )
        if is_defective:
            # Add a 'crack' to the image. The end point may land outside the
            # image; the drawing helper truncates the line at the border.
            x1, y1 = rng.randint(margin, img_size - margin, 2)
            x2 = x1 + rng.randint(-15, 15)
            y2 = y1 + rng.randint(15, 25)
            cv2_line_mock(image, (x1, y1), (x2, y2), (0, 0, 0), 1)
        images.append(np.clip(image, 0, 1))

    return pd.DataFrame(data), np.array(images), np.array(labels)


# Build the synthetic dataset once; the sensor table, image stack and label
# vector are reused by the training cells below.
df_sensors, images, labels = generate_qa_data()
print("Data simulation complete.")
for _caption, _dataset in (
    ("Sensor Data Shape:", df_sensors),
    ("Image Data Shape:", images),
    ("Labels Shape:", labels),
):
    print(_caption, _dataset.shape)

### 3. Train the Vision Model (CNN)
First, we train a simple CNN to classify images as 'OK' or 'Defective'.

In [None]:
# Hold out 20% of the images for evaluation; stratifying keeps the share of
# defective parts the same in both splits.
X_train_img, X_test_img, y_train_img, y_test_img = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

vision_model = models.Sequential([
    # Take the input shape from the data instead of hard-coding it, so the
    # network stays in sync with whatever image size was generated.
    layers.Input(shape=X_train_img.shape[1:]),
    layers.Conv2D(16, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    # Single sigmoid unit: the output is read as the probability of a defect.
    layers.Dense(1, activation='sigmoid')
])
vision_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
vision_model.fit(X_train_img, y_train_img, epochs=5, batch_size=32, validation_split=0.1, verbose=1)

# Score the model on the held-out split, mirroring the accuracy printed for
# the sensor model.
_, vision_test_accuracy = vision_model.evaluate(X_test_img, y_test_img, verbose=0)
print("Vision Model Accuracy:", vision_test_accuracy)

### 4. Train the Sensor Data Model (Random Forest)
Next, we train a Random Forest model on the tabular sensor data.

In [None]:
# Only the numeric process readings are used as features; the part identifier
# column is left out.
X_sensors = df_sensors.loc[:, ['temperature', 'pressure', 'vibration']]

# Same test size, seed and stratification as the image split above.
X_train_sens, X_test_sens, y_train_sens, y_test_sens = train_test_split(
    X_sensors,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# fit() returns the estimator itself, so construction and training chain.
sensor_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_sens, y_train_sens
)
print("Sensor model training complete.")
sensor_test_accuracy = sensor_model.score(X_test_sens, y_test_sens)
print("Sensor Model Accuracy:", sensor_test_accuracy)

### 5. Build the Full QA Pipeline with Report Generation
Now we create a class that encapsulates both models and contains the logic to fuse their results and generate a diagnostic report.

In [None]:
class QualityAssurancePipeline:
    """Fuse a vision model and a sensor model into one QA verdict per part.

    Both models only need a ``predict`` method: the vision model is expected
    to return a batch of single scores (indexed as ``[0][0]``) and the sensor
    model a batch of class labels (indexed as ``[0]``).
    """

    def __init__(self, vision_model, sensor_model):
        """Store the two already-trained models used by ``analyze_part``."""
        self.vision_model = vision_model
        self.sensor_model = sensor_model

    def _generate_report(self, part_id, vision_pred, sensor_pred):
        """Build the text report for one part.

        Args:
            part_id: Identifier printed in the report header.
            vision_pred: Vision score; values above 0.5 count as a defect.
            sensor_pred: Sensor class label; 1 counts as an anomaly.

        Returns:
            The formatted multi-line report string.
        """
        vision_status = "Defect Detected" if vision_pred > 0.5 else "OK"
        sensor_status = "Anomaly Detected" if sensor_pred == 1 else "OK"

        # Fusion Logic: a visual defect always rejects the part; a sensor-only
        # anomaly is escalated for review; otherwise the part is approved.
        if vision_status == "Defect Detected":
            final_status = "REJECTED"
            notes = "A visual defect (crack) was identified by the vision system."
            action = "Route part to manual inspection and repair station."
        elif sensor_status == "Anomaly Detected":
            final_status = "FLAGGED FOR REVIEW"
            notes = "Sensor data showed anomalies during production. No visual defect was found, but part may have internal stress."
            action = "Route part to advanced screening (e.g., X-ray). Monitor production machine for calibration issues."
        else:
            final_status = "APPROVED"
            notes = "No issues detected by vision or sensor models."
            action = "Proceed to next manufacturing step."

        # Template-based Natural Language Generation
        report = f"""
        *** AUTOMATED QA REPORT ***
        --------------------------------------------------
        PART ID:         {part_id}
        TIMESTAMP:       {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
        
        FINAL STATUS:    {final_status}
        --------------------------------------------------
        EVIDENCE:
        - Vision System:   {vision_status}
        - Sensor System:   {sensor_status}
        
        NOTES:           {notes}
        
        RECOMMENDED ACTION: {action}
        --------------------------------------------------
        """
        return report

    def _sensor_features(self, sensor_data):
        """Shape one part's sensor readings into a single-row model input.

        When the sensor model exposes ``feature_names_in_`` (set by
        scikit-learn estimators fitted on a DataFrame), the row is returned as
        a DataFrame carrying those column names, which avoids the
        feature-name mismatch warning on every prediction. Values are matched
        to names by position. Otherwise a plain ``(1, n_features)`` array is
        returned.
        """
        row = np.asarray(sensor_data).reshape(1, -1)
        fitted_names = getattr(self.sensor_model, "feature_names_in_", None)
        if fitted_names is None:
            return row
        return pd.DataFrame(row, columns=list(fitted_names))

    def analyze_part(self, part_id, image_data, sensor_data):
        """Run both models on one part and print its QA report.

        Args:
            part_id: Identifier shown in the report.
            image_data: A single image without a batch dimension.
            sensor_data: The part's sensor readings, in the feature order the
                sensor model was trained with.
        """
        # Ensure data is in the correct format
        image_input = np.expand_dims(image_data, axis=0)  # Add batch dimension
        sensor_input = self._sensor_features(sensor_data)

        # Get predictions from both models
        vision_prediction = self.vision_model.predict(image_input)[0][0]
        sensor_prediction = self.sensor_model.predict(sensor_input)[0]

        # Generate the final report
        report = self._generate_report(part_id, vision_prediction, sensor_prediction)
        print(report)

### 6. Run Demonstration
Let's test the full pipeline on a defective part and a normal part from our test set.

In [None]:
# Initialize the pipeline with our trained models
# Wire the two trained models into the pipeline.
qa_pipeline = QualityAssurancePipeline(vision_model, sensor_model)


def _first_test_part(target_label):
    """Return (position, part_id, image, sensors) of the first test part with the label.

    The position indexes both test splits; this relies on the image and
    sensor splits having been produced with identical split settings.
    """
    position = np.where(y_test_img == target_label)[0][0]
    part_id = df_sensors.loc[X_test_sens.index[position], 'part_id']
    return position, part_id, X_test_img[position], X_test_sens.iloc[position]


# --- Scenario 1: Analyze a DEFECTIVE part ---
(defective_idx, part_id_defective,
 image_defective, sensors_defective) = _first_test_part(1)
qa_pipeline.analyze_part(part_id_defective, image_defective, sensors_defective)

# --- Scenario 2: Analyze a NORMAL part ---
(normal_idx, part_id_normal,
 image_normal, sensors_normal) = _first_test_part(0)
qa_pipeline.analyze_part(part_id_normal, image_normal, sensors_normal)