In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json

# Import Clean data
# clean_df = pd.read_pickle("train_set.pkl")
# val_df = pd.read_pickle("val_set.pkl")

# Convert NumPy arrays to JSON before saving
# clean_df['Wafer_Map'] = clean_df['Wafer_Map'].apply(lambda x: json.dumps(x.tolist()))
# val_df['Wafer_Map'] = val_df['Wafer_Map'].apply(lambda x: json.dumps(x.tolist()))

# Save to CSV
# clean_df.to_csv("train_set.csv", index=False)
# val_df.to_csv("val_set.csv", index=False)

# Load the cleaned training and validation splits from their CSV exports
clean_df = pd.read_csv('train_set.csv')
val_df = pd.read_csv('val_set.csv')

# For WSL2
# data_dir = "/mnt/c/Users/custu/My Drive/MSDS 422/M10"
# clean_df = pd.read_csv(f"{data_dir}/train_set.csv")
# val_df = pd.read_csv(f"{data_dir}/val_set.csv")


def _decode_wafer_map(encoded):
    """Parse one JSON-encoded 'Wafer_Map' cell back into a uint8 NumPy array."""
    return np.array(json.loads(encoded), dtype=np.uint8)


# The CSV stores each wafer map as a JSON string; decode both frames the same way
for _frame in (clean_df, val_df):
    _frame['Wafer_Map'] = _frame['Wafer_Map'].apply(_decode_wafer_map)



In [2]:
import tensorflow as tf

# Use the first detected GPU (if there is one) as the only visible GPU and
# enable memory growth on it.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    primary_gpu = gpus[0]
    try:
        tf.config.experimental.set_memory_growth(primary_gpu, True)
        tf.config.set_visible_devices(primary_gpu, 'GPU')
        print("Using GPU:", primary_gpu)
    except RuntimeError as err:
        # A RuntimeError from the device configuration calls is reported,
        # not re-raised, so the notebook keeps running.
        print(err)


2025-04-02 23:47:12.863952: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-02 23:47:13.409042: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-02 23:47:14.957943: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


W0000 00:00:1743662835.951170   30171 gpu_device.cc:2429] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


In [3]:
# Set the global Keras mixed-precision policy to 'mixed_float16'.
# NOTE(review): this is a global setting, so it presumably affects every model
# built later in the notebook - confirm that is intended.
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

### CNN Models

In [4]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.optimizers.schedules as schedules
from tensorflow.keras.metrics import Precision, Recall
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras_tuner as kt

random_state = 2

# Seed Python, NumPy and TensorFlow so weight initialisation, batch shuffling
# and dropout are repeatable from one run to the next.
tf.keras.utils.set_random_seed(random_state)

# Prepare data: stack the per-row wafer maps into one array and reshape to
# (N, 45, 45, 1) float32 so each map carries a single channel axis.
X_train = np.stack(clean_df['Wafer_Map'].values)
X_train = X_train.reshape(-1, 45, 45, 1).astype('float32')
X_test = np.stack(val_df['Wafer_Map'].values)
X_test = X_test.reshape(-1, 45, 45, 1).astype('float32')

# Encode target variable: integer codes are defined by the training labels and
# the validation labels are mapped onto the same codes.
label_encoder = pd.factorize(clean_df['Defect_Class'])
y_train = label_encoder[0]
y_classes = label_encoder[1]
y_test = val_df['Defect_Class'].map({cls: i for i, cls in enumerate(y_classes)}).fillna(-1).astype(int).values

# A code of -1 marks a missing training label (pd.factorize sentinel) or a
# validation label that never occurs in training. to_categorical would use -1
# as a column index and silently label those rows as the LAST class, so drop
# them (and say so) instead of training/evaluating on wrong targets.
train_known = y_train >= 0
if not train_known.all():
    print(f"Dropping {int((~train_known).sum())} training rows with a missing Defect_Class")
    X_train = X_train[train_known]
    y_train = y_train[train_known]

test_known = y_test >= 0
if not test_known.all():
    print(f"Dropping {int((~test_known).sum())} validation rows whose Defect_Class is missing or unseen in training")
    X_test = X_test[test_known]
    y_test = y_test[test_known]

# Convert to OHE
y_train = to_categorical(y_train, num_classes=len(y_classes))
y_test = to_categorical(y_test, num_classes=len(y_classes))

# Build model
def build_model(hp):
    """Build and compile the three-block CNN used by the Keras Tuner search.

    Architecture: three Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout
    blocks, followed by two Dense + Dropout layers and a softmax output with
    one unit per class in ``y_classes``.

    Args:
        hp: Hyperparameter container supplied by the tuner; only ``hp.Choice``
            is used. Every choice currently has a single value, so the search
            space contains exactly one configuration.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss; categorical accuracy, precision and recall metrics).
    """
    # Size the output from the label set the targets were one-hot encoded with,
    # so the model and the targets cannot disagree on the class count.
    num_classes = len(y_classes)

    model = Sequential()

    # Declare the input explicitly instead of passing `input_shape` to the
    # first layer, which recent Keras versions warn about.
    model.add(tf.keras.Input(shape=(45, 45, 1)))

    # First Convolutional Block
    model.add(Conv2D(filters=hp.Choice('conv1_filters', values=[16]), kernel_size=(5, 5), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(hp.Choice('dropout1', values=[0.0])))

    # Second Convolutional Block
    model.add(Conv2D(filters=hp.Choice('conv2_filters', values=[64]), kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(hp.Choice('dropout2', values=[0.0])))

    # Third Convolutional Block
    model.add(Conv2D(filters=hp.Choice('conv3_filters', values=[128]), kernel_size=(3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(hp.Choice('dropout3', values=[0.0])))

    # Dense Layers + Output
    model.add(Flatten())
    model.add(Dense(hp.Choice('dense_units', values=[256]), activation='relu'))
    model.add(Dropout(hp.Choice('dropout_fc', values=[0.4])))
    model.add(Dense(hp.Choice('dense_units2', values=[128]), activation='relu'))
    model.add(Dropout(hp.Choice('dropout_fc2', values=[0.4])))
    # The notebook enables the global 'mixed_float16' policy; keep the softmax
    # output layer in float32 so the probabilities and the loss are not
    # computed in half precision.
    model.add(Dense(num_classes, activation='softmax', dtype='float32'))

    # Compile model
    model.compile(
        optimizer=tf.keras.optimizers.Adam(hp.Choice('learning_rate', values=[0.001])),
        loss='categorical_crossentropy',
        metrics=['categorical_accuracy', Precision(), Recall()],
    )
    return model

# Early stopping: watch validation loss with a patience of 5 epochs and ask
# Keras to restore the best weights.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor='val_loss',
)

# Hyperband tuner over the search space declared inside build_model,
# ranked by validation categorical accuracy.
tuner = kt.Hyperband(
    hypermodel=build_model,
    objective='val_categorical_accuracy',
    max_epochs=30,
    factor=3,
    directory='hyperband_tuning',
    project_name='CNN_Model_1_v2',
)

search_options = dict(
    epochs=30,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)
tuner.search(X_train, y_train, **search_options)

# Keep the top-ranked hyperparameter set and show its values
top_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_hyperparameters[0]
print(best_hps.values)

# Time the final training run. The clock starts before the model is built,
# so the reported training time includes model construction.
import time
start_time = time.time()

# Train: build a fresh model from the selected hyperparameters and fit it
fit_options = {
    'epochs': 50,
    'batch_size': 256,
    'validation_data': (X_test, y_test),
    'callbacks': [early_stopping],
}
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(X_train, y_train, **fit_options)

# Stop the clock and report the elapsed training time
end_time = time.time()
print(f"Training Time: {end_time - start_time:.2f} seconds")

# Evaluate on the validation arrays, timed separately from training.
# eval_results[1] is read as the accuracy: 'categorical_accuracy' is the first
# metric passed to compile(), presumably preceded only by the loss.
start_time = time.time()
eval_results = best_model.evaluate(X_test, y_test)
end_time = time.time()
print(f"Evaluation Time: {end_time - start_time:.2f} seconds")    
print(f"Validation Accuracy: {eval_results[1]:.4f}")    
# Training accuracy is the value logged for the last epoch that was run
print(f"Training Accuracy: {history.history['categorical_accuracy'][-1]:.4f}")

# Get predictions and collapse one-hot / probability rows to class indices
y_pred = best_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Pin the full label set. Without it, a class that is absent from both the
# validation truth and the predictions would shrink the matrix, misalign the
# tick labels and make classification_report reject target_names.
class_indices = np.arange(len(y_classes))
class_names = [str(name) for name in y_classes]

# Confusion matrix (raw counts; rows = true class, columns = predicted class)
cm = confusion_matrix(y_true_classes, y_pred_classes, labels=class_indices)

# Normalize confusion matrix by true-class totals; a class with no true
# samples keeps an all-zero row instead of producing NaN from 0/0.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm.astype('float'),
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

# Plot normalized confusion matrix
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm_normalized, annot=True, fmt='.2f', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names, ax=ax)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
ax.set_title('Normalized Confusion Matrix Custom CNN')
plt.show()

# Classification report
print(classification_report(y_true_classes, y_pred_classes,
                            labels=class_indices, target_names=class_names))

W0000 00:00:1743662836.483932   30171 gpu_device.cc:2429] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.
I0000 00:00:1743662836.630018   30171 gpu_device.cc:2018] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13067 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 5080, pci bus id: 0000:01:00.0, compute capability: 12.0
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-04-02 23:47:16.969544: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:40] 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_INVALID_PTX'

2025-04-02 23:47:16.969585: W tensorflow/compiler/mlir/tools/kernel_gen/tf_gpu_runtime_wrappers.cc:40] 'cuModuleGetFunction(&function, module, kernel_name)' failed with 'CUDA_ERROR_INVALID_HANDLE'

2025-04-02 23:47:16.969595: W tensorflow/core/framework/op_kernel.cc:1843] INTERNAL: 

InternalError: {{function_node __wrapped__Cast_device_/job:localhost/replica:0/task:0/device:GPU:0}} 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, 0, reinterpret_cast<CUstream>(stream), params, nullptr)' failed with 'CUDA_ERROR_INVALID_HANDLE' [Op:Cast] name: 

In [None]:
# Print model
# Model.summary() prints the layer table itself and returns None, so wrapping
# it in print() would emit a stray "None" line after the table.
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 41, 41, 16)        416       
                                                                 
 batch_normalization (BatchN  (None, 41, 41, 16)       64        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 20, 20, 16)       0         
 )                                                               
                                                                 
 dropout (Dropout)           (None, 20, 20, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 18, 18, 64)        9280      
                                                                 
 batch_normalization_1 (Batc  (None, 18, 18, 64)       2