In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder
from tqdm import tqdm


In [2]:
LEARNING_RATE = 0.001
BATCH_SIZE = 32
NUM_EPOCHS = 10

In [3]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
print("Device =", device)

Device = mps


In [8]:
# Define model for defect mask generation
class DefectMaskGenerator(nn.Module):
    def __init__(self):
        super(DefectMaskGenerator, self).__init__()

        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 32, 2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 16, 2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, 2, stride=2)
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

model = DefectMaskGenerator()
print(model)


DefectMaskGenerator(
  (encoder): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(64, 32, kernel_size=(2, 2), stride=(2, 2))
    (1): ReLU()
    (2): ConvTranspose2d(32, 16, kernel_size=(2, 2), stride=(2, 2))
    (3): ReLU()
    (4): ConvTranspose2d(16, 1, kernel_size=(2, 2), stride=(2, 2))
  )
)


In [4]:


class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        # Define encoder layers
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(512, 1024, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        
        # Define decoder layers
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, kernel_size=1),
            nn.Sigmoid() # To ensure values between 0 and 1 for mask
        )

    def forward(self, x):
        # Encoder
        x1 = self.encoder(x)
        # Decoder
        x = self.decoder(x1)
        return x

# Example usage:
model = UNet()
# Assuming 'image' is your input image tensor with shape [batch_size, 3, height, width]
#mask = model(image)





In [7]:
# Load the defect image dataset using ImageFolder and appy transformations
DATASET_PATH = "/Users/somrawee/Coding/DataSet/CastingProduct/casting_data/casting_data/train"
dataset = ImageFolder(root=DATASET_PATH, transform=transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
]))

# Create DataLoader to handle batch loading of data
dataset_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Initialize the model and move it to GPU
model = model.to(device)

# Define Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training Loop
for epoch in tqdm(range(NUM_EPOCHS), position=0, leave=True):
    for i, (images, targets) in enumerate(dataset_loader):
        images = images.to(device)
        targets = targets.to(device)

        #print("Target's shape: ", targets.shape)
        #print("Images' shape: ", images.shape)

        # Forward pass
        outputs = model(images)

        #print("Output's shape: ", outputs.shape)
        
        #print(targets)

        # Compute loss
        loss = criterion(outputs, images)

        # Backward pass and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, NUM_EPOCHS, i+1, len(dataset_loader), loss.item()))
            



  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/10], Step [100/208], Loss: 0.0554
Epoch [1/10], Step [200/208], Loss: 0.0422


  return F.mse_loss(input, target, reduction=self.reduction)
 10%|█         | 1/10 [04:01<36:11, 241.29s/it]

Epoch [2/10], Step [100/208], Loss: 0.0177
Epoch [2/10], Step [200/208], Loss: 0.0097


 20%|██        | 2/10 [08:32<34:32, 259.05s/it]

Epoch [3/10], Step [100/208], Loss: 0.0049
Epoch [3/10], Step [200/208], Loss: 0.0037


 30%|███       | 3/10 [13:19<31:42, 271.77s/it]

Epoch [4/10], Step [100/208], Loss: 0.0030
Epoch [4/10], Step [200/208], Loss: 0.0033


 40%|████      | 4/10 [18:09<27:52, 278.77s/it]

Epoch [5/10], Step [100/208], Loss: 0.0025
Epoch [5/10], Step [200/208], Loss: 0.0024


 50%|█████     | 5/10 [23:49<25:04, 300.91s/it]

Epoch [6/10], Step [100/208], Loss: 0.0021
Epoch [6/10], Step [200/208], Loss: 0.0020


 60%|██████    | 6/10 [29:45<21:18, 319.56s/it]

Epoch [7/10], Step [100/208], Loss: 0.0018
Epoch [7/10], Step [200/208], Loss: 0.0016


 70%|███████   | 7/10 [34:31<15:26, 308.72s/it]

Epoch [8/10], Step [100/208], Loss: 0.0017
Epoch [8/10], Step [200/208], Loss: 0.0016


 80%|████████  | 8/10 [38:49<09:45, 292.62s/it]

Epoch [9/10], Step [100/208], Loss: 0.0014
Epoch [9/10], Step [200/208], Loss: 0.0016


 90%|█████████ | 9/10 [43:40<04:52, 292.16s/it]

Epoch [10/10], Step [100/208], Loss: 0.0015
Epoch [10/10], Step [200/208], Loss: 0.0017


100%|██████████| 10/10 [48:35<00:00, 291.59s/it]


In [8]:
# Once training is completed, generate defect mask images
def generate_defect_masks(image):
    model.eval()
    with torch.no_grad():
        output = model(image.to(device))
    return output

# Save the trained model

In [14]:
from PIL import Image
import numpy as np

image_file = "/Users/somrawee/Coding/DataSet/CastingProduct/casting_data/casting_data/train/def_front/cast_def_0_26.jpeg"
SAVED_PATH = "/Users/somrawee/GitProjects/Results"
SAVED_FILE = SAVED_PATH + "/" + "maskimage.jpg"

image = Image.open(image_file).convert("RGB")
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])
#print("input_image's shape: ", input_image.shape)
input_image = transform(image).unsqueeze(0) # Add an extra dimension for batch
print("input_image's shape: ", input_image.shape)

mask_image = generate_defect_masks(input_image)
print("mask_image's shape: ", mask_image.shape)

defect_mask = mask_image.squeeze(0).cpu().numpy()
print("defect_mask's shape: ", defect_mask.shape)
defect_mask = (defect_mask * 255).astype(np.uint8)
print("defect_mask's shape: ", defect_mask.shape)
print("defect_mask's type: ", defect_mask.dtype)
print("SAVED_FILE = ", SAVED_FILE)

np_img = np.squeeze(defect_mask, axis=0)
Image.fromarray(np_img).save(SAVED_FILE)



input_image's shape:  torch.Size([1, 3, 128, 128])
mask_image's shape:  torch.Size([1, 1, 128, 128])
defect_mask's shape:  (1, 128, 128)
defect_mask's shape:  (1, 128, 128)
defect_mask's type:  uint8
SAVED_FILE =  /Users/somrawee/GitProjects/Results/maskimage.jpg
