<a href="https://colab.research.google.com/github/ramvsiva/defect_classification/blob/main/defect_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [1]:
import os
import shutil
import pandas as pd
from PIL import Image
import re


def extract_classes(filename):
    # Use a regular expression to find all occurrences of 'C' followed by one or more digits
    class_pattern = re.compile(r'C(\d+)')
    class_indices = [int(match.group(1)) for match in class_pattern.finditer(filename)]
    return class_indices


def create_label_vector(class_indices, num_classes=11):
    label_vector = [0] * num_classes
    for index in class_indices:
        label_vector[index] = 1
    return label_vector


def process_directory(source_directory, target_directory, csv_filename):
    data = []
    num_classes = 12  # This should cover C0 to C10; adjust if more classes exist
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)

    image_id = 0
    for subdir, dirs, files in os.walk(source_directory):
        for file in files:
            if file.startswith('written') and file.endswith('.png'):
                source_path = os.path.join(subdir, file)
                unique_filename = f"{image_id:08d}_{file}"
                target_path = os.path.join(target_directory, unique_filename)

                shutil.copy2(source_path, target_path)

                class_indices = extract_classes(file)
                label_vector = create_label_vector(class_indices, num_classes)

                data.append([unique_filename] + label_vector)

                image_id += 1

    columns = ['filename'] + [f'C{i}' for i in range(num_classes)]
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(csv_filename, index=False)
    print(f"CSV file created: {csv_filename}")
    print(f"Images copied to: {target_directory}")


source_directory = '/content/drive/MyDrive/berlin/'
target_directory = 'training_images/'
csv_filename = 'image_labels.csv'

process_directory(source_directory, target_directory, csv_filename)

CSV file created: image_labels.csv
Images copied to: training_images/


In [4]:
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from transformers import ViTForImageClassification, AdamW, get_scheduler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm.auto import tqdm


class EarlyStopping:
    def __init__(self, patience=5, verbose=False, delta=0, path='path_to_save_model'):
        """
        Args:
            patience (int): How many epochs to wait after last time validation loss improved.
                            Default: 5
            verbose (bool): If True, prints a message for each validation loss improvement.
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path to save the best model
        """
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.best_loss = float('inf')

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decreases.'''
        if val_loss < self.best_loss:
            self.best_loss = val_loss
            if self.verbose:
                print(f'Validation loss decreased ({self.best_loss:.6f} --> {val_loss:.6f}). Saving model...')
            model.save_pretrained(self.path)


class CustomDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path).convert('RGB')
        labels = torch.tensor(self.img_labels.iloc[idx, 1:].values.astype(float))

        if self.transform:
            image = self.transform(image)

        return image, labels


# Image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Dataset and DataLoader
dataset = CustomDataset(csv_file='image_labels.csv', img_dir='training_images', transform=transform)
train_size = int(0.95 * len(dataset))
eval_size = len(dataset) - train_size
train_dataset, eval_dataset = torch.utils.data.random_split(dataset, [train_size, eval_size])
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=16, shuffle=False)

# Load pre-trained ViT model
# Load pre-trained ViT model
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224',
                                                  num_labels=12, # Specify your number of classes here
                                                  ignore_mismatched_sizes=True) # To ignore the original head size mismatch

# Check your classifier's layer dimensions
print("New classifier weight shape:", model.classifier.weight.shape)
print("New classifier bias shape:", model.classifier.bias.shape)
device = torch.device("cuda")
model.to(device)

# Optimizer and learning rate scheduler setup
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 60
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
early_stopping = EarlyStopping(patience=5, verbose=True, path='trained_models')

# Training and evaluation loop
model.train()
for epoch in range(num_epochs):
    progress_bar = tqdm(train_loader, desc="Training Epoch {:1d}".format(epoch + 1), leave=False, disable=False)
    for batch in progress_bar:
        batch = [item.to(device) for item in batch]
        inputs, labels = batch
        outputs = model(inputs)
        loss = torch.nn.BCEWithLogitsLoss()(outputs.logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        progress_bar.set_postfix({'training_loss': loss.item()})

    # Evaluation phase
    model.eval()
    eval_loss = 0
    eval_steps = 0
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in eval_loader:
            batch = [item.to(device) for item in batch]
            inputs, labels = batch
            outputs = model(inputs)
            loss = torch.nn.BCEWithLogitsLoss()(outputs.logits, labels)
            eval_loss += loss.item()
            eval_steps += 1

            preds = torch.sigmoid(outputs.logits).cpu().numpy() > 0.5
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())

    # Calculate metrics
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')
    accuracy = accuracy_score(all_labels, all_preds)
    print(
        f'Epoch {epoch + 1}: Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}, Accuracy: {accuracy:.4f}, Avg Eval Loss: {eval_loss / eval_steps:.4f}')

    # Early stopping check
    avg_eval_loss = eval_loss / eval_steps
    early_stopping(avg_eval_loss, model)
    if early_stopping.early_stop:
        print("Early stopping triggered.")
        break

# Save the model if not saved by early stopping
if not early_stopping.early_stop:
    model_path = 'trained_vit_model'
    model.save_pretrained(model_path)
    print(f'Model saved to {model_path}')


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([12]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([12, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


New classifier weight shape: torch.Size([12, 768])
New classifier bias shape: torch.Size([12])




Training Epoch 1:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 1: Precision: 0.2051, Recall: 0.2323, F1: 0.2150, Accuracy: 0.6939, Avg Eval Loss: 0.1393
Validation loss decreased (0.139254 --> 0.139254). Saving model...


Training Epoch 2:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 2: Precision: 0.4061, Recall: 0.3745, F1: 0.3714, Accuracy: 0.7347, Avg Eval Loss: 0.1354
Validation loss decreased (0.135360 --> 0.135360). Saving model...


Training Epoch 3:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 3: Precision: 0.4086, Recall: 0.3384, F1: 0.3389, Accuracy: 0.7755, Avg Eval Loss: 0.1081
Validation loss decreased (0.108127 --> 0.108127). Saving model...


Training Epoch 4:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 4: Precision: 0.4169, Recall: 0.3741, F1: 0.3750, Accuracy: 0.7959, Avg Eval Loss: 0.0993
Validation loss decreased (0.099297 --> 0.099297). Saving model...


Training Epoch 5:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 5: Precision: 0.4642, Recall: 0.3622, F1: 0.3815, Accuracy: 0.7959, Avg Eval Loss: 0.0996
EarlyStopping counter: 1 out of 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Training Epoch 6:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 6: Precision: 0.4115, Recall: 0.3950, F1: 0.3845, Accuracy: 0.7755, Avg Eval Loss: 0.0963
Validation loss decreased (0.096272 --> 0.096272). Saving model...


Training Epoch 7:   0%|          | 0/58 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Epoch 7: Precision: 0.4169, Recall: 0.3463, F1: 0.3500, Accuracy: 0.7959, Avg Eval Loss: 0.0947
Validation loss decreased (0.094750 --> 0.094750). Saving model...


Training Epoch 8:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 8: Precision: 0.4086, Recall: 0.3741, F1: 0.3671, Accuracy: 0.7959, Avg Eval Loss: 0.1075
EarlyStopping counter: 1 out of 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Training Epoch 9:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 9: Precision: 0.4086, Recall: 0.3463, F1: 0.3421, Accuracy: 0.7959, Avg Eval Loss: 0.1131
EarlyStopping counter: 2 out of 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Training Epoch 10:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 10: Precision: 0.4086, Recall: 0.3463, F1: 0.3421, Accuracy: 0.7959, Avg Eval Loss: 0.1050
EarlyStopping counter: 3 out of 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Training Epoch 11:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 11: Precision: 0.4201, Recall: 0.3741, F1: 0.3841, Accuracy: 0.7959, Avg Eval Loss: 0.1043
EarlyStopping counter: 4 out of 5


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


Training Epoch 12:   0%|          | 0/58 [00:00<?, ?it/s]

Epoch 12: Precision: 0.4201, Recall: 0.3979, F1: 0.3939, Accuracy: 0.8163, Avg Eval Loss: 0.1065
EarlyStopping counter: 5 out of 5
Early stopping triggered.


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [11]:
import torch
from PIL import Image
from torchvision import transforms
from transformers import ViTForImageClassification


def load_model(model_path, num_labels):
    # Load the trained model
    model = ViTForImageClassification.from_pretrained(model_path, num_labels=num_labels)
    model.eval()
    model.to('cuda' if torch.cuda.is_available() else 'cpu')
    return model


def process_image(image_path):
    # Load and transform the image
    image = Image.open(image_path).convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform(image).unsqueeze(0)  # Add batch dimension


def predict_image(model, processed_image):
    # Predict the image
    with torch.no_grad():
        outputs = model(processed_image.to(model.device))
        predictions = torch.sigmoid(outputs.logits).squeeze(0)  # Apply sigmoid and remove batch dimension
    return predictions


def print_class_predictions(predictions, threshold=0.5):
    # Print class predictions
    class_names = [f'C{i}' for i in range(predictions.shape[0])]  # Assuming class names are like 'C0', 'C1', etc.
    predicted_labels = [class_names[i] for i, pred in enumerate(predictions) if pred > threshold]
    print("Predicted classes:", predicted_labels)

# Example usage
model_path = '/content/trained_models'  # Specify the path to your trained model directory
num_labels = 12  # Specify the correct number of classes
image_path = '/content/written_26_C1.png'  # Specify the path to your test image

model = load_model(model_path, num_labels)
processed_image = process_image(image_path)
predictions = predict_image(model, processed_image)
print_class_predictions(predictions)

Predicted classes: ['C0']
