In [5]:
import pandas as pd
import os
import torch
import numpy as np
import PIL

#from datasets import load_dataset, Image, Dataset
#from transformers import AutoFeatureExtractor, ViTFeatureExtractor,ViTForImageClassification,TrainingArguments, Trainer, BeitFeatureExtractor, TrainerCallback
#from torchvision.transforms import (
#    CenterCrop,
#    Compose,
#    Normalize,
#    RandomHorizontalFlip,
#    RandomResizedCrop,
#    Resize,
#    ToTensor)

from sklearn.metrics import f1_score, accuracy_score
from sklearn.metrics import classification_report



# Data exploration

In [6]:
import os
os.getcwd()

'C:\\Users\\paulc\\colas-deep-learning\\model'

In [7]:
data = pd.read_csv('labels_train.csv')
data.head()

Unnamed: 0,filename,FISSURE,REPARATION,FISSURE LONGITUDINALE,FAÏENCAGE,MISE EN DALLE
0,BDCAEROD0000000017183099_runway_3_gridsize_512...,0,0,1,1,0
1,BDCAEROD0000000017183055_runway_1_gridsize_512...,0,0,1,0,0
2,BDCAEROD0000000017183118_runway_1_gridsize_512...,1,0,1,0,0
3,BDCAEROD0000000017183028_runway_1_gridsize_512...,1,0,0,0,0
4,BDCAEROD0000000017183088_runway_1_gridsize_512...,0,0,0,0,0


# Converting images to dataset object

In [3]:
# Storing all the paths to images in a dict

path_start = os.getcwd() + "\\dataset\\train\\"
list_path = [path_start + filename for filename in os.listdir('dataset/train') ]
path_dict = {"image":list_path}

# Converting the dict to a dataset object

dataset = Dataset.from_dict(path_dict).cast_column("image", Image())
dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x512 at 0x2643766C220>}

### Label management

In [4]:
data['label'] = data.apply(lambda x: [x.FISSURE, x.REPARATION, x['FISSURE LONGITUDINALE'], x.FAÏENCAGE, x['MISE EN DALLE']] , axis=1)
data['label']

0      [0, 0, 1, 1, 0]
1      [0, 0, 1, 0, 0]
2      [1, 0, 1, 0, 0]
3      [1, 0, 0, 0, 0]
4      [0, 0, 0, 0, 0]
            ...       
825    [0, 0, 0, 0, 0]
826    [1, 1, 1, 0, 1]
827    [0, 1, 0, 1, 0]
828    [0, 1, 1, 1, 0]
829    [0, 0, 0, 1, 0]
Name: label, Length: 830, dtype: object

In [5]:
data.filename[0]

'BDCAEROD0000000017183099_runway_3_gridsize_512_idx_7_idy_0.jpg'

### Creating the column of labels to be added to the dataset

In [6]:
column_of_labels = []

for i in range(dataset.shape[0]):
    filename = list_path[i][65:]
    row_dataset = data[data.filename == filename]
    list_label = list(row_dataset['label'])[0]
    list_label = np.array(list_label, dtype = np.float32).tolist()
    column_of_labels.append(list_label)
    
column_of_labels[:15]

[[0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 1.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 1.0, 0.0, 1.0],
 [1.0, 0.0, 1.0, 1.0, 1.0],
 [1.0, 0.0, 1.0, 1.0, 1.0],
 [0.0, 0.0, 0.0, 0.0, 0.0]]

In [7]:
dataset = dataset.add_column(name="label", column=column_of_labels)

In [8]:
dataset[0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=512x512 at 0x26437720430>,
 'label': [0.0, 0.0, 0.0, 0.0, 0.0]}

In [13]:
ALL_LABELS = ['FISSURE','REPARATION','FISSURE LONGITUDINALE','FAÏENCAGE','MISE EN DALLE']

In [14]:
id2label = {k:l for k, l in enumerate(ALL_LABELS)}
label2id = {l:k for k, l in enumerate(ALL_LABELS)}

In [15]:
id2label

{0: 'FISSURE',
 1: 'REPARATION',
 2: 'FISSURE LONGITUDINALE',
 3: 'FAÏENCAGE',
 4: 'MISE EN DALLE'}

In [16]:
label2id

{'FISSURE': 0,
 'REPARATION': 1,
 'FISSURE LONGITUDINALE': 2,
 'FAÏENCAGE': 3,
 'MISE EN DALLE': 4}

# Extracting pixel data from our dataset

In [17]:
model_checkpoint = "google/vit-base-patch16-224" # pre-trained model from which to fine-tune
batch_size = 4 # batch size for training and evaluation

In [18]:
feature_extractor = ViTFeatureExtractor.from_pretrained(model_checkpoint)
feature_extractor 

NameError: name 'ViTFeatureExtractor' is not defined

### Splitting

In [15]:
splits = dataset.train_test_split(test_size=0.2)

train_ds = splits['train']
val_ds = splits['test']

### Defining data augmentation fonctions and creating dataset

In [16]:
normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
train_transforms = Compose(
        [
            RandomResizedCrop(feature_extractor.size),
            RandomHorizontalFlip(),
            ToTensor(),
            normalize,
        ]
    )

val_transforms = Compose(
        [
            Resize(feature_extractor.size),
            CenterCrop(feature_extractor.size),
            ToTensor(),
            normalize,
        ]
    )

def preprocess_train(example_batch):
    """Apply train_transforms across a batch."""
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

def preprocess_val(example_batch):
    """Apply val_transforms across a batch."""
    example_batch["pixel_values"] = [val_transforms(image.convert("RGB")) for image in example_batch["image"]]
    return example_batch

In [17]:
train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_train)

In [30]:
train_ds[0]['pixel_values'].shape

torch.Size([3, 224, 224])

## Model building

5

In [18]:
model = ViTForImageClassification.from_pretrained(
    model_checkpoint,
    num_labels = 5,
    problem_type="multi_label_classification",
    ignore_mismatched_sizes = True, # provide this in case you're planning to fine-tune an already fine-tuned checkpoint,
    use_auth_token='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'
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
model.save_pretrained(model_checkpoint)
feature_extractor.save_pretrained(model_checkpoint)

In [20]:
def collate_fn(examples):
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}

# Using default trainer

In [None]:
args = TrainingArguments(
    model_checkpoint,
    remove_unused_columns=False,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=7,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    push_to_hub=False
)

trainer = Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    tokenizer=feature_extractor,
    data_collator=collate_fn,
    #hub_token = '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'
)

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

In [None]:
train_results = trainer.train()
# rest is optional but nice to have
#trainer.save_model()
#trainer.log_metrics("train", train_results.metrics)
#trainer.save_metrics("train", train_results.metrics)
#trainer.save_state()

In [None]:
trainer.evaluate()

In [24]:
x=range(0,5)

4

# Using custom trainer

### Defining precisely the loss we want

In [21]:
GLOBAL_SCORE_INDICES = range(0, 5)

def get_preds_from_logits(logits):
    ret = np.zeros(logits.shape)
    
    # We fill 1 to every class whose score is higher than some threshold
    # In this example, we choose that threshold = 0.0
    ret[:, GLOBAL_SCORE_INDICES] = np.array(logits[:, GLOBAL_SCORE_INDICES] >= 0.0).astype(int)
    
    return ret

In [22]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    final_metrics = {}
    
    # Deduce predictions from logits
    predictions = get_preds_from_logits(logits)

    # The global f1_metrics
    final_metrics["f1_micro"] = f1_score(labels, predictions, average="micro")
    final_metrics["f1_macro"] = f1_score(labels, predictions, average="macro")
    final_metrics["f1_weight"] = f1_score(labels, predictions, average="weighted")
    
    # Classification report
    print("Classification report for global scores: ")
    print(classification_report(labels[:, GLOBAL_SCORE_INDICES], predictions[:, GLOBAL_SCORE_INDICES], zero_division=0))
    return final_metrics

### Adapting the Huggingface trainer class

In [23]:
class MultiTaskClassificationTrainer(Trainer):
    def __init__(self, group_weights=None, **kwargs):
        super().__init__(**kwargs)
        
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs[0]
        
        loss = torch.nn.functional.binary_cross_entropy_with_logits(logits[:, GLOBAL_SCORE_INDICES], labels[:, GLOBAL_SCORE_INDICES])
        
        return (loss, outputs) if return_outputs else loss

In [24]:
class PrinterCallback(TrainerCallback):
    def on_epoch_end(self, args, state, control, logs=None, **kwargs):
        print(f"Epoch {state.epoch}: ")

### Running trainer

In [25]:
training_args = TrainingArguments(
    model_checkpoint,
    remove_unused_columns=False,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=4,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    push_to_hub=False,
    metric_for_best_model="f1_macro"
)

trainer = MultiTaskClassificationTrainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[PrinterCallback]
)

In [26]:
trainer.train()

***** Running training *****
  Num examples = 664
  Num Epochs = 4
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 164


Epoch,Training Loss,Validation Loss,F1 Micro,F1 Macro,F1 Weight
0,0.3187,0.311613,0.542373,0.344955,0.487651
1,0.2758,0.312056,0.519231,0.36309,0.486611
2,0.2125,0.255859,0.655602,0.442901,0.600154
3,0.2275,0.262213,0.688525,0.491834,0.644665


***** Running Evaluation *****
  Num examples = 166
  Batch size = 4


Epoch 0.9879518072289156: 
Classification report for global scores: 
              precision    recall  f1-score   support

           0       0.78      0.68      0.73        53
           1       0.00      0.00      0.00        22
           2       0.68      0.48      0.56        44
           3       0.00      0.00      0.00         8
           4       1.00      0.28      0.44        25

   micro avg       0.76      0.42      0.54       152
   macro avg       0.49      0.29      0.34       152
weighted avg       0.63      0.42      0.49       152
 samples avg       0.21      0.17      0.18       152



Saving model checkpoint to google/vit-base-patch16-224\checkpoint-41
Configuration saved in google/vit-base-patch16-224\checkpoint-41\config.json
Model weights saved in google/vit-base-patch16-224\checkpoint-41\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 166
  Batch size = 4


Epoch 1.9879518072289155: 
Classification report for global scores: 
              precision    recall  f1-score   support

           0       0.96      0.47      0.63        53
           1       0.00      0.00      0.00        22
           2       0.95      0.41      0.57        44
           3       0.00      0.00      0.00         8
           4       1.00      0.44      0.61        25

   micro avg       0.96      0.36      0.52       152
   macro avg       0.58      0.26      0.36       152
weighted avg       0.77      0.36      0.49       152
 samples avg       0.16      0.13      0.14       152



Saving model checkpoint to google/vit-base-patch16-224\checkpoint-82
Configuration saved in google/vit-base-patch16-224\checkpoint-82\config.json
Model weights saved in google/vit-base-patch16-224\checkpoint-82\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 166
  Batch size = 4


Epoch 2.9879518072289155: 
Classification report for global scores: 
              precision    recall  f1-score   support

           0       0.90      0.70      0.79        53
           1       0.00      0.00      0.00        22
           2       0.85      0.64      0.73        44
           3       0.00      0.00      0.00         8
           4       0.93      0.56      0.70        25

   micro avg       0.89      0.52      0.66       152
   macro avg       0.54      0.38      0.44       152
weighted avg       0.71      0.52      0.60       152
 samples avg       0.24      0.20      0.21       152



Saving model checkpoint to google/vit-base-patch16-224\checkpoint-123
Configuration saved in google/vit-base-patch16-224\checkpoint-123\config.json
Model weights saved in google/vit-base-patch16-224\checkpoint-123\pytorch_model.bin
***** Running Evaluation *****
  Num examples = 166
  Batch size = 4


Epoch 3.9879518072289155: 
Classification report for global scores: 
              precision    recall  f1-score   support

           0       0.95      0.72      0.82        53
           1       1.00      0.09      0.17        22
           2       0.85      0.66      0.74        44
           3       0.00      0.00      0.00         8
           4       0.94      0.60      0.73        25

   micro avg       0.91      0.55      0.69       152
   macro avg       0.75      0.41      0.49       152
weighted avg       0.88      0.55      0.64       152
 samples avg       0.25      0.22      0.23       152



Saving model checkpoint to google/vit-base-patch16-224\checkpoint-164
Configuration saved in google/vit-base-patch16-224\checkpoint-164\config.json
Model weights saved in google/vit-base-patch16-224\checkpoint-164\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from google/vit-base-patch16-224\checkpoint-164 (score: 0.49183360568097).


TrainOutput(global_step=164, training_loss=0.3026995945994447, metrics={'train_runtime': 1628.8208, 'train_samples_per_second': 1.631, 'train_steps_per_second': 0.101, 'total_flos': 2.0520430589494886e+17, 'train_loss': 0.3026995945994447, 'epoch': 3.99})

# Trying to predict

In [28]:
from model_builder import *

In [31]:
os.chdir('/Users/paulc/colas-deep-learning'+'/model/dataset')

In [None]:
# Encode the image

image_test = test_ds[6]['image']
encoding = feature_extractor(image_test.convert("RGB"), return_tensors="pt")

# Call the model to get predictions

outputs = model(**encoding)
logits = outputs.logits

# Decode the result

preds = get_preds_from_logits(logits)
decoded_preds = [[id2label[i] for i, l in enumerate(row) if l == 1] for row in preds]
decoded_preds

In [None]:
train_ds[19]

# Defining submission