In [1]:
import cv2
import math
import pandas as pd
import torch
import re

from tqdm.notebook import tqdm
from pathlib import Path
from transformers import (
    ViTForImageClassification, 
    ViTImageProcessor)
from typing import Tuple

!pip install xlsxwriter



In [2]:
# === Code used to copy every image of the occlusion dataset into one flat folder.
# Each copy is renamed to "<infant id>_<original file name>", where the infant id is
# extracted from the name of the folder that directly contains the image.

from pathlib import Path

# Leading run of word characters that is immediately followed by a space
regex = r"\w+(?= )"

# Compared against the lower-cased suffix so that ".JPG" / ".PNG" files are not skipped
image_suffixes = ('.jpg', '.jpeg', '.png')

dst_dir = Path(r"C:\Users\12ped\Desktop\UNIFESP2_completo")
dst_dir.mkdir(parents=True, exist_ok=True)

dataset_path = Path(r"C:\Users\12ped\Desktop\COM APARATO - 10 RN")

for f_path in dataset_path.rglob('*'):
    if not f_path.is_file() or f_path.suffix.lower() not in image_suffixes:
        continue

    id_match = re.match(regex, f_path.parent.name)
    if id_match is None:
        # re.match returns None when the folder name does not start with "<word> ";
        # report and skip instead of failing on None[0] in the middle of the copy
        print(f'Skipping {f_path}: could not extract an infant id from folder "{f_path.parent.name}"')
        continue

    infant_id = id_match[0]
    dst_path = dst_dir / f'{infant_id}_{f_path.name}'
    # NOTE(review): two source files with the same id and file name overwrite each other here
    dst_path.write_bytes(f_path.read_bytes()) # Copy file

In [3]:
# = Data (Images + Face masks)
# Previously used dataset locations are kept below, commented out, for reference.
#FULL_IMAGES_PATH = Path(r'D:\ComputerScience\Mestrado\data\datasets\Occlusion\images')
#FULL_IMAGES_PATH = Path(r'D:\ComputerScience\Mestrado\data\UNIFESP\all')
#CROPS_PATH = Path(r'D:\ComputerScience\Mestrado\lncc\data\Results\Exp1-Mosaic\UNIFESP 360\Dataset\Regioes\com_dor')

# FULL_IMAGES_PATH = Path(r'D:\ComputerScience\Mestrado\lncc\data\Datasets\UNIFESP2_completo\com_dor')
# CROPS_PATH = Path(r'D:\ComputerScience\Mestrado\lncc\data\Results\Exp1-Mosaic\UNIFESP2_completo\Dataset\Regioes\sem_dor')

# FULL_IMAGES_PATH: flat folder whose entries are classified as whole images.
# CROPS_PATH: root folder read as CROPS_PATH / <region name>, one sub-folder per
# value of ACRONYM2REGION, each holding the crops of that face region.
FULL_IMAGES_PATH = Path(r'C:\Users\12ped\Desktop\UNIFESP2_completo')
CROPS_PATH = Path(r'D:\ComputerScience\Mestrado\lncc\data\Results\Exp1-Mosaic\UNIFESP2_completo\Dataset\Regioes\sem_dor')

# = Trained models
# Root of the fine-tuned ViT checkpoints; load_vit reads
# VIT_PATH / <training set name> / <region acronym>.
VIT_PATH = Path(r'D:\ComputerScience\Mestrado\results\classification_models\ViT')

# = Mapping variables
# Class id -> label name.
ID2LABEL = {0: 'presente', 1: 'ausente'}

# Region acronym -> region name. The acronyms are used as the per-region model
# sub-folder names and as the result columns; the region names are used as the
# crop sub-folder names. Note that 'BA' and 'BE' both map to 'Boca'.
ACRONYM2REGION = {
    'BA':'Boca',
    'BE':'Boca',
    'FP':'Fenda palpebral',
    'FS':'Fronte',
    'SN':'Sulco nasolabial',
}

# Region name -> region acronym. Iterating over .items() is required: iterating the
# dict itself yields only the keys, and each 2-character key would be unpacked into
# two single letters. Because 'Boca' appears twice above, the inverse mapping keeps
# only the last acronym for it ('BE').
REGION2ACRONYM = {
    region: acronym for acronym, region in ACRONYM2REGION.items()
}

# = Aux constants
# Number of images sent to the model per call; passed as the chunk size to batch().
BATCH_SIZE = 10

# = Excel writer
# Single workbook shared by the classification cells: each one writes its styled
# table to its own sheet, and the file is saved by writer.close() in the last cell.
writer = pd.ExcelWriter("results.xlsx", engine="xlsxwriter")

In [4]:
def load_vit(training_set_name:str, region:str, device:str='cuda') -> Tuple[ViTForImageClassification,ViTImageProcessor]:
    """Load the fine-tuned ViT classifier and its image processor for one face region.

    The checkpoint is read from ``VIT_PATH / training_set_name / region``.

    Args:
        training_set_name: Name of the sub-folder of ``VIT_PATH`` for the training set.
        region: Name of the region sub-folder inside the training-set folder.
        device: Device the model is moved to. Defaults to ``'cuda'``, which is what
            this function always used before the parameter existed.

    Returns:
        A ``(model, processor)`` tuple, with the model already moved to ``device``.

    Raises:
        FileNotFoundError: If the checkpoint folder does not exist.
    """
    model_path = VIT_PATH / training_set_name / region
    if not model_path.is_dir():
        # Fail early with the exact path rather than letting from_pretrained
        # report a less specific error for a local folder that is missing
        raise FileNotFoundError(f'No trained ViT checkpoint found at: {model_path}')

    model = ViTForImageClassification.from_pretrained(model_path)
    processor = ViTImageProcessor.from_pretrained(model_path)
    model.to(device)
    return model, processor

In [5]:
def process_images_vit(image_paths, model, processor):
    """Classify a batch of image files and return one predicted label per image.

    Args:
        image_paths: Paths of the images to classify. Each one is read with
            ``cv2.imread`` and converted from BGR to RGB.
        model: Classifier called as ``model(**inputs)``; its output must expose
            ``.logits``, and the model must expose ``config.id2label`` and ``device``.
        processor: Callable invoked as ``processor(images=..., return_tensors="pt")``
            to build the model inputs.

    Returns:
        A list with, for each image and in input order, the ``model.config.id2label``
        entry of the highest-scoring logit. An empty input yields an empty list.

    Raises:
        ValueError: If an image file cannot be read.
    """
    if len(image_paths) == 0:
        return []

    images = []
    for img_path in image_paths:
        bgr_image = cv2.imread(str(img_path))
        if bgr_image is None:
            # cv2.imread reports a missing/unreadable file by returning None; without
            # this check the failure only shows up as an opaque cvtColor error
            raise ValueError(f'Could not read image: {img_path}')
        images.append(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))

    inputs = processor(images=images, return_tensors="pt") # pt = torch.tensor
    # Send the inputs to wherever the model lives instead of assuming 'cuda'
    inputs = inputs.to(model.device)
    # Inference only: skip autograd bookkeeping to save memory
    with torch.no_grad():
        logits = model(**inputs).logits
    return [model.config.id2label[lgt.argmax(-1).item()] for lgt in logits]


In [6]:
def highlight(col):
    """Build the background-color CSS for every cell of a dataframe column.

    Cells equal to 0 are painted red and all other cells green. Meant to be passed
    to ``dataframe.style.apply``.
    """
    styles = []
    for cell in col.values:
        if cell == 0:
            styles.append('background-color: red;')
        else:
            styles.append('background-color: green;')
    return styles

def batch(iterable, n=1):
    """Yield consecutive slices of at most ``n`` items from a sized, sliceable sequence."""
    size = len(iterable)
    for start in range(0, size, n):
        stop = start + n
        # The last slice is cut at the end of the sequence
        yield iterable[start:stop if stop < size else size]

---

## CLASSIFICAR COM IMAGEM COMPLETA

In [8]:
# Classify every whole image once per face region, using the models trained on DATASET,
# and collect the predictions in a table with one row per image and one column per region.
DATASET = 'UNIFESP360RegiaoImagem'

# = Image paths
# Alternative numeric ordering for names such as "A12": key=lambda a: int(a.stem[1:])
# Keep regular files only: a sub-folder returned by glob('*') cannot be read as an image
img_files = sorted(f for f in FULL_IMAGES_PATH.glob('*') if f.is_file())

cols = {region: [] for region in ACRONYM2REGION.keys()}
index = [f.stem for f in img_files]
for region in tqdm(ACRONYM2REGION.keys()):
    # One model per region acronym
    vit_model, vit_processor = load_vit(DATASET, region)
    for files_batch in tqdm(batch(img_files, n=BATCH_SIZE), total=math.ceil(len(img_files)/BATCH_SIZE)):
        vit_predictions = process_images_vit(files_batch, vit_model, vit_processor)
        cols[region].extend(vit_predictions)

df = pd.DataFrame(cols, index=index)
df_image = df.style.apply(highlight, axis=0)
display(df_image)

# Stored in the shared workbook under a sheet named after the training set
df_image.to_excel(writer, sheet_name=DATASET)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s]

Unnamed: 0,BA,BE,FP,FS,SN
10_20190417_113901,0,1,1,1,1
10_20190417_113908,0,1,1,0,1
10_20190417_113915,0,1,1,0,1
10_20190417_113929,0,1,1,1,1
10_20190417_113954,0,1,1,1,1
10_20190417_114013,0,0,1,1,1
10_20190417_114020,1,1,1,1,1
10_20190417_114024,1,1,1,1,1
10_20190417_114026,1,1,1,1,1
10_20190417_114029,1,1,1,1,1


---

## Classificar com recorte das partes da face

In [7]:
# Classify the cropped face regions: each region's crops are classified by the model
# trained for that region, producing one row per image and one column per region.
DATASET = 'UNIFESP360RegiaoRecorte'

cols = {region: [] for region in ACRONYM2REGION.keys()}
files = [list((CROPS_PATH/region).glob('*')) for region in ACRONYM2REGION.values()] # Get all file names
files_set = [set(f.stem for f in paths) for paths in files]
index = sorted(set.intersection(*files_set)) # Get the intersection of file names (so we only process the files that have all regions, just in case)
if len(diff := (set.difference(set.union(*files_set), index))): # union - intersection
    print('Some regions are missing, please check if these files exists in all folders:', ','.join(sorted(diff)))
else:
    for region in tqdm(ACRONYM2REGION.keys()):
        # Sort by stem, exactly like `index`, so predictions line up with the row labels
        # (sorting whole paths also compares the extension and can order names differently)
        img_files = sorted((CROPS_PATH/ACRONYM2REGION[region]).glob('*'), key=lambda f: f.stem)
        vit_model, vit_processor = load_vit(DATASET, region)
        for files_batch in tqdm(batch(img_files, n=BATCH_SIZE), total=math.ceil(len(img_files)/BATCH_SIZE)):
            try:
                vit_predictions = process_images_vit(files_batch, vit_model, vit_processor)
            except Exception as e:
                # Best effort: report the failure and keep going, but fill the batch with
                # empty placeholders. Re-using the previous batch's predictions (or hitting
                # an undefined name on the first batch) would silently misalign the table.
                print(f'Failed to classify {[f.name for f in files_batch]} for region {region}: {e}')
                vit_predictions = [None] * len(files_batch)
            cols[region].extend(vit_predictions)

    df = pd.DataFrame(cols, index=index)
    df_crop = df.style.apply(highlight, axis=0)
    display(df_crop)

    # Stored in the shared workbook under a sheet named after the training set
    df_crop.to_excel(writer, sheet_name=DATASET)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

Unnamed: 0,BA,BE,FP,FS,SN
10_20190417_113901,0,1,1,1,0
10_20190417_113908,0,1,1,1,0
10_20190417_113915,0,0,1,1,1
10_20190417_113929,0,1,1,1,1
10_20190417_113954,0,1,1,1,1
10_20190417_114013,0,1,1,1,0
10_20190417_114020,0,1,1,1,0
10_20190417_114024,1,1,1,1,0
10_20190417_114026,1,1,1,1,1
10_20190417_114029,0,1,1,1,1


In [16]:
writer.close() # Save the workbook (every sheet written by the cells above) to disk and close the writer