In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import PIL
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True #imagenes ligeramente corruptas OK
from transformers import AutoImageProcessor
from transformers import AutoModelForImageClassification

In [2]:
data=pd.read_feather("data/stage1.feather")

In [3]:
data["predicted"]=np.full(data.shape[0],"")

In [4]:
data["certainty"]=np.full(data.shape[0],"")

In [5]:
model_name="./models-legacy/swinv2-base-patch4-window16-256-retrained-f1-85/"

In [15]:
device="cuda:2"

In [16]:
model = AutoModelForImageClassification.from_pretrained(model_name).to(device)

In [17]:
image_processor = AutoImageProcessor.from_pretrained(model_name)

In [18]:
def load_image(path):
    return PIL.Image.open(path)

In [19]:
def process(image):
    return image_processor(images=image, return_tensors="pt").to(model.device)["pixel_values"]

In [20]:
def consume(pixels):
    logits=model(pixel_values=pixels).logits
    return nn.functional.softmax(logits, dim=-1).detach().cpu().numpy().flatten()

In [21]:
def classify(probabilities):
    idx=probabilities.argmax(-1).item()
    return model.config.id2label[idx]

In [22]:
from fastprogress import progress_bar as pb

In [23]:
for idx,r in pb(data.iterrows(), data.shape[0]):
    try:
        probs=consume(process(load_image(r["path"].replace("\\","/"))))
        cl=classify(probs)
        crt=probs.max()
    except Exception as E:
        print("ERROR:",E)
        print(r)
        cl="UNK"
        crt=0
    data.loc[idx,"predicted"]=cl
    data.loc[idx,"certainty"]=crt

ERROR: Unsupported number of image dimensions: 2
path                 images\2000\dic\p1215003.jpg
volcano_certainty                               0
day_night                                       0
has_fume                                        0
is_explosion                                    0
predicted                                     UNK
certainty                                       0
Name: 135, dtype: object
ERROR: Unsupported number of image dimensions: 2
path                 images\2000\dic\p1219008.jpg
volcano_certainty                               0
day_night                                       0
has_fume                                        0
is_explosion                                    0
predicted                                     UNK
certainty                                       0
Name: 156, dtype: object
ERROR: Unsupported number of image dimensions: 2
path                 images\2000\dic\p122400a.jpg
volcano_certainty                               0
day

In [24]:
data

Unnamed: 0,path,volcano_certainty,day_night,has_fume,is_explosion,predicted,certainty
0,images\1999\ene\p0131991.jpg,10,10,8,4,EXP+FUM,0.999914
1,images\1999\ene\p0131992.jpg,10,8,9,2,EXP+FUM,0.999994
2,images\1999\ene\p0131993.jpg,10,10,10,0,FUM,0.998226
3,images\1999\ene\p0131994.jpg,10,10,10,0,FUM,0.999997
4,images\1999\ene\p0131995.jpg,10,10,10,2,FUM,0.99649
...,...,...,...,...,...,...,...
20172,images\2024\image_95.jpg,-1,-1,-1,-1,UNK,0.816377
20173,images\2024\image_96.jpg,-1,-1,-1,-1,UNK,0.999911
20174,images\2024\image_97.jpg,-1,-1,-1,-1,FUM,0.999968
20175,images\2024\image_98.jpg,-1,-1,-1,-1,INA,0.927969


In [34]:
data.drop(data[(data["predicted"]=="UNK") & (data["certainty"]>=.999)].index, inplace=True)

In [37]:
data.sort_values(by=["predicted", "certainty"],inplace=True,ascending=False)

In [41]:
data.reset_index(drop=True, inplace=True)

In [42]:
data

Unnamed: 0,path,volcano_certainty,day_night,has_fume,is_explosion,predicted,certainty
0,images\2006\jun\p0622063.jpg,-1,-1,-1,-1,UNK,0.998998
1,images\2023\jul\p0704235.jpg,-1,-1,-1,-1,UNK,0.998997
2,images\2013\sep\p0926132.jpg,-1,-1,-1,-1,UNK,0.998995
3,images\2019\ene\p0120193.jpg,-1,-1,-1,-1,UNK,0.998994
4,images\2018\jul\p0723181.jpg,-1,-1,-1,-1,UNK,0.998969
...,...,...,...,...,...,...,...
19345,images\2015\may\p0530155.jpg,-1,-1,-1,-1,EXP,0.489222
19346,images\2016\oct\p1028164.jpg,-1,-1,-1,-1,EXP,0.461512
19347,images\2018\dic\p1231184.jpg,-1,-1,-1,-1,EXP,0.459877
19348,images\2018\jun\p0603184.jpg,-1,-1,-1,-1,EXP,0.421862


In [43]:
data.to_feather(f"data/stage2.feather")