In [1]:
from transformers.utils import send_example_telemetry

# Send example-usage telemetry for this notebook via the transformers helper,
# tagging it as the PyTorch image-classification example.
send_example_telemetry("image_classification_notebook", framework="pytorch")

In [None]:
pip install -U tf-keras

In [2]:
import pandas as pd
import numpy as np
import evaluate

import os
import gc

from tqdm.notebook import tqdm
import PIL

import torch
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AutoImageProcessor
from datasets import Dataset, Image
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor




In [3]:
# Load the training annotations: one row per image with its class name,
# numeric class id and image file name.
df = pd.read_csv('train.csv')
df

Unnamed: 0,unified_class,class_id,image_name
0,Оленевые,5,3cf4207b958eade893a2f1618cf062b8.JPG
1,Кошки,2,37698901280c871f426d40afe5c373cd.JPG
2,Заяц,0,20e7b30026001cbfe0b5c0ee16c9ff56.JPG
3,Кошки,2,a1bc8ea546206ee8fc0f1836fda9a5c1.JPG
4,Оленевые,5,54eb76914b84db8a0d56f98125abf588.JPG
...,...,...,...
28010,Оленевые,5,07b420b4fe265b4ed918b46435c025d7.JPG
28011,Пантеры,6,2d1c5918357bbdd729bf79085e55d35e.JPG
28012,Заяц,0,1531efa9f8687e390adf780355acd606.JPG
28013,Кабан,1,2b15eaef0ce9b57b6570709f95a4bea4.JPG


In [4]:
# Optional memory cleanup, left disabled because `df` is still used by the cells below.
#del df 
#gc.collect()

In [5]:
# Number of distinct target classes present in the annotations.
df["class_id"].nunique()

10

In [6]:
# Drop the human-readable class name; the numeric `class_id` column is the target.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell safe to re-run: a second execution no longer raises KeyError.
df = df.drop(columns=['unified_class'], errors='ignore')
df

Unnamed: 0,class_id,image_name
0,5,3cf4207b958eade893a2f1618cf062b8.JPG
1,2,37698901280c871f426d40afe5c373cd.JPG
2,0,20e7b30026001cbfe0b5c0ee16c9ff56.JPG
3,2,a1bc8ea546206ee8fc0f1836fda9a5c1.JPG
4,5,54eb76914b84db8a0d56f98125abf588.JPG
...,...,...
28010,5,07b420b4fe265b4ed918b46435c025d7.JPG
28011,6,2d1c5918357bbdd729bf79085e55d35e.JPG
28012,0,1531efa9f8687e390adf780355acd606.JPG
28013,1,2b15eaef0ce9b57b6570709f95a4bea4.JPG


In [7]:
# Turn bare file names into paths relative to the notebook by prefixing the image
# folder. Names that already carry the prefix are left alone, so re-running this
# cell does not produce 'train/train/...' paths.
df['image_name'] = df['image_name'].where(
    df['image_name'].str.startswith('train/'),
    'train/' + df['image_name'],
)

In [8]:
def has_channel_axis(path):
    """Return True when the image at `path` has more than one band.

    Multi-band images (e.g. RGB) are the ones whose array form is 3-D
    (height, width, channels); single-band images (e.g. grayscale or palette)
    are 2-D. `PIL.Image.open` only reads the header, so counting bands avoids
    decoding every file into a NumPy array, and the `with` block closes the
    file handle deterministically.
    """
    with PIL.Image.open(path) as img:
        return len(img.getbands()) > 1


# Keep only the rows whose image has a channel axis.
df = df[df['image_name'].apply(has_channel_axis)]

In [9]:
# Inspect the frame after the image paths were prefixed and the rows filtered.
df

Unnamed: 0,class_id,image_name
0,5,train/3cf4207b958eade893a2f1618cf062b8.JPG
1,2,train/37698901280c871f426d40afe5c373cd.JPG
2,0,train/20e7b30026001cbfe0b5c0ee16c9ff56.JPG
3,2,train/a1bc8ea546206ee8fc0f1836fda9a5c1.JPG
4,5,train/54eb76914b84db8a0d56f98125abf588.JPG
...,...,...
28010,5,train/07b420b4fe265b4ed918b46435c025d7.JPG
28011,6,train/2d1c5918357bbdd729bf79085e55d35e.JPG
28012,0,train/1531efa9f8687e390adf780355acd606.JPG
28013,1,train/2b15eaef0ce9b57b6570709f95a4bea4.JPG


In [10]:
# Rebuild a contiguous 0..n-1 index after the row filter; the old index is discarded.
df = df.reset_index(drop=True)

In [11]:
# Convert the pandas DataFrame into a Hugging Face `Dataset`.
# NOTE(review): `df` is rebound here, so from this point on it refers to a Dataset
# rather than a DataFrame; re-running this cell on its own will likely fail.
df = Dataset.from_pandas(df)

In [12]:
# Cast the path column to the `Image` feature so that its entries are handed to
# downstream code as images (load_image calls `.convert("RGB")` on them).
dataset = df.cast_column("image_name", Image())

In [13]:
# Encode `class_id` as a class-label feature; it is used as the stratification
# column when the data is split below.
dataset = dataset.class_encode_column("class_id")

Stringifying the column:   0%|          | 0/28014 [00:00<?, ? examples/s]

Casting to class labels:   0%|          | 0/28014 [00:00<?, ? examples/s]

In [14]:
# Show the dataset summary (feature names and number of rows).
dataset

Dataset({
    features: ['class_id', 'image_name'],
    num_rows: 28014
})

In [15]:
# Checkpoint identifier shared by the image processor and the model loaded below.
m = "microsoft/resnet-50"

In [16]:
# Load the image preprocessing pipeline that belongs to the chosen checkpoint.
image_processor = AutoImageProcessor.from_pretrained(m)

In [17]:
# Hold out 10% of the data for validation, stratified on the class label.
# A fixed seed makes the split (and therefore the reported metrics) reproducible
# across kernel restarts.
splits = dataset.train_test_split(
    test_size=0.1,
    stratify_by_column='class_id',
    seed=42,
)
train = splits['train']
val = splits['test']

In [18]:
def load_image(example_batch):
    """Turn a raw batch into model inputs.

    Every image under "image_name" is converted to RGB and run through
    `image_processor`; the resulting per-image tensors are stacked along a new
    leading dimension and stored under "pixel_values". The values under
    "class_id" are stored as a tensor under "labels". The two source columns
    are removed, and the same (mutated) batch mapping is returned.
    """
    pixel_tensors = []
    for picture in example_batch["image_name"]:
        encoded = image_processor(picture.convert("RGB"), return_tensors="pt")
        # The processor returns a batch of one; drop that leading dimension.
        pixel_tensors.append(encoded['pixel_values'].squeeze(0))

    example_batch["pixel_values"] = torch.stack(pixel_tensors)
    example_batch["labels"] = torch.tensor(example_batch["class_id"])

    # The raw columns are not needed once the tensors exist.
    for stale_key in ("image_name", "class_id"):
        example_batch.pop(stale_key)

    return example_batch


In [19]:
# Register load_image as the transform for both splits.
for split in (train, val):
    split.set_transform(load_image)

In [20]:
# Fetch the first training example to check the output of the load_image transform.
train[0]

{'pixel_values': tensor([[[-1.4329, -1.4672, -1.5014,  ..., -1.7412, -1.8953, -1.8268],
          [-1.4672, -1.4500, -1.4843,  ..., -1.4843, -1.6727, -1.7240],
          [-1.5870, -1.5185, -1.5185,  ..., -0.6794, -0.9705, -1.3130],
          ...,
          [-1.8097, -1.7925, -1.7583,  ..., -1.6213, -1.6555, -1.6555],
          [-1.7754, -1.7754, -1.7754,  ..., -1.6384, -1.6555, -1.6555],
          [-1.7754, -1.7754, -1.7754,  ..., -1.6555, -1.6555, -1.6555]],
 
         [[-1.4405, -1.4755, -1.5105,  ..., -1.7731, -1.9307, -1.8606],
          [-1.4755, -1.4580, -1.4930,  ..., -1.4930, -1.6856, -1.7381],
          [-1.5980, -1.5280, -1.5280,  ..., -0.6702, -0.9678, -1.3179],
          ...,
          [-1.7906, -1.7731, -1.7381,  ..., -1.6331, -1.6681, -1.6681],
          [-1.7556, -1.7556, -1.7556,  ..., -1.6506, -1.6681, -1.6681],
          [-1.7556, -1.7556, -1.7556,  ..., -1.6681, -1.6681, -1.6681]],
 
         [[-1.2119, -1.2467, -1.2816,  ..., -1.5256, -1.6824, -1.6127],
          [-

In [21]:
# Load the F1 metric from the `evaluate` library; compute_metrics uses it below.
metric = evaluate.load("f1")

In [22]:
def compute_metrics(eval_pred):
    """Score an evaluation pass with macro-averaged F1.

    `eval_pred` unpacks into (predictions, labels). The predicted class of each
    sample is the argmax over axis 1 of the predictions, and the result of
    `metric.compute` is returned unchanged.
    """
    scores, references = eval_pred
    predicted_ids = np.asarray(scores).argmax(axis=1)
    return metric.compute(
        predictions=predicted_ids,
        references=references,
        average='macro',
    )

In [23]:
# Load the pretrained backbone with a classification head sized for this dataset.
model = AutoModelForImageClassification.from_pretrained(
    m,
    # Derive the number of classes from the encoded label column instead of
    # hard-coding it, so the head always matches the data.
    num_labels=dataset.features["class_id"].num_classes,
    # The checkpoint's head has a different output size; allow it to be
    # replaced by a freshly initialized one instead of raising an error.
    ignore_mismatched_sizes=True,
)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([10, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Training configuration: evaluate and checkpoint once per epoch, then restore
# the checkpoint with the best validation F1 at the end of training.
args = TrainingArguments(
    output_dir='save',
    # Keep the raw columns: load_image reads "image_name" and "class_id" itself.
    remove_unused_columns=False,
    # `eval_strategy` is the current name of the deprecated `evaluation_strategy`.
    eval_strategy="epoch",
    # Must match the evaluation schedule for load_best_model_at_end to work.
    save_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=64,
    #gradient_accumulation_steps=4,
    per_device_eval_batch_size=64,
    num_train_epochs=5,
    #warmup_ratio=0.1,
    #logging_steps=10,
    load_best_model_at_end=True,
    # Refers to the "f1" key returned by compute_metrics.
    metric_for_best_model="f1",
)

In [None]:
# Wire the model, data splits and metric function into the Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train,
    eval_dataset=val,
    # `processing_class` replaces the deprecated `tokenizer` argument; passing
    # the image processor here lets the Trainer manage it alongside the model.
    processing_class=image_processor,
    compute_metrics=compute_metrics,
)

  trainer = Trainer(


In [None]:
# Run fine-tuning, then ask PyTorch to release its cached, unused GPU memory.
trainer.train()
torch.cuda.empty_cache()

  0%|          | 0/1755 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.25091850757598877, 'eval_f1': 0.8510007796379627, 'eval_runtime': 31.5472, 'eval_samples_per_second': 177.607, 'eval_steps_per_second': 2.789, 'epoch': 1.0}
{'loss': 0.52, 'grad_norm': 0.7057911157608032, 'learning_rate': 0.00021452991452991453, 'epoch': 1.42}


  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.1296033263206482, 'eval_f1': 0.922401637299774, 'eval_runtime': 31.0949, 'eval_samples_per_second': 180.19, 'eval_steps_per_second': 2.83, 'epoch': 2.0}
{'loss': 0.106, 'grad_norm': 1.186615228652954, 'learning_rate': 0.00012905982905982903, 'epoch': 2.85}


  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.10487987846136093, 'eval_f1': 0.9446050525090781, 'eval_runtime': 31.575, 'eval_samples_per_second': 177.45, 'eval_steps_per_second': 2.787, 'epoch': 3.0}


  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.10234136879444122, 'eval_f1': 0.9466697829533823, 'eval_runtime': 30.8799, 'eval_samples_per_second': 181.445, 'eval_steps_per_second': 2.85, 'epoch': 4.0}
{'loss': 0.0382, 'grad_norm': 1.642487645149231, 'learning_rate': 4.358974358974359e-05, 'epoch': 4.27}


  0%|          | 0/88 [00:00<?, ?it/s]

{'eval_loss': 0.10092779248952866, 'eval_f1': 0.9522334677747806, 'eval_runtime': 31.455, 'eval_samples_per_second': 178.127, 'eval_steps_per_second': 2.798, 'epoch': 5.0}
{'train_runtime': 1138.7592, 'train_samples_per_second': 98.401, 'train_steps_per_second': 1.541, 'train_loss': 0.191856851604929, 'epoch': 5.0}


In [None]:
def load_image(example_batch):
    # Process each image individually and collect pixel values as tensors
    di = [image_processor(image.convert("RGB"), return_tensors="pt")['pixel_values'].squeeze(0) 
          for image in example_batch["image_name"]]
    # Stack the list of tensors into a single tensor with batch dimension
    example_batch["pixel_values"] = torch.stack(di)
    
    del example_batch["image_name"]