In [1]:
import warnings
warnings.filterwarnings('ignore')
import random
import torch
import numpy as np
import os
from datasets import load_dataset
from evaluate import load
from transformers import AutoModelForImageClassification, AutoImageProcessor, TrainingArguments, Trainer

In [2]:
'''계산 리소스 최적화'''
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42)  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
'''dataset 불러오기'''
dataset = load_dataset("imagefolder", data_dir="./gray_data")
dataset = dataset.rename_column("label", "labels")

model_name = "facebook/convnext-tiny-224"
processor = AutoImageProcessor.from_pretrained(model_name)

Resolving data files:   0%|          | 0/36274 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/6670 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/7839 [00:00<?, ?it/s]

In [5]:
'''정렬함수 정의'''
def transform(example_batch):
    # 그레이스케일 이미지를 3채널로 변환
    images = [np.repeat(np.array(x.convert('L'))[:, :, np.newaxis], 3, axis=2) for x in example_batch['image']]
    inputs = processor(images, return_tensors='pt')
    inputs['labels'] = example_batch['labels']
    return inputs

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

In [6]:
prepared_ds = dataset.with_transform(transform).shuffle()

metric = load("accuracy", trust_remote_code=True)
def compute_metrics(p):
    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)

In [None]:
'''사전 훈련 모델 로드 '''
labels = dataset['train'].features['labels'].names

model = AutoModelForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True
)

Some weights of ConvNextForImageClassification were not initialized from the model checkpoint at facebook/convnext-tiny-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([7, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([7]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
'''모델 훈련을 위한 하이퍼파라미터 설정'''
training_args = TrainingArguments(
    output_dir="./results/swinvit-experience-1",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=2,
    num_train_epochs=200,
    learning_rate=5e-5,
    remove_unused_columns=False,
    label_smoothing_factor=0.1,
    warmup_ratio=0.1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    seed=42
)

In [9]:
'''Hugging Face Trainer 객체 생성'''
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],
    tokenizer=processor,
)

In [10]:
'''모델 훈련'''
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

Epoch,Training Loss,Validation Loss,Accuracy
1,1.9518,1.925319,0.201199
2,1.8337,1.692545,0.473013
3,1.5188,1.373206,0.57901
4,1.2568,1.18781,0.638831
5,1.1312,1.111095,0.664018
6,1.0588,1.072216,0.686357
7,1.012,1.022906,0.704498
8,0.9795,1.015304,0.708096
9,0.9488,1.030653,0.698051
10,0.9194,0.993672,0.717691


KeyboardInterrupt: 

In [None]:
'''훈려된 모델 평가'''
metrics = trainer.evaluate(prepared_ds['validation'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

In [1]:
import warnings
warnings.filterwarnings('ignore')
import random
import torch
import numpy as np
import os
from datasets import load_dataset
from evaluate import load
from transformers import AutoModelForImageClassification, AutoImageProcessor, TrainingArguments, Trainer
from PIL import Image, ImageEnhance
from sklearn.metrics import precision_recall_fscore_support

'''계산 리소스 최적화'''
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(42)  
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

'''dataset 불러오기'''
dataset = load_dataset("imagefolder", data_dir="./gray_data")
dataset = dataset.rename_column("label", "labels")

model_name = "facebook/convnext-tiny-224"
processor = AutoImageProcessor.from_pretrained(model_name)

'''데이터 증강 함수들'''
def add_salt_and_pepper(image, amount=0.05):
    img_array = np.array(image)
    h, w = img_array.shape[:2]
    n_salt = np.ceil(amount * img_array.size * 0.5)
    n_pepper = np.ceil(amount * img_array.size * 0.5)
    
    # Salt
    coords = [np.random.randint(0, i - 1, int(n_salt)) for i in img_array.shape[:2]]
    img_array[tuple(coords)] = 255
    
    # Pepper
    coords = [np.random.randint(0, i - 1, int(n_pepper)) for i in img_array.shape[:2]]
    img_array[tuple(coords)] = 0
    
    return Image.fromarray(img_array)

def adjust_brightness(image, factor):
    enhancer = ImageEnhance.Brightness(image)
    return enhancer.enhance(factor)

def random_rotate(image, angle_range=(-15, 15)):
    angle = random.uniform(*angle_range)
    return image.rotate(angle, resample=Image.BILINEAR, expand=True)

'''정렬함수 정의'''
def transform(example_batch):
    # 그레이스케일 이미지를 3채널로 변환
    images = []
    for img in example_batch['image']:
        img = img.convert('L')
        
        # 데이터 증강 적용
        if random.random() < 0.5:
            img = add_salt_and_pepper(img)
        if random.random() < 0.5:
            brightness_factor = random.uniform(0.8, 1.2)
            img = adjust_brightness(img, brightness_factor)
        if random.random() < 0.5:
            img = random_rotate(img)
        
        # 3채널로 변환
        img_array = np.repeat(np.array(img)[:, :, np.newaxis], 3, axis=2)
        images.append(img_array)
    
    inputs = processor(images, return_tensors='pt')
    inputs['labels'] = example_batch['labels']
    return inputs

def collate_fn(batch):
    return {
        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
        'labels': torch.tensor([x['labels'] for x in batch])
    }

prepared_ds = dataset.with_transform(transform).shuffle()

metric = load("accuracy", trust_remote_code=True)
def compute_metrics(p):
    predictions = np.argmax(p.predictions, axis=1)
    labels = p.label_ids
    
    accuracy = metric.compute(predictions=predictions, references=labels)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='weighted')
    
    return {
        'accuracy': accuracy['accuracy'],
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

'''사전 훈련 모델 로드 '''
labels = dataset['train'].features['labels'].names

model = AutoModelForImageClassification.from_pretrained(
    model_name,
    num_labels=len(labels),
    id2label={str(i): c for i, c in enumerate(labels)},
    label2id={c: str(i) for i, c in enumerate(labels)},
    ignore_mismatched_sizes=True
)

'''모델 훈련을 위한 하이퍼파라미터 설정'''
training_args = TrainingArguments(
    output_dir="./results/swinvit-experience-1",
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=2,
    num_train_epochs=50,
    learning_rate=5e-5,
    remove_unused_columns=False,
    label_smoothing_factor=0.1,
    warmup_ratio=0.1,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    seed=42
)

'''Hugging Face Trainer 객체 생성'''
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    train_dataset=prepared_ds["train"],
    eval_dataset=prepared_ds["validation"],
    tokenizer=processor,
)

'''모델 훈련'''
train_results = trainer.train()
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

'''훈련된 모델 평가'''
metrics = trainer.evaluate(prepared_ds['validation'])
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

# 결과를 출력합니다
print(f"Accuracy: {metrics['eval_accuracy']:.4f}")
print(f"Precision: {metrics['eval_precision']:.4f}")
print(f"Recall: {metrics['eval_recall']:.4f}")
print(f"F1-score: {metrics['eval_f1']:.4f}")

Resolving data files:   0%|          | 0/26099 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/4667 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/5408 [00:00<?, ?it/s]

Some weights of ConvNextForImageClassification were not initialized from the model checkpoint at facebook/convnext-tiny-224 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,1.5781,1.450495,0.550889,0.586173,0.550889,0.539571
2,1.141,0.895274,0.765588,0.776042,0.765588,0.765685
3,0.789,0.726305,0.836297,0.838888,0.836297,0.836103
4,0.6928,0.682548,0.855582,0.86299,0.855582,0.855045
5,0.6492,0.646335,0.877437,0.878725,0.877437,0.8777
6,0.6188,0.627162,0.881723,0.884801,0.881723,0.881805
7,0.5939,0.625833,0.884937,0.888143,0.884937,0.885284
8,0.5781,0.614301,0.889436,0.889041,0.889436,0.889061
9,0.5655,0.607187,0.891365,0.891487,0.891365,0.891266
10,0.5466,0.603843,0.902507,0.904162,0.902507,0.902361


***** train metrics *****
  epoch                    =          50.0
  total_flos               = 30540889859GF
  train_loss               =        0.5058
  train_runtime            =    5:38:11.98
  train_samples_per_second =        64.309
  train_steps_per_second   =         0.335


***** eval metrics *****
  epoch                   =       50.0
  eval_accuracy           =     0.9053
  eval_f1                 =     0.9052
  eval_loss               =     0.6354
  eval_precision          =     0.9058
  eval_recall             =     0.9053
  eval_runtime            = 0:00:44.39
  eval_samples_per_second =     105.13
  eval_steps_per_second   =      2.208
Accuracy: 0.9053
Precision: 0.9058
Recall: 0.9053
F1-score: 0.9052
