In [3]:
import os
import cv2
import json
import torch
import numpy as np

from PIL import Image
from torch.utils.data import Dataset
from matplotlib import pyplot as plt
# !pip install torch torchvision pandas scikit-learn

In [4]:
def collect_image_age_pairs(image_root_dir, json_root_dir):
    pairs = []
    
    # subject_id dir 반복
    subject_ids = sorted([d for d in os.listdir(image_root_dir) if os.path.isdir(os.path.join(image_root_dir, d))])
    
    print(f"총 subject 수: {len(subject_ids)}")
    
    for subject_id in subject_ids:
        subject_image_dir = os.path.join(image_root_dir, subject_id)
        subject_json_dir = os.path.join(json_root_dir, subject_id)
        if not os.path.exists(subject_json_dir):
            print(f"[경고] JSON 디렉토리 없음: {subject_json_dir}")
            continue
        
        #이미지 파일 중 Jpg만 추출
        jpg_files = [f for f in os.listdir(subject_image_dir) if f.endswith('.jpg')]
        
        for jpg_file in jpg_files:
            base_name = jpg_file.replace('.jpg', '')
            json_filename = f"{base_name}_00.json"
            json_path = os.path.join(subject_json_dir, json_filename)
            jpg_path = os.path.join(subject_image_dir, jpg_file)
            
            if os.path.exists(json_path):
                try:
                    with open(json_path, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        age = data['info']['age'] #info[4]
                        pairs.append((jpg_path, age))
                        
                except Exception as e:
                    print(f"[오류] JSON 파싱 실패: {json_path} - {e}")
            else:
                print(f"[경고] JSON 파일 없음: {json_path}")
                
    return pairs

In [6]:
#루트 경로 
image_root = '/home/alpaco/yskim/data/Training/origin/digitalCamera'
json_root = '/home/alpaco/yskim/data/Training/lable/digitalCamera'

# 실행
image_age_pairs = collect_image_age_pairs(image_root, json_root)

# 결과
print(f"총 매칭 이미지 수: {len(image_age_pairs)}")
print("샘플 10:")
for i in range(min(10, len(image_age_pairs))):
    print(image_age_pairs[i])

총 subject 수: 858
총 매칭 이미지 수: 6006
샘플 10:
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_L15.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_Fb.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_L30.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_Ft.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_F.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_R15.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0002/0002_01_R30.jpg', 50)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0003/0003_01_Fb.jpg', 24)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0003/0003_01_F.jpg', 24)
('/home/alpaco/yskim/data/Training/origin/digitalCamera/0003/0003_01_L15.jpg', 24)


In [7]:
import pandas as pd

df = pd.DataFrame(image_age_pairs, columns=['image_path', 'age'])
print(df.head())

                                          image_path  age
0  /home/alpaco/yskim/data/Training/origin/digita...   50
1  /home/alpaco/yskim/data/Training/origin/digita...   50
2  /home/alpaco/yskim/data/Training/origin/digita...   50
3  /home/alpaco/yskim/data/Training/origin/digita...   50
4  /home/alpaco/yskim/data/Training/origin/digita...   50


In [8]:
# 데이터 준비
df = pd.read_csv(image_age_pairs, columns=['image_path', 'age'])
print(df.head())

TypeError: read_csv() got an unexpected keyword argument 'columns'

### ResNet 학습
1. data준비
2. custom dataset 클래스 정의
3. 이미지 전처리
    - Horizontal equalize
4. 모델 구성(ResNet)
5. Regression Loss (MSELoss)
6. 학습루프 작성(Train/val)
7. 성능평가(MAE, RMSE)

In [9]:
!pip install torch torchvision pandas scikit-learn timm pandas



In [10]:
df = pd.DataFrame(image_age_pairs, columns=['image_path', 'age'])

In [11]:
# Dataset 정의 
class AgeDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]['image_path']
        age = self.df.iloc[idx]['age']
        image = Image.open(img_path).convert("RGB")
    
        if self.transform:
            image = self.transform(image)
    
        return image, torch.tensor(age, dtype=torch.float32)

In [13]:
!pip install scikit-image



In [14]:
!pip install --upgrade gdown

Collecting gdown
  Using cached gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Using cached gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 4.6.3
    Uninstalling gdown-4.6.3:
      Successfully uninstalled gdown-4.6.3
Successfully installed gdown-5.2.0


In [15]:
!pip install gdown==v4.6.3

Collecting gdown==v4.6.3
  Using cached gdown-4.6.3-py3-none-any.whl.metadata (4.4 kB)
Using cached gdown-4.6.3-py3-none-any.whl (14 kB)
Installing collected packages: gdown
  Attempting uninstall: gdown
    Found existing installation: gdown 5.2.0
    Uninstalling gdown-5.2.0:
      Successfully uninstalled gdown-5.2.0
Successfully installed gdown-4.6.3


In [16]:
from skimage import io, exposure, img_as_ubyte
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
from PIL import Image
import torch
import os
from torchvision import transforms

%matplotlib inline

In [None]:
# Horizontal Equalizing
image_root = '/home/alpaco/yskim/data/Training/origin/digitalCamera'
output_folder = '/home/alpaco/yskim/data/Training/CLAHE/digitalCamera'
os.makedirs(output_folder, exist_ok=True)

image_files = [f for f in os.listdir(image_root) if f.lower().endswith('.jpg')]

for file in image_files:
    image_path = os.path.join(image_root, file)
    image = io.imread(image_path)

    if image.ndim == 3:
        image_gray = rgb2gray(image)
    else:
        image_gray = image

    equalized = exposure.equalize_adapthist(image_gray, clip_limit=0.03)
    output_path = os.path.join(output_folder, f'equalized_{file}')
    io.imsave(output_path, img_as_ubyte(equalized))
    print(f"Saved: {output_path}")

In [18]:
# Custom Data

class AgeRegressionDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]['image_path']
        age = self.df.iloc[idx]['age']
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(age, dtype=torch.float32)

In [None]:
# transform 정의 
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  

In [None]:
#데이터 분할
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

df = pd.DataFrame(image_age_pairs, columns=['image_path', 'age'])
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = AgeRegressionDataset(train_df, transform=transform)
val_dataset = AgeRegressionDataset(val_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
#ResNet regression 학습
class AgeRegressor(nn.Module):
    def __init__(self):
        super(AgeRegressor, self).__init__()
        self.backbone = models.resnet50(pretrained=True)
        self.backbone.fc = nn.Linear(self.backbone.fc.in_features, 1) 
    
    def forward(self, x):
        return self.backbone(x).squeeze(1)

In [None]:
#학습 루프
import torch
import torch.nn.functional as F
from torch import optim
from PIL import ImageFile
from tqdm import tqdm
from skimage import io, exposure, img_as_ubyte
from skimage.color import rgb2gray

ImageFile.LOAD_TRUNCATED_IMAGES = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AgeRegressor().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()



##성능개선_(1)

optimizer Adam with lr=0.001

Epoch loop : 학습과 검증을 하나의 loop에 통합

Early stopping : val_loss 기준 patient= 3 

성능기록 및 시각화 = loss,MAE,RMSE

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

# --- Optimizer 및 Scheduler 설정 ---
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True)

# --- Early Stopping 설정 ---
best_val_loss = float('inf')
patience = 3
counter = 0
best_model_wts = None

# --- 결과 저장 리스트 ---
train_loss_history = []
train_mae_history = []
train_rmse_history = []

val_loss_history = []
val_mae_history = []
val_rmse_history = []

for epoch in range(30):
    # ----- Train -----
    model.train()
    train_loss = 0
    all_preds = []
    all_labels = []

    for images, ages in tqdm(train_loader, desc=f"[Epoch {epoch+1}] Training"):
        images, ages = images.to(device), ages.to(device)

        preds = model(images).squeeze()
        loss = criterion(preds, ages.float())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * images.size(0)
        all_preds.extend(preds.detach().cpu().numpy())
        all_labels.extend(ages.detach().cpu().numpy())

    avg_loss = train_loss / len(train_loader.dataset)
    mae = mean_absolute_error(all_labels, all_preds)
    rmse = mean_squared_error(all_labels, all_preds, squared=False)

    train_loss_history.append(avg_loss)
    train_mae_history.append(mae)
    train_rmse_history.append(rmse)

    print(f"[Epoch {epoch+1}] Train Loss: {avg_loss:.4f} | MAE: {mae:.4f} | RMSE: {rmse:.4f}")

    # ----- Validation -----
    model.eval()
    total_loss = 0.0
    true_ages, pred_ages = [], []

    with torch.no_grad():
        for images, ages in tqdm(val_loader, desc=f"[Epoch {epoch+1}] Validation"):
            images = images.to(device)
            ages = ages.to(device)

            preds = model(images).squeeze()
            loss = criterion(preds, ages.float())

            total_loss += loss.item() * images.size(0)
            true_ages.extend(ages.detach().cpu().numpy())
            pred_ages.extend(preds.detach().cpu().numpy())

    val_avg_loss = total_loss / len(val_loader.dataset)
    v_mae = mean_absolute_error(true_ages, pred_ages)
    v_rmse = mean_squared_error(true_ages, pred_ages, squared=False)

    val_loss_history.append(val_avg_loss)
    val_mae_history.append(v_mae)
    val_rmse_history.append(v_rmse)

    print(f"[Epoch {epoch+1}] Val Loss: {val_avg_loss:.4f} | MAE: {v_mae:.4f} | RMSE: {v_rmse:.4f}")

    # ----- Scheduler & Early Stopping -----
    scheduler.step(val_avg_loss)

    if val_avg_loss < best_val_loss:
        best_val_loss = val_avg_loss
        counter = 0
        best_model_wts = model.state_dict()
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping triggered at epoch {epoch+1}. Best Val Loss: {best_val_loss:.4f}")
            break

# --- 최고 성능 모델 복원 ---
if best_model_wts is not None:
    model.load_state_dict(best_model_wts)


In [None]:
epochs = range(1, len(train_loss_history) + 1)

plt.figure(figsize=(18, 5))

plt.subplot(1, 3, 1)
plt.plot(epochs, train_loss_history, label='Train Loss')
plt.plot(epochs, val_loss_history, label='Val Loss')
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.subplot(1, 3, 2)
plt.plot(epochs, train_mae_history, label='Train MAE', color='green')
plt.plot(epochs, val_mae_history, label='Val MAE', color='orange')
plt.title('MAE')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend()
plt.grid(True)

plt.subplot(1, 3, 3)
plt.plot(epochs, train_rmse_history, label='Train RMSE', color='red')
plt.plot(epochs, val_rmse_history, label='Val RMSE', color='purple')
plt.title('RMSE')
plt.xlabel('Epoch')
plt.ylabel('RMSE')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

In [1]:
#모델 저장 
torch.save(best_model_wts, 'best_model.pth')

NameError: name 'torch' is not defined