In [None]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm  # Import tqdm for the progress bar
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import RandomOverSampler
# Import configuration settings
# import config
import config

In [None]:
import random
# Root directory of the cropped images (not referenced elsewhere in the visible notebook).
base_dir = 'data/cropdata'

# Load the filtered image metadata.
df = pd.read_csv('data/images_data_filtered.csv')

# Split the rows by review outcome.
approved_df = df[df['image_status'] == 'Approved']
declined_df = df[df['image_status'] == 'Declined']

# Under-sample the "Approved" class to at most 2000 rows. Capping at the number of
# available rows keeps `DataFrame.sample` from raising ValueError when fewer than
# 2000 approved rows exist.
n_approved = min(2000, len(approved_df))
approved_sample = approved_df.sample(n=n_approved, random_state=42)
df_undersampled = pd.concat([approved_sample, declined_df])

# Shuffle the combined rows (fixed seed for reproducibility) and rebuild the index.
df_undersampled = df_undersampled.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the under-sampled metadata to CSV.
df_undersampled.to_csv('data/undersampled_images_data.csv', index=False)
print("Under-sampled CSV file created.")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Reload the under-sampled metadata written by the previous cell.
data = pd.read_csv('data/undersampled_images_data.csv')

# Display all columns
pd.set_option('display.max_columns', None)

# Display all rows
pd.set_option('display.max_rows', None)

# Show the first few rows. A bare `data.head()` is only rendered when it is the
# last expression of a cell, so in the middle of the cell its result was silently
# discarded; print it explicitly instead.
print(data.head())

# Bar chart of how many rows fall into each `image_status` value.
plt.figure(figsize=(6, 4))
data['image_status'].value_counts().plot(kind='bar', color=['skyblue', 'salmon'])
plt.title('Distribution of Approved and Declined Images')
plt.xlabel('Image Status')
plt.ylabel('Frequency')
plt.xticks(rotation=0)
plt.show()

In [None]:
# Load the image metadata CSV from the path configured in `config.CSV_FILE_PATH`.
# NOTE(review): presumably this is the balanced / under-sampled file written earlier —
# confirm in config. This rebinds `df`, which an earlier cell also assigns.
df = pd.read_csv(config.CSV_FILE_PATH)

# Define a PyTorch Dataset
class SmokeAlarmDataset(Dataset):
    """Image dataset backed by a metadata DataFrame.

    Each row of ``df`` is read for the columns ``job_no``, ``image_name`` and
    ``image_status``; the image file is looked up at
    ``<image_dir>/<job_no>/<image_name>``.

    Args:
        df: DataFrame with one row per image.
        image_dir: Root directory that contains one sub-directory per job number.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, df, image_dir, transform=None):
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        # Status string -> binary target.
        self.label_map = {"Approved": 1, "Declined": 0}

    def __len__(self):
        """Return the number of metadata rows (rows with missing files included)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        Returns ``(None, None)`` (after printing a warning) when the image file
        does not exist, so that a collate function can drop the sample.
        """
        # Fetch the row once instead of doing a positional lookup per column.
        row = self.df.iloc[idx]
        job_no = str(row["job_no"])
        image_name = row["image_name"]
        # Any status that is not in `label_map` falls back to 0.
        label = self.label_map.get(row["image_status"], 0)

        image_path = os.path.join(self.image_dir, job_no, image_name)

        if not os.path.exists(image_path):
            print(f"Warning: Image not found at {image_path}. Skipping this file.")
            return None, None

        # Open inside a context manager so the file handle is released right away;
        # `convert` returns a fully loaded copy that stays valid after the close.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label

# Custom collate function to handle missing files
def collate_fn(batch):
    """Collate a batch after dropping samples whose first element is ``None``.

    The surviving samples are handed to PyTorch's ``default_collate`` unchanged
    (an all-missing batch is therefore passed on as an empty list).
    """
    present = list(filter(lambda sample: sample[0] is not None, batch))
    return torch.utils.data.dataloader.default_collate(present)

# Define transformations for data preprocessing
transform = transforms.Compose([
    # Inception v3 is designed for 299x299 inputs. The previous value (229, 229) was a
    # typo that contradicted this requirement and shrinks the feature maps that feed
    # the auxiliary classifier used during training.
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=config.NORMALIZE_MEAN, std=config.NORMALIZE_STD)
])


# Split data into train (70%), validation (15%) and test (15%) sets
train_df, temp_df = train_test_split(df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Initialize the datasets; all three splits share the same preprocessing
train_dataset = SmokeAlarmDataset(train_df, config.IMAGE_DIR, transform=transform)
val_dataset = SmokeAlarmDataset(val_df, config.IMAGE_DIR, transform=transform)
test_dataset = SmokeAlarmDataset(test_df, config.IMAGE_DIR, transform=transform)

# Only the training loader shuffles; `collate_fn` drops samples whose image file is missing
train_loader = DataLoader(train_dataset, batch_size=config.BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=config.BATCH_SIZE, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

# Define the Inception model
class InceptionModel(nn.Module):
    """Inception v3 backbone with single-logit heads for binary classification.

    Both the main ``fc`` head and the auxiliary classifier's ``fc`` layer are
    replaced so that each emits one logit per sample.
    """

    def __init__(self):
        super().__init__()
        # Pretrained Inception v3 with the auxiliary classifier enabled.
        backbone = models.inception_v3(pretrained=True, aux_logits=True)

        # Main head: hidden layer of 512 units, dropout, then one output logit.
        main_in_features = backbone.fc.in_features
        backbone.fc = nn.Sequential(
            nn.Linear(main_in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 1),
        )

        # Auxiliary head: a single linear layer producing one logit.
        aux_in_features = backbone.AuxLogits.fc.in_features
        backbone.AuxLogits.fc = nn.Linear(aux_in_features, 1)

        self.model = backbone

    def forward(self, x):
        """Return ``(main, aux)`` while the backbone is in training mode, else the main output only."""
        if not self.model.training:
            return self.model(x)
        main_logits, aux_logits = self.model(x)
        return main_logits, aux_logits

# Build the network, then move its parameters to the configured device.
model = InceptionModel()
model = model.to(config.DEVICE)

# Binary cross-entropy on raw logits, optimised with Adam over all model parameters.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=config.LEARNING_RATE)




In [None]:
import matplotlib.pyplot as plt

# Per-epoch history, filled in by the loop below (one entry per epoch).
train_losses = []
val_losses = []
f1_scores = []

# Train for config.NUM_EPOCHS epochs; each epoch is a full pass over train_loader
# followed by a full pass over val_loader.
for epoch in range(config.NUM_EPOCHS):
    model.train()
    running_loss = 0.0

    # Training phase
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config.NUM_EPOCHS}", leave=False)
    for images, labels in progress_bar:
        # Add a trailing dimension and cast to float so the targets can be fed to
        # the loss together with the model outputs.
        labels = labels.unsqueeze(1).float().to(config.DEVICE)
        images = images.to(config.DEVICE)

        optimizer.zero_grad()

        outputs, aux_outputs = model(images)  # Expect main and auxiliary outputs
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        loss = loss1 + 0.4 * loss2  # Main loss + weighted auxiliary loss

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        progress_bar.set_postfix(loss=loss.item())

    # Average of the per-batch losses (divides by the number of batches, not samples).
    train_loss = running_loss / len(train_loader)
    train_losses.append(train_loss)
    print(f"Epoch [{epoch + 1}/{config.NUM_EPOCHS}], Training Loss: {train_loss}")

    # Validation phase (no auxiliary output needed)
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in val_loader:
            labels = labels.unsqueeze(1).float().to(config.DEVICE)
            images = images.to(config.DEVICE)

            outputs = model(images)  # Only get main output for validation
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # The model emits logits: apply a sigmoid, then threshold at 0.5.
            preds = (torch.sigmoid(outputs) > 0.5).float()
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Average of the per-batch validation losses.
    val_loss = val_loss / len(val_loader)
    val_losses.append(val_loss)

    # Calculate F1 Score
    f1 = f1_score(all_labels, all_preds)
    f1_scores.append(f1)
    print(f"Epoch [{epoch + 1}/{config.NUM_EPOCHS}], Validation Loss: {val_loss}, F1 Score: {f1}")

# Loss curves: training vs. validation, one point per epoch.
loss_fig, loss_ax = plt.subplots(figsize=(12, 5))
loss_ax.plot(train_losses, label='Training Loss')
loss_ax.plot(val_losses, label='Validation Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training and Validation Loss over Epochs')
loss_ax.legend()
plt.show()

# Validation F1 score, one point per epoch.
f1_fig, f1_ax = plt.subplots(figsize=(12, 5))
f1_ax.plot(f1_scores, label='F1 Score')
f1_ax.set_xlabel('Epoch')
f1_ax.set_ylabel('F1 Score')
f1_ax.set_title('F1 Score over Epochs')
f1_ax.legend()
plt.show()

In [None]:
# Persist only the model's state_dict (not the whole module object) to 'model_weights.pt'.
torch.save(model.state_dict(), 'model_weights.pt')

In [None]:
# Re-create the network with the same architecture that produced the checkpoint.
# The saved state_dict comes from `InceptionModel`; `SimpleCNN` is not defined anywhere
# in this notebook, so the previous code raised NameError on a fresh kernel.
model = InceptionModel().to(config.DEVICE)

# Load the saved state dict into the model. `map_location` lets a checkpoint written on
# one device (e.g. GPU) be restored on the currently configured device.
model.load_state_dict(torch.load('model_weights.pt', map_location=config.DEVICE))
model.eval()



In [None]:
import numpy as np

all_preds = []
all_labels = []

# Data loader used for inference. Evaluate on the held-out test split rather than the
# training data, so the reported metrics are not inflated by samples the model has seen.
infer_loader = DataLoader(test_dataset, batch_size=config.BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

# Make sure the model is in eval mode (single output, dropout disabled) and run the
# inputs on whatever device the model parameters currently live on.
model.eval()
infer_device = next(model.parameters()).device

with torch.no_grad():  # No gradient computation is needed for inference
    for images, labels in infer_loader:
        outputs = model(images.to(infer_device))
        # The model emits logits, so apply a sigmoid before thresholding at 0.5
        # (same rule as the validation loop).
        preds = (torch.sigmoid(outputs) > 0.5).float()

        # Flatten to 1-D so predictions (N, 1) and labels (N,) line up element-wise;
        # otherwise `==` below broadcasts to an (N, N) matrix and the accuracy is wrong.
        all_preds.extend(preds.cpu().numpy().ravel())
        all_labels.extend(labels.cpu().numpy().ravel())

# Convert the collected results to numpy arrays
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

# Overall accuracy: fraction of samples whose prediction equals the label
accuracy = (all_preds == all_labels).mean()
print(f"Overall accuracy: {accuracy:.4f}")


In [None]:
# F1 score of the predictions in `all_preds` against the labels in `all_labels`
# (both collected by the inference cell above).
f1 = f1_score(all_labels, all_preds)
print(f"F1 Score: {f1:.4f}")