In [3]:
import pandas as pd

train_path = "ChestNew-20250214T104539Z-001/ChestNew/trainfiltered.csv"
test_path = "ChestNew-20250214T104539Z-001/ChestNew/testfiltered.csv"

try:
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    print(train_df.head(), test_df.head())

except Exception as e:
    print(str(e))

         Name                                     Hasil Normal/abnormal  \
0  2202341086  Mencurigai nodul multipel paru bilateral        Abnormal   
1  2307410255                         Efusi pleura kiri        Abnormal   
2  2302394915                         Efusi pleura kiri        Abnormal   
3  2408458946                   Hidropneumothorax kanan        Abnormal   
4  2306409371                          massa paru kanan        Abnormal   

   class_id  
0         1  
1         1  
2         1  
3         1  
4         1            Name                Hasil Normal/abnormal  class_id
0  1906189456         Kardiomegali        Abnormal         1
1  2410466550         Kardiomegali        Abnormal         1
2  1610094592           Edema paru        Abnormal         1
3  2308413109         Kardiomegali        Abnormal         1
4  1611099078  Pneumonia bilateral        Abnormal         1


In [4]:
import os

image_folder = "ChestNew-20250214T104539Z-001/ChestNew/"

missing_files = [f for f in train_df["Name"].astype(str) + ".png" if not os.path.exists(os.path.join(image_folder, f))]

if missing_files:
    print("Gambar yang hilang:", missing_files)
else:
    print("Semua Gambar tersedia!")

Semua Gambar tersedia!


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from tqdm import tqdm
import os

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

class MedicalDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        img_name = str(self.data.iloc[index, 0]) + ".png"
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert("RGB")

        label = int(self.data.iloc[index, 3])
        label = torch.tensor(label, dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, label
    
train_dataset = MedicalDataset(train_path, image_folder, transform=transform)
test_dataset = MedicalDataset(test_path, image_folder, transform=transform)

print(f"Total Training Images: {len(train_dataset)}")
print(f"Total Test Image Image: {len(test_dataset)}")

Total Training Images: 796
Total Test Image Image: 200


In [6]:
train_dataset.data["class_id"].value_counts()

class_id
0    403
1    393
Name: count, dtype: int64

In [37]:
# Resampled method
from imblearn.over_sampling import RandomOverSampler 


X = train_dataset.data[["Name", "Hasil", "Normal/abnormal"]]
y = train_dataset.data["class_id"].values

ros = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

df_resampled = pd.DataFrame(X_resampled, columns=["Name", "Hasil", "Normal/abnormal"])
df_resampled["class_id"] = y_resampled

df_resampled.reset_index(drop=True)

df_resampled.to_csv("ChestNew-20250214T104539Z-001/ChestNew/train_resampled.csv", index=False)

In [11]:
df_resampled["class_id"].value_counts()

class_id
1    403
0    403
Name: count, dtype: int64

In [27]:
resampled_path = "ChestNew-20250214T104539Z-001/ChestNew/train_resampled.csv"

In [28]:
resampled_dataset = MedicalDataset(resampled_path, image_folder, transform=transform)


In [29]:
# DataLoader
train_loader = DataLoader(resampled_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

In [30]:
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


import timm

# 3. LOAD MODEL INCEPTION-RESNET V2 DARI TIMM
# ============================
model = timm.create_model("inception_resnet_v2", pretrained=True)  # Load model pre-trained
num_ftrs = model.classif.in_features  # Jumlah fitur sebelum output
model.classif = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 2),  # 2 kelas (sesuaikan jumlah kelas dataset)
    nn.LogSoftmax(dim=1)
)

model = model.to(device)

In [31]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

num_epochs = 20

for epoch in range(num_epochs):
    # Training Mode
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    # Validation Mode
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == labels).sum().item()
            val_total += labels.size(0)

    val_loss /= len(test_loader)
    val_acc = val_correct / val_total

    # Print hasil per epoch
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")


Epoch 1/20 - Training:   0%|          | 0/13 [00:00<?, ?it/s]


IndexError: index 3 is out of bounds for axis 0 with size 2