In [None]:
import pandas as pd

# Load the merged MURA training labels (columns: image_path, label).
df = pd.read_csv('Dataset/MURA-v1.1/merged_train_image_labels.csv')

# NOTE(review): rows with a missing label are treated as abnormal (1) —
# confirm this is the intended default.
df['label'] = df['label'].fillna(1)
print(df.head())

# Class balance is a property of the label column. The frame has no 'path'
# column, so counting on it raised KeyError on a fresh kernel.
counts = df['label'].value_counts()
print("Normal (0):", counts.get(0, 0))
print("Abnormal (1):", counts.get(1, 0))

                                          image_path  label
0  MURA-v1.1/train/XR_SHOULDER/patient00001/study...    1.0
1  MURA-v1.1/train/XR_SHOULDER/patient00001/study...    1.0
2  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
3  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
4  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
Normal (0): 21935
Abnormal (1): 14872


In [10]:
import os
import random

import pandas as pd
import torch
from PIL import Image
from torchvision import transforms

# Oversample the abnormal class (label 1) with augmented copies until it has as
# many rows as the normal class (label 0), then write a shuffled, balanced CSV.

SEED = 42  # shared by row sampling, shuffling and the augmentation RNGs
BASE_DIR = 'Dataset/'

# Row sampling below is seeded, so seed the augmentation randomness as well;
# otherwise the generated images differ on every run. Recent torchvision
# releases draw from torch's global RNG, older ones used Python's `random`,
# so both are seeded.
random.seed(SEED)
torch.manual_seed(SEED)

# Load the CSV file
df = pd.read_csv('Dataset/MURA-v1.1/merged_train_image_labels.csv')

# NOTE(review): rows with a missing label are treated as abnormal (1) —
# confirm this is the intended default.
df['label'] = df['label'].fillna(1)
print(df.head())

# Paths in the CSV are relative to BASE_DIR; strip any leading separators so
# os.path.join does not discard BASE_DIR, then normalise to OS-native form.
df['image_path'] = df['image_path'].apply(
    lambda p: os.path.normpath(os.path.join(BASE_DIR, p.lstrip(r'\\/')))
)

# Output folder for augmented images. Normalised like the image paths above so
# the balanced CSV does not mix '/' and '\\' separators.
aug_dir = os.path.normpath(os.path.join(BASE_DIR, 'augmented_train'))
os.makedirs(aug_dir, exist_ok=True)

# Simple augment pipeline: resize, random 224x224 crop, random flip, and colour
# jitter applied with probability 0.8.
augment = transforms.Compose([
    transforms.Resize((246, 246)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply(
        [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8
    ),
])

df_norm = df[df.label == 0]
df_abn = df[df.label == 1]
n_to_gen = len(df_norm) - len(df_abn)

if n_to_gen > 0:
    # Sample with replacement: n_to_gen may exceed the number of abnormal rows.
    df_sample = df_abn.sample(n=n_to_gen, replace=True, random_state=SEED)
    new_rows = []
    for i, src in enumerate(df_sample['image_path']):
        # Close each source file as soon as its pixels are copied; leaving
        # thousands of handles to the garbage collector can exhaust the
        # process's open-file limit.
        with Image.open(src) as img:
            img_rgb = img.convert('RGB')
        img_aug = augment(img_rgb)
        name, ext = os.path.splitext(os.path.basename(src))
        # The running index keeps names unique even when the same source image
        # is sampled more than once.
        new_name = f"{name}_aug_{i:04d}{ext}"
        dst = os.path.join(aug_dir, new_name)
        img_aug.save(dst)
        new_rows.append({'image_path': dst, 'label': 1})

    df_aug = pd.DataFrame(new_rows)
    # Append the augmented rows and shuffle the combined frame.
    df_balanced = pd.concat([df, df_aug], ignore_index=True).sample(
        frac=1, random_state=SEED
    )
    out_csv = os.path.join(BASE_DIR, 'balanced_train_image_labels.csv')
    df_balanced.to_csv(out_csv, index=False)
    print(f"Generated {len(df_aug)} augmented samples. Saved balanced CSV to:\n  {out_csv}")
else:
    print("Already balanced or minority ≥ majority.")

                                          image_path  label
0  MURA-v1.1/train/XR_SHOULDER/patient00001/study...    1.0
1  MURA-v1.1/train/XR_SHOULDER/patient00001/study...    1.0
2  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
3  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
4  MURA-v1.1/train/XR_SHOULDER/patient00002/study...    1.0
Generated 7063 augmented samples. Saved balanced CSV to:
  Dataset/balanced_train_image_labels.csv


In [13]:
import pandas as pd

# Re-read the balanced label file written by the augmentation step, fill any
# missing labels with 1, and print the per-class row counts.
df = pd.read_csv('Dataset/balanced_train_image_labels.csv').assign(
    label=lambda frame: frame['label'].fillna(1)
)
print(df.head())

counts = df['label'].value_counts()
for caption, cls in (("Normal (0):", 0), ("Abnormal (1):", 1)):
    # .get falls back to 0 when a class is absent from the file.
    print(caption, counts.get(cls, 0))

                                          image_path  label
0  Dataset\MURA-v1.1\train\XR_FOREARM\patient0937...    0.0
1        Dataset/augmented_train\image3_aug_5774.png    1.0
2  Dataset\MURA-v1.1\train\XR_FOREARM\patient0953...    0.0
3  Dataset\MURA-v1.1\train\XR_FINGER\patient04218...    0.0
4  Dataset\MURA-v1.1\train\XR_FINGER\patient04177...    0.0
Normal (0): 21935
Abnormal (1): 21935
