In [None]:
!pip install opencv-python pydicom numpy scikit-learn pandas tensorflow flwr
!pip install efficientnet


Collecting pydicom
  Downloading pydicom-3.0.1-py3-none-any.whl.metadata (9.4 kB)
Collecting flwr
  Downloading flwr-1.19.0-py3-none-any.whl.metadata (15 kB)
Collecting click<8.2.0 (from flwr)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting cryptography<45.0.0,>=44.0.1 (from flwr)
  Downloading cryptography-44.0.3-cp39-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)
Collecting iterators<0.0.3,>=0.0.2 (from flwr)
  Downloading iterators-0.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting pathspec<0.13.0,>=0.12.1 (from flwr)
  Downloading pathspec-0.12.1-py3-none-any.whl.metadata (21 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting pycryptodome<4.0.0,>=3.18.0 (from flwr)
  Downloading pycryptodome-3.23.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting tomli<3.0.0,>=2

Collecting efficientnet
  Downloading efficientnet-1.1.1-py3-none-any.whl.metadata (6.4 kB)
Collecting keras-applications<=1.0.8,>=1.0.7 (from efficientnet)
  Downloading Keras_Applications-1.0.8-py3-none-any.whl.metadata (1.7 kB)
Downloading efficientnet-1.1.1-py3-none-any.whl (18 kB)
Downloading Keras_Applications-1.0.8-py3-none-any.whl (50 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.7/50.7 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: keras-applications, efficientnet
Successfully installed efficientnet-1.1.1 keras-applications-1.0.8


In [None]:
import os
import numpy as np
import cv2

def resize_npy_images(input_folder, output_folder, size=(224, 224)):
    """Resize every ``.npy`` array found in a folder and save it under the same name.

    Args:
        input_folder: Directory whose entries ending in ``.npy`` are loaded.
        output_folder: Directory that receives the resized arrays; it is
            created (including parents) when it does not exist yet.
        size: Target size passed unchanged to ``cv2.resize``.
    """
    os.makedirs(output_folder, exist_ok=True)
    for entry in os.listdir(input_folder):
        if not entry.endswith(".npy"):
            continue  # anything that is not a saved NumPy array is ignored
        source_path = os.path.join(input_folder, entry)
        target_path = os.path.join(output_folder, entry)
        array = np.load(source_path)
        np.save(target_path, cv2.resize(array, size))


In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

def create_model(input_shape=(224, 224, 3)):
    """Build and compile a binary classifier on top of a frozen EfficientNetB0.

    Args:
        input_shape: Shape of one input image, forwarded to ``EfficientNetB0``.

    Returns:
        A compiled ``Sequential`` model: EfficientNetB0 (ImageNet weights, no
        top, not trainable) -> global average pooling -> Dense(128, relu)
        -> Dense(1, sigmoid), using Adam, binary cross-entropy and accuracy.
    """
    backbone = EfficientNetB0(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
    )
    # Freeze the pretrained backbone so only the layers added below are trained.
    backbone.trainable = False

    classifier = models.Sequential()
    classifier.add(backbone)
    classifier.add(layers.GlobalAveragePooling2D())
    classifier.add(layers.Dense(128, activation='relu'))
    classifier.add(layers.Dense(1, activation='sigmoid'))

    classifier.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return classifier


In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

# Folder that load_and_preprocess_images() scans for .npy files.
input_dir = "/content/drive/MyDrive/LIDC-NPY"
# Root folder under which split_and_save() creates one "client<N>" sub-folder per client.
output_dir = "/content/drive/MyDrive/LungCancerFLData1"
# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

def load_and_preprocess_images(source_dir=None):
    """Load every ``.npy`` file in a folder as a 224x224, 3-channel image.

    Files are visited in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, so without sorting the row order of the returned arrays
    could differ from run to run.

    Args:
        source_dir: Folder to scan. When omitted, the module-level
            ``input_dir`` is used, which matches the zero-argument call.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per file
        that was processed successfully. Each image is a 2-D slice resized to
        224x224 and repeated three times along a new last axis. A label is 1
        when the file name contains "malign" (case-insensitive) and 0
        otherwise; this is placeholder logic, not a real annotation source.
        Both arrays are empty when no file could be used.
    """
    folder = input_dir if source_dir is None else source_dir
    images = []
    labels = []  # Replace with actual labels if available
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".npy"):
            continue
        path = os.path.join(folder, file)
        try:
            img = np.load(path)

            # Handle both 2D and 3D
            if img.ndim == 3:
                # Middle index along axis 0 (assumed to be the slice/z axis — TODO confirm)
                mid_slice = img[img.shape[0] // 2]
            elif img.ndim == 2:
                mid_slice = img
            else:
                print(f"Skipping {file} due to unexpected shape: {img.shape}")
                continue

            # Resize to 224x224 and convert to 3 channels
            img_resized = cv2.resize(mid_slice, (224, 224))
            img_resized = np.stack([img_resized] * 3, axis=-1)

            images.append(img_resized)
            labels.append(1 if "malign" in file.lower() else 0)  # Dummy logic

        except Exception as e:
            # Deliberate best effort: one unreadable file must not abort the
            # whole scan, but the failure is reported rather than hidden.
            print(f"⚠️ Skipping {file} due to error: {e}")

    return np.array(images), np.array(labels)

def split_and_save(images, labels, num_clients=4, seed=42):
    """Split the data into train/test sets, shard both across clients, and save them.

    A stratified 80/20 train/test split is made first. Each part is then cut
    into ``num_clients`` contiguous shards, and every client's shards are
    written to ``<output_dir>/client<k>/`` as ``x_train.npy``, ``y_train.npy``,
    ``x_test.npy`` and ``y_test.npy``.

    Notes:
        * Shards are produced with ``np.array_split``. When a set's size is
          not a multiple of ``num_clients`` the leftover samples are given to
          the first shards instead of being dropped; for evenly divisible
          sizes the shard sizes are the same as with fixed-size slicing.
        * ``train_test_split`` already shuffles by default, so no separate
          shuffle (and no extra copy of the whole dataset) is made first.
        * The shards are random subsets of the shuffled data; no deliberate
          label skew is introduced between clients.

    Args:
        images: Array of samples, indexed along the first axis.
        labels: Array of labels aligned with ``images``; also used for
            stratification.
        num_clients: Number of client folders to create (at least 1).
        seed: Passed as ``random_state`` to ``train_test_split`` so the same
            input yields the same split. Use ``None`` for a fresh random
            split on every call.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1 or ``images`` is empty.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")
    if len(images) == 0:
        raise ValueError("No images to split; nothing was loaded from the input folder")

    # Split into train+test (shuffled and stratified by label)
    x_train_all, x_test_all, y_train_all, y_test_all = train_test_split(
        images, labels, test_size=0.2, stratify=labels, random_state=seed
    )

    # x and y of the same set have equal length, so array_split cuts them at
    # identical boundaries and samples stay aligned with their labels.
    client_shards = zip(
        np.array_split(x_train_all, num_clients),
        np.array_split(y_train_all, num_clients),
        np.array_split(x_test_all, num_clients),
        np.array_split(y_test_all, num_clients),
    )

    for client_number, (x_train, y_train, x_test, y_test) in enumerate(client_shards, start=1):
        client_dir = os.path.join(output_dir, f"client{client_number}")
        os.makedirs(client_dir, exist_ok=True)

        np.save(os.path.join(client_dir, "x_train.npy"), x_train)
        np.save(os.path.join(client_dir, "y_train.npy"), y_train)
        np.save(os.path.join(client_dir, "x_test.npy"), x_test)
        np.save(os.path.join(client_dir, "y_test.npy"), y_test)

        print(f"\n📦 Client{client_number} - Training Samples: {len(x_train)}")
        print(f"🧪 Client{client_number} - Testing Samples: {len(x_test)}")

if __name__ == "__main__":
    # Load all usable .npy files, then write the per-client train/test files
    # under output_dir. `images` and `labels` stay available as notebook globals.
    images, labels = load_and_preprocess_images()
    split_and_save(images, labels)



✅ Saved client datasets in: /content/drive/MyDrive/LungCancerFLData


✅ Saved client datasets in: /content/drive/MyDrive/LungCancerFLData



In [None]:
import os
import numpy as np

# Root folder that holds one "client<N>" sub-folder per client.
client_root = "/content/drive/MyDrive/LungCancerFLData1"
num_clients = 4

# Report how many train/test samples each client has on disk.
for i in range(1, num_clients + 1):
    client_dir = os.path.join(client_root, f"client{i}")
    x_train_path = os.path.join(client_dir, "x_train.npy")
    x_test_path = os.path.join(client_dir, "x_test.npy")

    if os.path.exists(x_train_path):
        # Only the sample count is needed, so memory-map the file instead of
        # reading the whole array into RAM.
        x_train = np.load(x_train_path, mmap_mode="r")
        print(f"📦 Client{i} - Training Samples: {len(x_train)}")
    else:
        # Say so explicitly; otherwise a wrong path produces no output at all.
        print(f"⚠️ Client{i} - missing file: {x_train_path}")
    if os.path.exists(x_test_path):
        x_test = np.load(x_test_path, mmap_mode="r")
        print(f"🧪 Client{i} - Testing Samples: {len(x_test)}\n")
    else:
        print(f"⚠️ Client{i} - missing file: {x_test_path}\n")



📦 Client1 - Training Samples: 164
🧪 Client1 - Testing Samples: 41

📦 Client2 - Training Samples: 164
🧪 Client2 - Testing Samples: 41

📦 Client3 - Training Samples: 164
🧪 Client3 - Testing Samples: 41

📦 Client4 - Training Samples: 164
🧪 Client4 - Testing Samples: 41



In [None]:
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split

# Set paths
# Folder that load_and_preprocess_images() scans for .npy files.
input_dir = "/content/drive/MyDrive/LIDC-NPY"
# Root folder under which split_and_save() creates one "client<N>" sub-folder per client.
output_dir = "/content/drive/MyDrive/LungCancerFLData1"
# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)

# Function to load and preprocess image slices stored as .npy arrays
def load_and_preprocess_images(source_dir=None):
    """Load every ``.npy`` file in a folder as a 224x224, 3-channel image.

    Entries are processed in sorted name order because ``os.listdir`` makes
    no ordering guarantee; sorting keeps the row order of the returned arrays
    stable across runs.

    Args:
        source_dir: Folder to scan. When omitted, the module-level
            ``input_dir`` is used, which matches the zero-argument call.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays with one entry per file
        that was processed successfully. Each image is a 2-D slice resized to
        224x224 and repeated three times along a new last axis. A label is 1
        when the file name contains "malign" (case-insensitive) and 0
        otherwise; this is placeholder logic, not a real annotation source.
        Both arrays are empty when no file could be used.
    """
    folder = input_dir if source_dir is None else source_dir
    images = []
    labels = []
    for file in sorted(os.listdir(folder)):
        if not file.endswith(".npy"):
            continue
        path = os.path.join(folder, file)
        try:
            img = np.load(path)

            if img.ndim == 3:
                # Middle index along axis 0 (assumed to be the slice axis — TODO confirm)
                mid_slice = img[img.shape[0] // 2]
            elif img.ndim == 2:
                mid_slice = img
            else:
                print(f"⚠️ Skipping {file}: Unexpected shape {img.shape}")
                continue

            # Resize to 224x224 and stack to 3 channels (RGB)
            img_resized = cv2.resize(mid_slice, (224, 224))
            img_rgb = np.stack([img_resized] * 3, axis=-1)

            images.append(img_rgb)
            labels.append(1 if "malign" in file.lower() else 0)  # Dummy label logic

        except Exception as e:
            # Deliberate best effort: a single bad file is reported and
            # skipped so the remaining files are still processed.
            print(f"⚠️ Error loading {file}: {e}")

    return np.array(images), np.array(labels)

# Function to split data and save to client folders
def split_and_save(images, labels, num_clients=4, seed=42):
    """Make a stratified train/test split, shard it per client, and save the shards.

    The data is split 80/20 (stratified by ``labels``). Train and test sets
    are then each divided into ``num_clients`` contiguous shards, and client
    ``k`` gets ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and
    ``y_test.npy`` written to ``<output_dir>/client<k>/``.

    Notes:
        * ``np.array_split`` is used for sharding, so leftover samples (when
          a set's size is not a multiple of ``num_clients``) are assigned to
          the first shards rather than silently discarded. Evenly divisible
          sizes give the same shard sizes as fixed-size slicing.
        * No manual pre-shuffle is done: ``train_test_split`` shuffles by
          default, and skipping the extra shuffle avoids copying the dataset.
        * Each shard is a random subset of the shuffled data; clients are not
          given deliberately different label distributions.

    Args:
        images: Array of samples, indexed along the first axis.
        labels: Array of labels aligned with ``images``; also used for
            stratification.
        num_clients: Number of client folders to create (at least 1).
        seed: ``random_state`` for ``train_test_split``; the same input and
            seed reproduce the same split. ``None`` gives a different random
            split on each call.

    Raises:
        ValueError: If ``num_clients`` is smaller than 1 or ``images`` is empty.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")
    if len(images) == 0:
        raise ValueError("No images to split; nothing was loaded from the input folder")

    # Global train/test split (shuffled and stratified by label)
    x_train_all, x_test_all, y_train_all, y_test_all = train_test_split(
        images, labels, test_size=0.2, stratify=labels, random_state=seed
    )

    # Samples and labels of one set have equal length, so array_split cuts
    # both at the same boundaries and they remain aligned.
    x_train_parts = np.array_split(x_train_all, num_clients)
    y_train_parts = np.array_split(y_train_all, num_clients)
    x_test_parts = np.array_split(x_test_all, num_clients)
    y_test_parts = np.array_split(y_test_all, num_clients)

    for i in range(num_clients):
        x_train, y_train = x_train_parts[i], y_train_parts[i]
        x_test, y_test = x_test_parts[i], y_test_parts[i]

        client_dir = os.path.join(output_dir, f"client{i+1}")
        os.makedirs(client_dir, exist_ok=True)

        np.save(os.path.join(client_dir, "x_train.npy"), x_train)
        np.save(os.path.join(client_dir, "y_train.npy"), y_train)
        np.save(os.path.join(client_dir, "x_test.npy"), x_test)
        np.save(os.path.join(client_dir, "y_test.npy"), y_test)

        print(f"\n📦 Client{i+1} - Training Samples: {len(x_train)}")
        print(f"🧪 Client{i+1} - Testing Samples: {len(x_test)}")

# Main execution
if __name__ == "__main__":
    # Load all usable .npy files, then write the per-client train/test files
    # under output_dir. `images` and `labels` stay available as notebook globals.
    images, labels = load_and_preprocess_images()
    split_and_save(images, labels)


KeyboardInterrupt: 