In [None]:
#Step by step

import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import cv2
import os


In [None]:
# Load the workbook for a first look at its structure.
# The file is an Excel workbook (.xlsx), which pd.read_csv cannot parse, so it is
# read with pd.read_excel instead; with no sheet_name given, the first sheet is read.
# `csv_path` keeps its original name so any later reference to it still resolves.
csv_path = '/content/drive/MyDrive/datacombined.xlsx'
df = pd.read_excel(csv_path)

# Inspect the first rows of the loaded sheet
print(df.head())


In [None]:
import pandas as pd

# Workbook holding both classes; per the original author's notes, sheet index 0
# is the non-cancer table and sheet index 1 is the cancer table.
excel_path = '/content/drive/MyDrive/datacombined.xlsx'
df_non_cancer = pd.read_excel(excel_path, sheet_name=0)
df_cancer = pd.read_excel(excel_path, sheet_name=1)

# Tag every row with its binary class: 0 = non-cancer, 1 = cancer.
for frame, class_id in ((df_non_cancer, 0), (df_cancer, 1)):
    frame['label'] = class_id

# Stack the two tables into one frame with a fresh 0..n-1 index.
df = pd.concat([df_non_cancer, df_cancer], ignore_index=True)

# Sanity check: preview the rows and show how many samples each class has.
print(df.head())
print(df['label'].value_counts())


In [None]:
# Swap the literal text ".dcm" for ".jpg" in every 'InputFileName' value
# (plain substring replacement, not a regular expression).
jpg_file_names = df["InputFileName"].str.replace(".dcm", ".jpg", regex=False)
df["InputFileName"] = jpg_file_names


In [None]:
import cv2
import numpy as np
import os

# Path to images in Google Drive; load_images() below joins each row's
# 'InputFileName' onto this directory when looking for the image file.
# NOTE(review): the same path is assigned again further down in this notebook.
image_folder = '/content/drive/MyDrive/dicom_images_converted'

def load_images(df, image_folder, img_size=(224, 224)):
    """Load, resize and scale the images listed in ``df`` from one flat folder.

    NOTE(review): a second ``load_images`` is defined further down in this
    notebook and replaces this one as soon as that code runs.

    Args:
        df: DataFrame whose rows provide 'InputFileName' (joined onto
            ``image_folder``) and 'label' (collected alongside each image).
        image_folder: Directory that is expected to contain the files directly.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Only rows whose file exists
        and could be decoded contribute an entry; pixel values are divided by
        255.0.
    """
    images = []
    labels = []
    for _, row in df.iterrows():
        img_path = os.path.join(image_folder, row['InputFileName'])
        if not os.path.exists(img_path):
            print(f"Image not found: {img_path}")  # For troubleshooting
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable or corrupt file by returning None
            # rather than raising; passing that on to cv2.resize would crash the
            # whole load, so report the file and skip it instead.
            print(f"Could not load image: {img_path}")
            continue

        img = cv2.resize(img, img_size)  # Resize images to the target size
        img = img / 255.0  # Normalize pixel values
        images.append(img)
        labels.append(row['label'])
    return np.array(images), np.array(labels)

# Load images and labels for every row of df from image_folder.
# NOTE(review): `images` and `labels` are assigned again by a second
# load_images(...) call further down in this notebook, so this pass looks
# redundant — confirm before removing.
images, labels = load_images(df, image_folder)
print(f"Loaded {len(images)} images.")


import os
import cv2
import numpy as np
import pandas as pd

# Path to the main images directory in Google Drive; find_image_path() below
# walks this directory and its subdirectories when searching for a file name.
image_folder = '/content/drive/MyDrive/dicom_images_converted'

def find_image_path(image_folder, filename):
    """Return the path of the first file named ``filename`` under ``image_folder``.

    The directory tree is traversed with ``os.walk``; the first directory whose
    file list contains ``filename`` wins. Returns ``None`` when no directory
    contains such a file.
    """
    candidates = (
        os.path.join(dirpath, filename)
        for dirpath, _subdirs, filenames in os.walk(image_folder)
        if filename in filenames
    )
    return next(candidates, None)

def load_images(df, image_folder, img_size=(224, 224)):
    """Load, resize and scale the images listed in ``df``, searching subfolders.

    The directory tree under ``image_folder`` is walked once up front to build a
    file-name -> path index, instead of re-walking the whole tree for every row.
    When the same file name occurs in several directories, the first one met by
    ``os.walk`` is used, which is the same choice a per-row walk would make.

    Args:
        df: DataFrame whose rows provide 'InputFileName' (the bare file name to
            look up) and 'label' (collected alongside each image).
        image_folder: Root directory searched recursively.
        img_size: Target size handed to ``cv2.resize``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. Only rows whose file was
        found and could be decoded contribute an entry; pixel values are
        divided by 255.0.
    """
    # One pass over the tree; setdefault keeps the first path seen for a name.
    path_by_name = {}
    for root, _, files in os.walk(image_folder):
        for name in files:
            path_by_name.setdefault(name, os.path.join(root, name))

    images = []
    labels = []
    for _, row in df.iterrows():
        filename = row['InputFileName']
        img_path = path_by_name.get(filename)
        if not img_path:
            print(f"Image not found for: {filename}")  # For troubleshooting
            continue

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Could not load image: {img_path}")
            continue

        img = cv2.resize(img, img_size)  # Resize images to the target size
        img = img / 255.0  # Normalize pixel values
        images.append(img)
        labels.append(row['label'])
    return np.array(images), np.array(labels)

# Run the loader on df, which must already exist and provide the
# 'InputFileName' and 'label' columns that load_images() reads from each row.
# The resulting `images` / `labels` arrays are what the train/validation split uses.
images, labels = load_images(df, image_folder)
print(f"Loaded {len(images)} images.")


In [None]:
# Hold out 20% of the samples for validation. The split is seeded for
# repeatability and stratified on the labels so both parts keep the same
# class proportions.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)


In [None]:
# ResNet50 backbone with ImageNet weights and without its classification top,
# fed by a 224x224x3 input tensor.
# NOTE(review): the loaders in this notebook scale pixels to [0, 1]; Keras ships a
# dedicated `preprocess_input` for ResNet — confirm this scaling is what the
# pretrained weights expect.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)

# Keep the pretrained backbone fixed so that only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# New head: flatten the backbone features, two ReLU dense layers (128 and 64
# units), then a single sigmoid unit for the binary decision.
x = Flatten()(base_model.output)
for units in (128, 64):
    x = Dense(units, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=output)

# Binary cross-entropy with Adam (learning rate 1e-3); accuracy is tracked.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Data augmentation: random rotations, zooms, shifts, shears and horizontal
# flips are applied to the training images as batches are generated.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Training hyper-parameters
batch_size = 32
epochs = 10

# Round the step count up rather than down: floor division yields 0 steps when
# there are fewer training samples than one batch, and otherwise leaves the
# final partial batch out of every epoch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

# Train on augmented batches; validation uses the untouched hold-out arrays.
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    validation_data=(X_val, y_val),
    steps_per_epoch=steps_per_epoch,
    epochs=epochs
)


In [None]:
# Score the validation images, then threshold the sigmoid outputs at 0.5 to
# obtain hard 0/1 class predictions.
val_scores = model.predict(X_val)
y_pred = (val_scores > 0.5).astype("int32")

# Per-class precision / recall / F1 summary
report_text = classification_report(y_val, y_pred)
print(report_text)

# Counts of true vs. predicted classes
cm = confusion_matrix(y_val, y_pred)
print("Confusion Matrix:\n", cm)


In [None]:
# Write the trained model to Google Drive as a single HDF5 (.h5) file.
model_output_path = '/content/drive/MyDrive/breast_cancer_model.h5'
model.save(model_output_path)
