In [None]:
# Import required libraries
import os
from kaggle.api.kaggle_api_extended import KaggleApi
import zipfile
import json

# Locate the Kaggle API token (kaggle.json) in the current user's home directory
# instead of a machine-specific absolute path. Change this if yours lives elsewhere.
kaggle_json_path = os.path.join(os.path.expanduser("~"), ".kaggle", "kaggle.json")

# Load Kaggle credentials manually from kaggle.json
with open(kaggle_json_path, 'r') as f:
    kaggle_credentials = json.load(f)

# Set Kaggle environment variables for the API to use
os.environ['KAGGLE_USERNAME'] = kaggle_credentials['username']
os.environ['KAGGLE_KEY'] = kaggle_credentials['key']

# Initialize and authenticate the Kaggle API
api = KaggleApi()
api.authenticate()
print("Kaggle API authenticated successfully!")

# Dataset identifier on Kaggle, in "<owner>/<dataset-slug>" form
dataset = 'paultimothymooney/breast-histopathology-images'

# Directory the archive is extracted into; the loading cell reads from ./data/...
extract_dir = 'data'
os.makedirs(extract_dir, exist_ok=True)

# Download and extract in a single step. The earlier version extracted into './'
# and then tried to unzip an undefined `file_name`, which raised NameError and
# left nothing under ./data for the following cells.
try:
    api.dataset_download_files(dataset, path=extract_dir, unzip=True)
    print("Dataset downloaded successfully!")
    print("Dataset unzipped successfully!")
except Exception as e:
    # Best effort, as before: report the problem and let the notebook continue.
    print(f"Error during download: {e}")


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Class folder names; an image's label is the position of its folder name in
# this list (malignant = 1, benign = 0)
categories = ["0", "1"]

# Side length, in pixels, that every image is resized to
IMG_SIZE = 50

# Root directory of the extracted dataset
data_dir = './data/IDC_regular_ps50_idx5'

# Prepare the dataset
def _find_class_dirs(data_dir, category):
    """Return the folders holding images of ``category`` under ``data_dir``."""
    direct = os.path.join(data_dir, category)
    if os.path.isdir(direct):
        return [direct]
    # Otherwise look exactly one level down: data_dir/<group>/<category>.
    nested_dirs = []
    for entry in sorted(os.listdir(data_dir)):
        nested = os.path.join(data_dir, entry, category)
        if os.path.isdir(nested):
            nested_dirs.append(nested)
    return nested_dirs


def load_data(data_dir, class_names=None, img_size=None):
    """Load every image under ``data_dir`` as a resized array paired with its label.

    A class folder may sit directly under ``data_dir`` (``data_dir/<class>``) or
    one level down (``data_dir/<group>/<class>``); the direct layout is used
    whenever it exists.
    NOTE(review): the nested form is presumably how the Kaggle archive is laid
    out (one folder per patient) - confirm against the extracted data.

    Args:
        data_dir: Root directory to scan.
        class_names: Class folder names; an image's label is the index of its
            folder name in this list. Defaults to the module-level ``categories``.
        img_size: Side length images are resized to. Defaults to the
            module-level ``IMG_SIZE``.

    Returns:
        A list of ``[image, label]`` pairs, where ``image`` is the array
        returned by ``cv2.resize`` and ``label`` is an int.

    Raises:
        FileNotFoundError: If ``data_dir`` does not exist or contains no folder
            for one of the classes.
    """
    if class_names is None:
        class_names = categories
    if img_size is None:
        img_size = IMG_SIZE

    data = []
    skipped = 0
    for category in class_names:
        class_num = class_names.index(category)
        class_dirs = _find_class_dirs(data_dir, category)
        if not class_dirs:
            raise FileNotFoundError(
                f"No '{category}' folder found in {data_dir!r} or its immediate subfolders"
            )
        for path in class_dirs:
            # Sorted so the load order does not depend on the filesystem.
            for img in sorted(os.listdir(path)):
                img_array = cv2.imread(os.path.join(path, img))
                if img_array is None:
                    # cv2.imread returns None instead of raising when a file
                    # cannot be decoded.
                    skipped += 1
                    continue
                try:
                    img_resized = cv2.resize(img_array, (img_size, img_size))
                except cv2.error:
                    skipped += 1
                    continue
                data.append([img_resized, class_num])

    if skipped:
        # Report skipped files rather than dropping them silently.
        print(f"Skipped {skipped} unreadable file(s) while loading {data_dir}")
    return data

# Load the data
data = load_data(data_dir)
print(f"Total images loaded: {len(data)}")
if not data:
    # Fail here with a clear message instead of deep inside train_test_split.
    raise ValueError(f"No images were loaded from {data_dir!r}; check the dataset path.")

# Shuffle the data with a fixed seed so the shuffle no longer differs between
# runs (the splits below already use a fixed random_state)
import random
random.Random(42).shuffle(data)

# Split features and labels
images = [features for features, _ in data]
labels = [label for _, label in data]

# Convert to NumPy arrays and normalize the images.
# Casting to float32 up front and dividing in place avoids materialising a
# float64 copy of the whole image array.
X = np.asarray(images, dtype=np.float32).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
X /= 255.0  # Normalize pixel values
y = np.array(labels)

# One-hot encoding of labels
y = to_categorical(y, 2)

# Split into training (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"Training set: {len(X_train)} images")
print(f"Validation set: {len(X_val)} images")
print(f"Test set: {len(X_test)} images")


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Assemble the CNN one layer at a time: two convolution/pooling stages
# followed by a dense classifier head
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
# Output layer with 2 classes (benign and malignant)
model.add(Dense(2, activation='softmax'))

# Configure the optimizer, the loss for one-hot labels, and the tracked metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary
model.summary()


In [None]:
# Fit on the training split, scoring the validation split after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)


In [None]:
# Score the model on the held-out test split and report accuracy as a percentage
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


In [None]:
import matplotlib.pyplot as plt

# Draw one figure per tracked metric (accuracy first, then loss), each showing
# the training curve next to the validation curve
for metric_key, metric_title in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric_key], label=f'Training {metric_title}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_title}')
    plt.xlabel('Epoch')
    plt.ylabel(metric_title)
    plt.legend()
    plt.show()


In [None]:
# Write the trained model to an HDF5 file in the working directory
model.save(filepath='breast_cancer_detector.h5')


In [None]:
# Read the saved model back from disk, rebinding `model` to the loaded copy
model = tf.keras.models.load_model(filepath='breast_cancer_detector.h5')


In [None]:
# Predict on a new image
image_path = 'path_to_new_image.jpg'  # Replace with the image you want to classify
img = cv2.imread(image_path)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; stop here with a readable error rather than letting
    # cv2.resize fail with an opaque assertion.
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Apply the same preprocessing as the training data: resize, add a batch
# dimension, and scale pixel values to [0, 1]
img_resized = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
img_array = np.array(img_resized).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
img_array = img_array / 255.0

prediction = model.predict(img_array)
# Take the arg-max along the class axis, then pick the single image in the batch
predicted_class = np.argmax(prediction, axis=-1)[0]
print(f"Predicted class: {predicted_class}")  # 0 for benign, 1 for malignant
