In [10]:
import os, glob
import gdown # GoogleDrive Downloader
import zipfile
import random
import shutil

from PIL import Image

!pip install landingai
from landingai.pipeline.frameset import Frame
from landingai.predict import Predictor

In [1]:
# Download the dataset
url = 'YOUR_DRIVE_LINK_WHERE_DATA_FOLDER_IS-UPLOADED'
data_fp = os.path.join(os.getcwd(), 'data')
try: 
    os.makedirs(data_fp, exist_ok=False)
except OSError:
    os.system("rm -rf " + data_fp)
    os.makedirs(data_fp, exist_ok=False)
zip_fp = os.path.join(os.getcwd(), 'data', 'CMPT340_Project.zip')
try:
    gdown.download(url, zip_fp, quiet=False, fuzzy=True)
except Exception as e:
    print("Download Failed: " +e)
    os.remove(zip_fp)

TypeError: can only concatenate str (not "NameError") to str

In [None]:
# Decompress the ZIP
with zipfile.ZipFile(zip_fp, 'r') as zip_ref:
    zip_ref.extractall(os.path.join(os.getcwd(), 'data'))

In [None]:
# Set the paths
project_folder = os.path.join(os.getcwd(), 'data', 'CMPT340_Project')
dataset_folder = os.path.join(project_folder, 'BreastCancerDataset')
classification_folder = os.path.join(project_folder, 'dataset-classification')

# Validation set sizes
benign_validation_size = 10
malignant_validation_size = 15
normal_validation_size = 5

# Function to copy files from source to destination
def copy_files(source_folder, destination_folder, file_list):
    for file_name in file_list:
        source_path = os.path.join(source_folder, file_name)
        destination_path = os.path.join(destination_folder, file_name)
        shutil.copy(source_path, destination_path)

# Create the validation set folders
validation_folder = os.path.join(classification_folder, 'validation')

# Remove the existing validation folder if it exists
if os.path.exists(validation_folder):
    shutil.rmtree(validation_folder)

os.makedirs(validation_folder, exist_ok=True)

# Get the list of files in each class
benign_files = [file for file in os.listdir(os.path.join(dataset_folder, 'benign')) if '_mask' not in file]
malignant_files = [file for file in os.listdir(os.path.join(dataset_folder, 'malignant')) if '_mask' not in file]
normal_files = [file for file in os.listdir(os.path.join(dataset_folder, 'normal')) if '_mask' not in file]

# Get the list of files in the classification folder
classification_files = set(os.listdir(classification_folder))

# Randomly select validation files
benign_validation_files = random.sample(set(benign_files) - classification_files, min(benign_validation_size, len(benign_files)))
malignant_validation_files = random.sample(set(malignant_files) - classification_files, min(malignant_validation_size, len(malignant_files)))
normal_validation_files = random.sample(set(normal_files) - classification_files, min(normal_validation_size, len(normal_files)))

# Copy files to the validation set folders
copy_files(os.path.join(dataset_folder, 'benign'), validation_folder, benign_validation_files)
copy_files(os.path.join(dataset_folder, 'malignant'), validation_folder, malignant_validation_files)
copy_files(os.path.join(dataset_folder, 'normal'), validation_folder, normal_validation_files)

# Print a message indicating that the validation set creation was successful
print("Validation set creation completed successfully!")


Validation set creation completed successfully!


since Python 3.9 and will be removed in a subsequent version.
  benign_validation_files = random.sample(set(benign_files) - classification_files, min(benign_validation_size, len(benign_files)))
since Python 3.9 and will be removed in a subsequent version.
  malignant_validation_files = random.sample(set(malignant_files) - classification_files, min(malignant_validation_size, len(malignant_files)))
since Python 3.9 and will be removed in a subsequent version.
  normal_validation_files = random.sample(set(normal_files) - classification_files, min(normal_validation_size, len(normal_files)))


# Run inference on the validation set with the trained model deployed on LandingAI

In [None]:
# !pip install landingai 

In [None]:

# Set up Landing AI model
deployed_model_endpoint = "Your Endpoint ID"  # Replace with your actual Landing AI model endpoint
_api_key = "Your API"  # Replace with your actual Landing AI API key
predictor = Predictor(endpoint_id=deployed_model_endpoint, api_key=_api_key)

# Constants
BENIGN = "benign"
MALIGNANT = "malignant"
NORMAL = "normal"

def get_truth_label_from_name(filepath):
    # Adjust this function to match your file naming conventions and classes
    if 'benign' in filepath:
        return BENIGN
    elif 'malignant' in filepath:
        return MALIGNANT
    elif 'normal' in filepath:
        return NORMAL
    else:
        raise Exception("Invalid filepath")

# Run inferences on Landing AI
output_path = os.path.join(os.getcwd(), 'data/CMPT340_Project/dataset-classification', 'validation')   # Replace with the path to your validation set folder
output_path 

'/Users/sanyamsingh/Desktop/Courses/CMPT340/Project/breastCancer/data/CMPT340_Project/dataset-classification/validation'

In [None]:
test_images = sorted(glob.glob(os.path.join(output_path, '*.png')))
frames = [Frame.from_image(im, metadata={"truth": get_truth_label_from_name(im)}) for im in test_images]

correct_counts = {
    BENIGN: 0,
    MALIGNANT: 0,
    NORMAL: 0,
}

for f in frames:
    f.run_predict(predictor)
    truth_label = f.metadata["truth"]
    # prediction = {score: float, label_name:str, label_index: int}
    print("Truth: " + truth_label + "\tPrediction: " + f.predictions[0].label_name)
    if f.predictions[0].label_name == truth_label:
        correct_counts[truth_label] += 1
    f.overlay_predictions()

    
print("Accuracy for BENIGN: %.2f%%" % (correct_counts[BENIGN] / benign_validation_size*100))
print("Accuracy for MALIGNANT: %.2f%%" % (correct_counts[MALIGNANT] / malignant_validation_size*100))
print("Accuracy for NORMAL: %.2f%%" % (correct_counts[NORMAL] /normal_validation_size*100))

Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: malignant
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: normal
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: benign
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: normal	Prediction: normal
Truth: normal	Prediction: normal
Truth: normal	Predi

# Classification after Overlaying Mask images

In [None]:
# Set the paths
project_folder = os.path.join(os.getcwd(), 'data', 'CMPT340_Project')
dataset_folder = os.path.join(project_folder, 'BreastCancerDataset')
classification_overlayed_folder = os.path.join(project_folder, 'dataset-classification-overlayed')

# Validation set sizes
benign_validation_size = 10
malignant_validation_size = 15
normal_validation_size = 5

# Function to overlay images and masks, resize if needed, and save the result
def overlay_and_save(image_path, mask_path, output_folder, output_filename):
    try:
        # Check if both image and mask files exist
        if os.path.exists(image_path) and os.path.exists(mask_path):
            # Open the actual image and mask image
            image = Image.open(image_path)
            mask = Image.open(mask_path)

            # Ensure both images have the same color mode
            if image.mode != mask.mode:
                mask = mask.convert(image.mode)

            # Resize the images if their sizes don't match
            if image.size != mask.size:
                image = image.resize(mask.size)

            # Overlay the image with the mask
            overlayed = Image.blend(image, mask, alpha=0.5)

            # Save the overlayed image
            overlayed.save(os.path.join(output_folder, output_filename))
        else:
            # Print a message if files are not found
            print(f"File not found for: {image_path} or {mask_path}. Skipping...")
    except Exception as e:
        print(f"An error occurred for: {image_path} or {mask_path}. Error: {str(e)}")

# Create the validation set folder
validation_folder = os.path.join(classification_overlayed_folder, 'validation')

# Remove the existing validation folder if it exists
if os.path.exists(validation_folder):
    shutil.rmtree(validation_folder)

os.makedirs(validation_folder, exist_ok=True)

# Iterate through the subdirectories (benign, malignant, normal)
for label in ['benign', 'malignant', 'normal']:
    label_dir = os.path.join(dataset_folder, label)
    if os.path.isdir(label_dir):
        # Get the list of files in each class
        files = [file for file in os.listdir(label_dir) if '_mask' not in file]
        # Randomly select validation files
        validation_files = random.sample(files, min(eval(f"{label}_validation_size"), len(files)))
        for image_filename in validation_files:
            image_path = os.path.join(label_dir, image_filename)
            # Construct the mask file path based on the naming convention
            mask_filename = image_filename.replace('.png', '_mask.png')
            mask_path = os.path.join(label_dir, mask_filename)

            # Overlay and save the image
            overlay_and_save(image_path, mask_path, validation_folder, image_filename)

# Print a message indicating that the validation set creation was successful
print("Validation set creation completed successfully!")


Validation set creation completed successfully!


# Run inference on validation set of overlayed images with the trained model deployed from LandingAI 

In [None]:
# Set up Landing AI model
deployed_model_endpoint = "a9fe4fbd-671c-413c-837a-cee20cc79135"  # Replace with your actual Landing AI model endpoint
_api_key = "land_sk_BBSVmLozWw0Pe1VVBVzWPwSsNSRESbPeKXNyaQNSnb9WzGwv8a"  # Replace with your actual Landing AI API key
predictor = Predictor(endpoint_id=deployed_model_endpoint, api_key=_api_key)

# Constants
BENIGN = "benign"
MALIGNANT = "malignant"
NORMAL = "normal"

def get_truth_label_from_name(filepath):
    # Adjust this function to match your file naming conventions and classes
    if 'benign' in filepath:
        return BENIGN
    elif 'malignant' in filepath:
        return MALIGNANT
    elif 'normal' in filepath:
        return NORMAL
    else:
        raise Exception("Invalid filepath")

# Run inferences on Landing AI
output_path = os.path.join(os.getcwd(), 'data/CMPT340_Project/dataset-classification-overlayed', 'validation')   # Replace with the path to your validation set folder
output_path 

'/Users/sanyamsingh/Desktop/Courses/CMPT340/Project/data/CMPT340_Project/dataset-classification-overlayed/validation'

In [None]:
test_images = sorted(glob.glob(os.path.join(output_path, '*.png')))
frames = [Frame.from_image(im, metadata={"truth": get_truth_label_from_name(im)}) for im in test_images]

correct_counts = {
    BENIGN: 0,
    MALIGNANT: 0,
    NORMAL: 0,
}

for f in frames:
    f.run_predict(predictor)
    truth_label = f.metadata["truth"]
    # prediction = {score: float, label_name:str, label_index: int}
    print("Truth: " + truth_label + "\tPrediction: " + f.predictions[0].label_name)
    if f.predictions[0].label_name == truth_label:
        correct_counts[truth_label] += 1
    f.overlay_predictions()

    
print("Accuracy for BENIGN: %.2f%%" % (correct_counts[BENIGN] / benign_validation_size*100))
print("Accuracy for MALIGNANT: %.2f%%" % (correct_counts[MALIGNANT] / malignant_validation_size*100))
print("Accuracy for NORMAL: %.2f%%" % (correct_counts[NORMAL] /normal_validation_size*100))

Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: benign	Prediction: benign
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: benign
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: malignant	Prediction: malignant
Truth: normal	Prediction: normal
Truth: normal	Prediction: normal
Truth: normal	Predicti