In [1]:
from zipfile import ZipFile
from matplotlib import pyplot as plt 
from PIL import Image
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms

In [2]:
# Path to the zipped dataset, given relative to the notebook's working directory.
source_file = './dataset/brain-cancer.zip'

In [3]:
# Open the dataset archive; get_image_array() and the loading loop read members from it.
# NOTE(review): the name `zip` shadows the built-in zip() for the rest of the notebook,
# and no close() is visible in this part of the notebook — consider renaming the
# variable (together with its users) and closing the archive when loading is done.
zip = ZipFile(source_file)

In [4]:
def get_label(filename):
    """Derive the class label from the folder names in an archive path.

    The path is lower-cased and split on '/', so matching is
    case-insensitive and only whole path components count.

    Args:
        filename: Path of the archive member, using '/' as separator.

    Returns:
        1 if a component is 'yes', 0 if a component is 'no' (checked in
        that order), otherwise None.
    """
    components = filename.lower().split('/')

    if 'yes' in components:
        return 1
    if 'no' in components:
        return 0
    return None

def get_image_array(filename):
    """Load one image from the open dataset archive as a standardized array.

    The member is read from the module-level ``zip`` archive, converted to
    RGB, resized to 224x224 and standardized to zero mean / unit variance
    over all of its pixels.

    Args:
        filename: Name of the archive member to read.

    Returns:
        A float64 numpy array of shape (224, 224, 3).
    """
    new_size = (224, 224)

    # Close the member handle as soon as the pixels have been decoded.
    with zip.open(filename) as member:
        # convert('RGB') gives every image exactly 3 channels: grayscale is
        # replicated across the channels, and alpha/palette images are mapped
        # to RGB instead of yielding 2- or 4-channel arrays.
        img_data = Image.open(member).convert('RGB').resize(new_size)

    # Pixel values span 0..255; a signed 8-bit array would wrap everything
    # above 127 to negative numbers, so decode straight to floating point.
    img_array = np.asarray(img_data, dtype=np.float64)

    # normalize; a constant image has zero spread, so avoid dividing by zero
    std = img_array.std()
    if std == 0:
        std = 1.0
    img_array_normalized = (img_array - img_array.mean()) / std

    return img_array_normalized


# Smoke-test get_label: folder matching is case-insensitive, and a path with
# neither a 'yes' nor a 'no' folder has no label.
assert [
    get_label(sample)
    for sample in (
        'brain_tumor_dataset/no/30 no.jpg',
        'brain_tumor_dataset/NO/31 no.jpg',
        'yes/Y95.jpg',
        'brain_tumor_dataset/yes/Y155.JPG',
        'brain_tumor_dataset/Y155.JPG',
    )
] == [0, 0, 1, 1, None]

In [5]:
# Load every archive member into memory. One record per file is collected
# first (a list of dicts, despite the variable's name); X and y are then
# derived from those records so all three stay in the same order.
data_results_dict = []

for filename in zip.namelist():
    # label: 0 = no, 1 = yes
    label = get_label(filename)
    # normalized pixel array for this file
    img_array = get_image_array(filename)

    data_results_dict.append({'filename': filename, 'label': label, 'img_array': img_array})

# X and y vars
images = [record['img_array'] for record in data_results_dict]
labels = [record['label'] for record in data_results_dict]


In [6]:
# Sanity check: images and labels are appended in lockstep, so the two lengths must match.
len(images), len(labels)

(506, 506)

In [7]:
# Check that all images have 3 channels by counting the size of the last
# (channel) axis. The number of dimensions, len(a.shape), is 3 for any
# H x W x C array no matter how many channels it holds, so it cannot
# distinguish RGB from e.g. RGBA.
size = [a.shape[-1] for a in images]

np.unique(size, return_counts=True)

(array([3]), array([506]))

In [10]:
# Class distribution of y (0 = no, 1 = yes): the counts recorded below show the classes are imbalanced.
np.unique(labels, return_counts=True)

(array([0, 1]), array([196, 310]))

In [None]:
class CNNModel(nn.Module):
    """CNN model built on torch.nn.Module."""

    def __init__(self):
        # nn.Module must be initialised before any sub-modules or parameters
        # are assigned to the instance.
        super().__init__()