<a href="https://colab.research.google.com/github/slp22/deep-learning-project/blob/main/dl_diabetic_retinopathy_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Deep Learning | Model

# Diabetic Retinopathy<a id='top'></a> 

## Imports

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os, shutil
import pandas as pd
import pathlib as Path
import pickle
import PIL
import random
import seaborn as sns
import sklearn as sk
import tensorflow as tf
import warnings
import zipfile

from glob import glob
from tensorflow import keras
from keras import backend as K
from keras.applications import VGG16
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, InputLayer, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras.preprocessing import image as IMG
from keras.preprocessing.image import ImageDataGenerator
# from keras.utils import to_categorical

from PIL import Image
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

%pylab inline
%matplotlib inline
%config InlineBackend.figure_formats = ['retina']  # or svg
pd.set_option('display.max_colwidth', None)
sns.set(context='notebook', style='whitegrid')
warnings.filterwarnings('ignore')

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Matplotlib version:", matplotlib.__version__)
print("Numpy version:", np.__version__)
print("Pandas version:", pd.__version__)
print("TensorFlow version:", tf.__version__)
print("Keras Version:", tf.keras.__version__)
print("Scikit-learn version:", sk.__version__)


In [None]:
# # https://colab.research.google.com/notebooks/pro.ipynb#scrollTo=23TOba33L4qf
# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#   print('Not connected to a GPU')
# else:
#   print(gpu_info)

## 1 | Research Design


* **Research Question:** How well can a neural network diagnose diabetic retinopathy from a retinal image?
* **Impact Hypothesis:** *The client, the [National Eye Institute](https://www.nei.nih.gov/), part of the National Institutes of Health, wants a model that can quickly identify [diabetic retinopathy](https://www.nei.nih.gov/learn-about-eye-health/eye-conditions-and-diseases/diabetic-retinopathy) in patients participating in early phase [clinical trials](https://iovs.arvojournals.org/article.aspx?articleid=2565675).*
* **Data source:** Personal Key Indicators of Heart Disease, n=319,795
* **Error metric:** Accruary for model iteratoins, precision and recall for final model

* **Data Dictionary:**
  * Classes = 5 stages of diabetic retinopathy):
    * **Normal eye**
    * **Mild** Nonproliferative Retinopathy: Microaneurysms are visbile, small areas of balloon-like swelling in the retina's tiny blood vessels.
    * **Moderate** Nonproliferative Retinopathy: Some blood vessels that nourish the retina are blocked.
    * **Severe** Nonproliferative Retinopathy: More blocked blood vessels, depriving several areas of the retina of blood supply; retina sends signals to the body to grow new blood vessels for nourishment.
    * **Proliferative** Retinopathy: Advanced stage; new blood vessels are abnormal and fragile; grow along the retina and along the surface of the clear, vitreous gel that fills the inside of the eye.


## 2 | Dataset: [Diabetic Retinopathy Retinal Images](https://www.kaggle.com/datasets/sovitrath/diabetic-retinopathy-2015-data-colored-resized)

### Download dataset from Kaggle

In [2]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle (1).json


{'kaggle.json': b'{"username":"sandralparedes","key":"746c1c655d0796973a3d3552fbcc97a5"}'}

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# assign to directory 
os.environ['KAGGLE_CONFIG_DIR'] = "/content"

In [5]:
# download dataset from kaggle
! kaggle datasets download -d sovitrath/diabetic-retinopathy-2015-data-colored-resized

diabetic-retinopathy-2015-data-colored-resized.zip: Skipping, found more recently modified local copy (use --force to force download)


In [6]:
# unzip kaggle file
zip_ref = zipfile.ZipFile('diabetic-retinopathy-2015-data-colored-resized.zip', 'r') #Opens the zip file in read mode
zip_ref.extractall('/tmp') #Extracts the files into the /tmp folder
zip_ref.close()

## 3 | Exploratory Data Analysis

### Data size and classes

In [7]:
# show number of classes and images
source_images_path = '/tmp/colored_images/colored_images/'
extracted_directories_path = os.listdir(source_images_path)

for image_directory in extracted_directories_path:
  print(image_directory, len(os.listdir(os.path.join(source_images_path, image_directory))))

Severe 873
Moderate 5292
Proliferate_DR 708
No_DR 25810
Mild 2443


In [None]:
filepath = '/tmp/colored_images/colored_images'
dataframe = {'image_path':[],'diagnosis':[]}

for diagnosis in os.listdir(filepath):
    print(diagnosis)
    if diagnosis != 'colored_images':        
        for image in os.listdir(filepath +"/"+diagnosis):
            if image != 'Dataset':
                dataframe['image_path'].append(filepath +"/"+diagnosis+"/"+ image)
                dataframe['diagnosis'].append(diagnosis)
dataframe = pd.DataFrame(dataframe)  
dataframe.info()
dataframe.head(2)

In [None]:
# distribution of five diagnosis classes in order of severity of disease

plt.figure(figsize=(15,6))
sns.barplot(dataframe.diagnosis.value_counts().index,
            dataframe.diagnosis.value_counts(),
            palette=None)
plt.title("Distribution of Diabetic Retinopahty Diagnosis Classes")
plt.xlabel("Diagnosis")
plt.ylabel("Frequency");


### Preview data

In [None]:
# view one healthy eye image

path = '/tmp/colored_images/colored_images/No_DR/10003_left.png'

healthy_image = tf.keras.preprocessing.image.load_img(
   path,
   grayscale=False, 
   color_mode="rgb", 
  #  target_size=(256, 256),
   interpolation="nearest")

print('No Diabetic Retinopthy')
print('Datatype:', type(healthy_image))
healthy_image 


In [None]:
# view one proliferate DR image (most advanced stage of disease)

path = '/tmp/colored_images/colored_images/Proliferate_DR/10017_left.png'

proliferate_image = tf.keras.preprocessing.image.load_img(
   path,
   grayscale=False, 
   color_mode="rgb", 
  #  target_size=(256, 256), 
   interpolation="nearest")

print('Proliferate Diabetic Retinopthy')
print('Datatype:', type(proliferate_image))
proliferate_image 


### **Data Summary: n = 35,126;  unbalanced classes**

## 4 | Validation

### Create test and train folders, split data, and move into class folders

In [None]:
# set up for train/test split

# DO NOT RUN ##### shutil.rmtree('/tmp/raw_data/train/')
# To re-run uncomment here

temp_path = '/tmp'
raw_data_dir_name = 'raw_data'

train_data_dir_name = 'train'
test_data_dir_name = 'test'
raw_data_directories = [train_data_dir_name, test_data_dir_name]

images_path =  os.path.join(temp_path, raw_data_dir_name)
tmp_directories = os.listdir(temp_path)

def build_raw_data_directory(parent, directory):
  dir_path = os.path.join(parent, directory)
  os.mkdir(dir_path)
  for dir in extracted_directories_path:
    os.mkdir(os.path.join(dir_path, dir))

if raw_data_dir_name not in tmp_directories:
  os.mkdir(images_path)
  for directory in raw_data_directories:
    build_raw_data_directory(images_path, directory)
else:
  print(raw_data_dir_name, " already exist")
  images_path_directories = os.listdir(images_path)
  for directory in raw_data_directories:
    if directory not in images_path_directories:
      build_raw_data_directory(images_path, directory)

In [None]:
## split data into train/test folders
## To re-run uncomment here

distribution = 0.2

raw_data_path = os.path.join(temp_path, raw_data_dir_name)
test_data_path_root = os.path.join(raw_data_path, test_data_dir_name)
train_data_path_root = os.path.join(raw_data_path, train_data_dir_name)

data_dirs = os.listdir(source_images_path)

for dir in data_dirs:
  path = os.path.join(source_images_path, dir)
  test_data_path = os.path.join(test_data_path_root, dir)
  train_data_path = os.path.join(train_data_path_root, dir)
  images = os.listdir(path)
  print(path, test_data_path)
  for image in images:
    random_number = random.uniform(0, 1)
    is_test_image = random_number < distribution
    source_image_path = os.path.join(path, image)
    # shutil.copy(src, dst)
    if is_test_image:
      shutil.copy(source_image_path, test_data_path)
      # print(source_image_path, "goes to test", test_data_path)
    else:
      shutil.copy(source_image_path, train_data_path)
      # print(source_image_path, "goes to train", train_data_path)

test_data_path_root = os.path.join(raw_data_path, test_data_dir_name)
train_data_path_root = os.path.join(raw_data_path, train_data_dir_name)

test_dirs = os.listdir(test_data_path_root)
train_dirs = os.listdir(train_data_path_root)

for dir in test_dirs:
  path = os.path.join(test_data_path_root, dir)
  files = os.listdir(path)
  print(path, len(files))

for dir in train_dirs:
  path = os.path.join(train_data_path_root, dir)
  files = os.listdir(path)
  print(path, len(files))

### **Train (80%) and test (20%) folders**

## 5 | Baseline 

### Non-Deep Learning Model: Logistic Regression

#### Set up data for logistic regression
Code adapted from [sdblass](https://github.com/sdblass/Metis_coursework/blob/master/6_Deep_Learning/2_baseline_model.ipynb)

In [21]:
# create raw_data_small set to use in logistic regression model
original_raw_data_path = '/tmp/raw_data'
raw_data_copy = '/tmp/raw_data_small'
shutil.copytree(original_raw_data_path, raw_data_copy)

raw_data_copy_test_path = '/tmp/raw_data_small/test'
raw_data_copy_train_path = '/tmp/raw_data_small/train'

# cull to keep 200 images from each class in raw_data_small
paths_to_reduce_images = [raw_data_copy_test_path, raw_data_copy_train_path]

for path in paths_to_reduce_images:
  dir_names = os.listdir(path)
  for dir_name in dir_names:
    images_path = os.path.join(path, dir_name)
    files = os.listdir(images_path)
    for index, file in enumerate(files):
      if index >= 200:
        file_path = os.path.join(images_path, file)
        os.remove(file_path)

In [88]:
root = '/tmp/raw_data_small/train'
retinas = os.listdir(root)
retinas = [retina for retina in retinas if retina[0]!='.']

In [90]:
# convert image to 1D vector
test_img_path = os.listdir(root+f'/{retinas[0]}')[0]
test_img_path = root+f'/{retinas[0]}' + f'/{test_img_path}'
test_img = Image.open(test_img_path)

In [None]:
# num pixels
dimension = np.array(test_img).reshape(-1).shape[0]
dimension

In [92]:
# function to count images
def image_count(num_diagnoses, root): 
  num_images = 0
  for i, retina in enumerate(retinas):
    if i == num_diagnoses: break
    num_images += len(os.listdir(root + f'/{retina}'))
  return num_images

In [None]:
# num images in raw_data_small; 5 classes (diagnoses) 
image_count(5, root) 

In [None]:
# # initialize 2D matrix with dimensions equal to num of images times num of pixels
# image_repo = np.zeros([image_count(5, root), dimension])
# image_repo

In [97]:
# function to vectorize images in raw_data_small/train
def img_vectorization(num_diagnoses, root):
  image_repo = np.zeros([image_count(num_diagnoses, root), dimension])
  count = 0
  diagnosis = []
  for i, retina in enumerate(retinas):
    images = os.listdir(root + f'/{retina}')
    for image in images:
      img = Image.open(root + f'/{retina}' + f'/{image}')
      row = np.array(img).reshape(-1)
      image_repo[count, :] = row
      diagnosis.append(retina)
      img.close()
      count += 1
    if i == num_diagnoses - 1: 
      return image_repo, diagnosis
  return image_repo, diagnosis
# print(image_repo.shape)

In [99]:
X_train, y_train_dx = img_vectorization(5, root)

In [100]:
X_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [None]:
print('X train shape:', X_train.shape)
print('y train (dx) length:', len(y_train_dx))

In [103]:
# vectorize raw_data_small/test data
root_test = '/tmp/raw_data_small/test'
X_test, y_test_dx = img_vectorization(5, root_test)

In [108]:
print(X_test)
print(X_test.shape)
print(len(y_test_dx))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
(1000, 150528)
1000


In [113]:
# scale data

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

type(X_train)

numpy.ndarray

#### PCA

In [None]:
# PCA 2 components
pca = PCA(n_components=2)
X_train_2PC = np.asarray(pca.fit_transform(X_train))
X_test_2PC = np.asarray(pca.transform(X_test))

sns.scatterplot(x=X_train_2PC[:, 0], 
                y=X_train_2PC[:, 1],
                hue=y_train_dx, 
                alpha=.3,
                palette=sns.color_palette("colorblind", 5))
plt.xlabel("PCA Component 1")
plt.ylabel("PCA Component 2")
plt.title("Diabetic Retinopathy Plotted with PCA")
plt.legend(loc='upper right');


In [None]:
# logistic regression baseline with PCA
baseline_lr = LogisticRegression(multi_class = 'multinomial', solver = 'lbfgs')
baseline_lr.fit(X_train_2PC, y_train_dx)

score = baseline_lr.score(X_test_2PC, y_test_dx)
print('Logistic Regresion PCA Baseline Score = ', score)

#### Truncated SVD


In [115]:
# truncated SVD
svd = TruncatedSVD(n_components=2)
X_train_2d = svd.fit_transform(X_train)
X_test_2d = svd.transform(X_test)

print('X_train_2d', X_train_2d.shape)
print('X_test_2d', X_test_2d.shape)

In [None]:
#plot TruncatedSVD
sns.set(style='white', rc={"figure.figsize":(10, 7)}, font_scale=1.3)
sns.scatterplot(x=X_train_2d[:, 0], 
                y=X_train[:, 1],
                hue=y_train_dx 
                alpha=.3,
                palette=sns.color_palette("colorblind", 5))
plt.xlabel("SVD Component 1")
plt.ylabel("SVD Component 2")
plt.title("Diabetic Retinopathy Plotted with SVD");
plt.legend(loc='upper right');

In [121]:
# logistic regression baseline with SVD
baseline_lr = LogisticRegression(multi_class = 'multinomial', solver = 'lbfgs')
baseline_lr.fit(X_train_2d, y_train_dx)

score = baseline_lr.score(X_test_2d, y_train_dx)
print('Logistic Regresion SVD Baseline Score = ', score)

LogisticRegression(multi_class='multinomial')

### **Logistic regression scores poorly on accruacy for classifying diabetic retinopathy based on retinal images**

## 6 | Deep Learning Models

In [None]:
# convert label to a 2D array with binary columnns for each class (one-hot encoding)
y_train_dx_cat = to_categorical(y_train_dx)
y_train_dx_cat

### 6.1 Base CNN

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, GlobalAveragePooling2D, InputLayer
'''
 In this network structure, note that we follow the typical CNN heuristic of 
 gradually reducing width and height dimenions over time with max pooling
 (typically by a factor of 2), but increasing the filter depth dimension 
 to find increasingly specific patterns. These models are typically compromised 
 of a series of convolutional blocks followed by a flattening operation and 
 a series of fully connected layers at the terminus.
'''

NN = Sequential()

NN.add(InputLayer(input_shape=X_train.shape[1:]))

# Conv block 1.  You can add more conv steps to
# each block to increase model capacity.
NN.add(Conv2D(filters=10, kernel_size=3, activation='relu', padding='same'))
# NN.add(Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'))
NN.add(MaxPooling2D())

# Conv block 2 - note we increase filter dimension as we move
# further into the network. You can add more conv steps to
# each block to increase model capacity.
NN.add(Conv2D(filters=20, kernel_size=3, activation='relu', padding='same'))
# NN.add(Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'))
NN.add(MaxPooling2D())

# Conv block 3 - The conv blocks should be ended with either a flatten
# layer or a global pooling layer. These transform the 2D layers to 1D
# to match the following dense layers.
NN.add(Conv2D(filters=30, kernel_size=3, activation='relu', padding='same'))

NN.add(GlobalAveragePooling2D())

# Fully connected block - flattening followed by dense and output layers
# NN.add(Flatten())
NN.add(Dense(20, activation='relu'))
NN.add(Dense(10, activation='softmax'))  # 10 target classes

NN.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
NN.summary()
NN.fit(X_train, y_train_cat, epochs=5, verbose=1, validation_split=0.25,
       callbacks=[
           keras.callbacks.ModelCheckpoint(
               'models/mnist.{epoch:02d}-{val_loss:.2f}.hdf5',
               save_best_only=True)
       ])  # track progress as we fit

In [None]:
preds = np.argmax(NN.predict(X_test), axis=1)
accuracy_score(y_test, preds)

### 6.2 Apply transfer learning

#### MobileNet

In [None]:
from tensorflow.keras.applications import mobilenet_v2
from tensorflow.keras.preprocessing import image


def prepare_image(img_path):

    img = image.load_img(img_path, target_size=(224, 224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = mobilenet_v2.preprocess_input(x)
    return x

In [None]:
from google.colab import files
uploaded = files.upload()

In [None]:
# This is all we need to load and use the full pretrained model!
model = mobilenet_v2.MobileNetV2(weights='imagenet',)

x = prepare_image('dog.jpeg') #update this path if your image folder is in a different directory than the notebook!
# df = pd.read_csv(io.BytesIO(
#     uploaded['dog.jpeg']))
# df.head()
out = model.predict(x)

print('Predicted:', mobilenet_v2.decode_predictions(out))

In [None]:
from google.colab import files
uploaded = files.upload()

In [None]:
x = prepare_image('sloth.jpg')

out = model.predict(x)

print('Predicted:', mobilenet_v2.decode_predictions(out))

In [None]:
preds = np.argmax(NN.predict(X_test), axis=1)
accuracy_score(y_test, preds)

In [None]:
model.summary()

#### Transfer learning on pretrained base

In [None]:
from tensorflow.keras.models import Model

# note we exclude the final dense layers by setting include_top=False,
# and add new ones to train from scratch below
base_model = mobilenet_v2.MobileNetV2(weights='imagenet', include_top=False, input_shape=(224,224,3)) 
 
# Freeze convolutional layers
for layer in base_model.layers:
    layer.trainable = False    

# Establish new fully connected block
x = base_model.output
x = Flatten()(x) # flatten from convolution tensor output 
x = Dense(100, activation='relu')(x) # number of layers and units are hyperparameters, as usual
x = Dense(50, activation='relu')(x)
predictions = Dense(5, activation='softmax')(x) # should match # of classes predicted

# define formal model object to train and compile it as usual
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# i.e. if we had training images and our own labels, we could run
model.fit(X_train,y_train)

## 7 | Model: Diagnosiing Diabetic Retinopathy

### Test Model

### Confusion Matrix

### Classification Report 

### Predictions

#  code snippets

### https://github.com/sdblass/Metis_coursework/blob/master/6_Deep_Learning/3_Basic_CNN.ipynb

### Code adapted from [ngoodby](https://github.com/ngoodby/Metis-Deep-Learning-Project/blob/master/model_creation.ipynb)

### Code adapted from [PurpleGrace](https://github.com/PurpleGrace/Deep_Learning_Tools_Image_Classification/blob/main/code/1.EDA_Sample_Images.ipynb)

In [None]:
def img_to_array(img_path):
    img=  IMG.load_img(img_path,target_size=[150,150])
    x = IMG.img_to_array(img)
    return x

In [None]:
filepath = 'work_data'

work_data_df = {'image_path':[],'category':[],'class':[]}

for class_ in os.listdir(filepath):
    if class_ != '.DS_Store':  
        for category in os.listdir(filepath +"/"+class_):
            if category != '.DS_Store':        
                for image in os.listdir(filepath +"/"+class_ +"/"+category):
                    if image != '.DS_Store':
                        work_data_df['image_path'].append(filepath +"/"+class_+"/"+category+"/"+ image)
                        work_data_df['category'].append(category)
                        work_data_df['class'].append(class_)
work_data_df = pd.DataFrame(work_data_df)    
work_data_df.head()

In [None]:
work_data_df.to_csv('work_data_df')


In [None]:
X_train = []
X_val = []
X_test = []

data_train = work_data_df[work_data_df['class'] == 'train']
y_train = data_train.category.astype("category").cat.codes
y_train_hotcode = keras.utils.to_categorical(y_train)
for index in data_train.index:
    X_train.append(img_to_array(data_train.loc[index,'image_path']))
    
data_val = work_data_df[work_data_df['class'] == 'validation']
y_val = data_val.category.astype("category").cat.codes
y_val_hotcode = keras.utils.to_categorical(y_train)
for index in data_val.index:
    X_val.append(img_to_array(data_val.loc[index,'image_path']))    
    
data_test = work_data_df[work_data_df['class'] == 'test']
y_test = data_test.category.astype("category").cat.codes
y_test_hotcode = keras.utils.to_categorical(y_test)
for index in data_test.index:
    X_test.append(img_to_array((data_test.loc[index,'image_path'])))   
    
X_train = np.array(X_train)
X_test = np.array(X_test)
X_val = np.array(X_val)

In [None]:
### Save X_digit, y_digit
image_data_array = (X_train, X_test,X_val,y_train,y_test,y_val)
with open('image_data_array.pickle','wb') as file:
    pickle.dump(image_data_array,file)

In [None]:
print(f"We have {len(X_train)} train dataset")
print(f"We have {len(X_val)} train dataset")
print(f"We have {len(X_test)} test dataset")

In [None]:
global category_map
category_int_map = dict(zip(data_train.category,y_train))
int_category_map = dict(zip(y_train,data_train.category))
int_category_map

In [None]:
category_int_map


In [125]:
img_width, img_height = 150, 150

train_data_dir = '/tmp/raw_data/test'
validation_data_dir = '/tmp/raw_data/test'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16

In [126]:
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

In [127]:
model = Sequential()
model.add(InputLayer(input_shape=X_train.shape[1:])))

model.add(Conv2D(1000, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(750, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))

# model.add(Dropout(0.5))

model.add(Dense(1))
model.add(Activation('sigmoid'))


In [None]:
model.compile(optimizer='sgd',
              optimizer='mse',
              metrics=[tf.keras.metrics.Recall(), tf.keras.metrics.Precision()]

# model.compile(loss='binary_crossentropy',
#               optimizer='rmsprop',
#               metrics=['precision', 'recall'])

In [129]:
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        '/tmp/raw_data/train',  
        target_size=(150, 150), 
        batch_size=batch_size,
        class_mode='binary')  

validation_generator = test_datagen.flow_from_directory(
        '/tmp/raw_data/test',
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

Found 33725 images belonging to 5 classes.
Found 12782 images belonging to 5 classes.


In [130]:
model.fit_generator(
        train_generator,
        steps_per_epoch=nb_train_samples // batch_size,
        epochs=5,
        validation_data=validation_generator,
        validation_steps=nb_validation_samples // batch_size
        )

model.save_weights('first_try.h5')  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [27]:
class_names = ['Proliferate_DR', 'Mild','No_DR', 'Severe', 'Moderate']
class_names_label = {class_name:i for i, class_name in enumerate(class_names)}
size = (224,224)
num_classes = len(class_names)

In [None]:
def load_data(class_names):
    
    datasets = ['/tmp/raw_data/train', 
                '/tmp/raw_data/test']
    output = []
    
    # Iterate through training and test set folders
    for dataset in datasets:

        images = []
        labels = []
        folders = os.listdir(dataset)
        folders = [value for value in folders if value != ".DS_Store"]
        
        print("Loading {}".format(dataset))
        
        # Iterate through each folder corresponding to a category
        for folder in folders:
            label = class_names_label[folder]
            
            # Iterate through each image in the folder
            for file in tqdm(os.listdir(os.path.join(dataset, folder))):

                # Get the path name of the image
                img_path = os.path.join(os.path.join(dataset, folder), file)

                # Open and resize the image
                image = cv2.imread(img_path)
#                 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
#                 image = cv2.resize(image, size) 

                # Append the image and its corresponding label to the output
                images.append(image)
                labels.append(label)
                
        images = np.array(images, dtype = None)
        labels = np.array(labels, dtype = None)   
        
        output.append((images, labels))

    return output

In [None]:
(train_images, train_labels), (test_images, test_labels) = load_data(class_names)

In [None]:
train_images = train_images.flatten()
print('train images type:', type(train_images))
print('train images shape:', train_images.shape)

In [None]:
test_images = test_images.flatten()
print('test images type:', type(test_images))
print('test images shape:', test_images.shape)

### Code source: https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d 