## Setting up Connection between Drive and Google Colab

In [1]:
# Load the Drive helper and mount
from google.colab import drive

# Mounting the drive -- This will prompt for authorization.
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
!ls "/content/drive/My Drive"

 Aadhar.pdf
'AI Saturdays Participant Guide (Public).gdoc'
'All About Programming'
'Ambassador Resource Kit'
'Arpita (Apu)'
'Artificial Intelligence Chapter - Jabalpur .gform'
 BRIAN-TRACY
 Building+your+Deep+Neural+Network+-+Step+by+Step+v8.ipynb
 Certificates
 chatbot.ipynb
 cnn_cifar10_clean.ipynb
'Colab Notebooks'
'cover letter srajan.docx'
 CV_Latest.pdf
 CV-SRAJAN.compressed.pdf
 CV-SRAJAN.docx
 CV-SRAJAN-Latest.pdf
'Cycle2 - Ambassador Resource Kit'
'Deep Learning and Neural Nets.pptx'
'Driving License.pdf'
'Event Feedback.gform'
'https:  www (1).lin.txt'
'https:  www.lin.txt'
 IMG1.jpg
 IMG_20180228_182329.jpg
 IMG_20180228_182447.jpg
 IMG_20180522_013222.jpg
 IMG_20180706_205024_487.jpg
 Keras-task.ipynb
 lesson1.ipynb
'Like It Happened Yesterday - Ravinder Singh.pdf'
'Logistic Regression with a Neural Network mindset.ipynb'
'Machine Learning and Python '
 Marksheets
'[merrells.space] Udemy - machinelearning'
'ML Formulae'
'Mumbai Cha Raja - Ganpati Song.mp3'
'Perian DS and ML

## Locating Data on Drive

In [0]:
import os

os.chdir("/content/drive/My Drive/Tumor Project/training/dataset")   # directory contains all the pictures sent as it is


In [4]:
files = os.listdir()
print(len(files))       # 2125 images sent via the zip

2125


## Creating and splitting Train and Test Sets

In [0]:
os.chdir("/content/drive/My Drive/Tumor Project/training")

#### Creating folder named *"train"* to store training set

In [0]:
train = 'train'

# Create target Directory if don't exist
if not os.path.exists(train):
  os.mkdir(train)
  print("Directory " , train ,  " Created ")
else:    
  print("Directory " , train ,  " already exists")

Directory  train  Created 


#### Creating folder named *"test"* to store test set

In [0]:
test = 'test'

# Create target Directory if don't exist
if not os.path.exists(test):
  os.mkdir(test)
  print("Directory " , test ,  " Created ")
else:    
  print("Directory " , test ,  " already exists")

Directory  test  Created 


#### Splitting data into *"train"* folder

In [0]:
import random
import shutil

dir.sort()  # making sure that the filenames have a fixed order before shuffling
random.seed(23)  # to maintain the result as it is no matter how many times the code is run
random.shuffle(dir)  # shuffles the ordering of filenames

for file in files[:1805]:  
  src = 'dataset/' + file
  dest = 'train/' + file
  shutil.move(src, dest) 
  
# train ~ 85% of 2125
# test ~ 15% of 2125

In [0]:
os.chdir("/content/drive/My Drive/Tumor Project/training/dataset")

In [0]:
files = os.listdir()
print(len(files)) 

In [0]:
os.chdir("/content/drive/My Drive/Tumor Project/training")

#### Transferring remaining data into *"test"* folder

In [0]:
for file in files:  
  src = 'dataset/' + file
  dest = 'test/' + file
  shutil.move(src, dest) 
  

#### Division of images is :

Train : 1805 images

Test : 320 images

## Extracting labels from filenames

In [0]:
os.chdir('/content/drive/My Drive/Tumor Project/training/test')

In [0]:
test = os.listdir()
print(len(test))

In [0]:
### Function to extract labels fom filename in the given path ###

import numpy as np

def extract_labels(path, dir):
  
  """
    Extracts labels from the filename provided at the given path
    
    Arguments:
    path -- str type, path of the folder containing the files
    dir -- list, containing the names of the files
    
    Returns:
    labels -- labels extracted from the filenames, numpy array of shape (y, 1) containing 0 or 1 (0: notumor, 1: tumor)
    
    """
  
  path = str(path)
  list = path.split('/')
  
  if (list[-1] == 'test' or list[-1] == 'train'):
    labels = []
    for i in range(len(dir)):
      if (dir[i].split('.')[0].split('-')[2] == 'True'):
        label = 1
      else: 
        label = 0
      labels.append(label)
    labels = np.array(labels)
    return labels
    
  else:
    print('Indefinite Call to the function')

In [0]:
y_test = extract_labels(os.getcwd(), test)
print(y_test.shape)

In [0]:
os.chdir('/content/drive/My Drive/Tumor Project/training')

In [0]:
train = os.listdir()
y_train = extract_labels(os.getcwd(), train)
print(y_train.shape)

In [0]:
# Saving np label arrays in disk for future use and fast loading

np.save("y_train.npy", y_train)      
np.save("y_test.npy", y_test)      

## Converting train and test images into numpy arrays

In [0]:
os.getcwd()

'/content/drive/My Drive/Tumor Project/training/train'

In [0]:
### Function to convert images in specified path into numpy arrays ###

from keras.preprocessing import image
def image_to_nparray(dir):
  """
    Converts image files from the given folder into numpy arrays
    
    Arguments:
    dir -- list, contains the filenames in given path (train or test folders)
    
    Returns:
    img_array -- numpy array of shape (m,h,w,c), contains the image values in form of np array
    
    """
  
  img_list = []
  for str in dir:
    img = image.load_img(str, target_size = (128, 128))
    x = image.img_to_array(img)
    img_list.append(x)
  img_array = np.array(img_list)
  del(img_list)
  return img_array

In [0]:
x_train = image_to_nparray(train)
print(x_train.shape)

In [0]:
os.chdir('/content/drive/My Drive/Tumor Project/training/test')

In [0]:
x_test = image_to_nparray(test)
print(x_test.shape)

(320, 128, 128, 3)


In [0]:
os.chdir('/content/drive/My Drive/Tumor Project/training')

In [0]:
# saving array to load it faster in future

np.save("x_train.npy", x_train)      
np.save("x_test.npy", x_test)      

## Loading numpy arrays from memory

In [0]:
os.chdir('/content/drive/My Drive/Tumor Project/training')

In [0]:
import numpy as np

x_train = np.load('x_train.npy')
x_test = np.load('x_test.npy')
y_train = np.load('y_train.npy')
y_test = np.load('y_test.npy')

## Setting up the Model

In [8]:
### Importing Keras in top of Tensorflow ###

from keras.models import Sequential
from keras.layers import Dense,Flatten
from keras.applications.resnet50 import ResNet50
import keras.backend as K
K.set_image_data_format('channels_last')

Using TensorFlow backend.


In [21]:
new_model = Sequential()                                                               # defining the model instance in keras
new_model.add(ResNet50(include_top = False, input_shape = (128,128,3), classes = 2))   # inbuilt resnet weights from ImageNet
new_model.add(Flatten())                                                               # flatenning the resnet output
new_model.add(Dense(1, activation = 'sigmoid'))                                        # the last sigmoid layer with one node for classification



In [0]:
new_model.compile(
    loss = 'binary_crossentropy',      # loss function
    optimizer = 'adam',                # optimizer
    metrics = ['accuracy']             # report accuracy during training
)

In [23]:
print(new_model.summary())             # printing the model's summary

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 4, 4, 2048)        23587712  
_________________________________________________________________
flatten_2 (Flatten)          (None, 32768)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 32769     
Total params: 23,620,481
Trainable params: 23,567,361
Non-trainable params: 53,120
_________________________________________________________________
None


In [12]:
os.getcwd()

'/content/drive/My Drive/Tumor Project/training'

In [0]:
### saving the model plot for future reference

from keras.utils import plot_model
plot_model(new_model, to_file='model.png', show_shapes = True)

In [0]:
# freezing the initial 7 layers

# for layers in new_model.layers[-7:]:
#    layers.trainable = True

## Image Preprocessing before Model Fit

#### Checking on array shapes before feeding

In [24]:
y_train.shape

(1805,)

In [25]:
x_train.shape

(1805, 128, 128, 3)

In [26]:
x_test.shape

(320, 128, 128, 3)

In [27]:
y_test.shape

(320,)

#### Copying data into separate classes for image preprocessing

In [0]:
os.chdir("/content/drive/My Drive/Tumor Project/training/train")

In [0]:
train = os.listdir()

##### Making sub-directories for each class in train directory 

In [0]:
tumor = 'tumor'

# Create target Directory if don't exist
if not os.path.exists(tumor):
  os.mkdir(tumor)
  print("Directory " , tumor ,  " Created ")
else:    
  print("Directory " , tumor ,  " already exists")

Directory  tumor  Created 


In [0]:
nonTumor = 'non-tumor'

# Create target Directory if don't exist
if not os.path.exists(nonTumor):
  os.mkdir(nonTumor)
  print("Directory " , nonTumor ,  " Created ")
else:    
  print("Directory " , nonTumor ,  " already exists")

Directory  non-tumor  Created 


In [0]:
# Moving images into separate class folders for train set

for f in test:
  if (f.split('.')[0].split('-')[-1] == 'True'):
    src = f
    dest = 'tumor/' + f
    shutil.move(src, dest)
  else:
    if (f.split('.')[0].split('-')[-1] == 'False'):
      src = f
      dest = 'non-tumor/' + f
      shutil.move(src, dest)
    else:
      print('Failed!')
    
print('success')

success


##### Similary making sub-directories and moving images into them for test set

In [0]:
os.chdir("/content/drive/My Drive/Tumor Project/training/test")

In [0]:
test = os.listdir()

In [0]:
tumor = 'tumor'

# Create target Directory if don't exist
if not os.path.exists(tumor):
  os.mkdir(tumor)
  print("Directory " , tumor ,  " Created ")
else:    
  print("Directory " , tumor ,  " already exists")

Directory  tumor  Created 


In [0]:
nonTumor = 'non-tumor'

# Create target Directory if don't exist
if not os.path.exists(nonTumor):
  os.mkdir(nonTumor)
  print("Directory " , nonTumor ,  " Created ")
else:    
  print("Directory " , nonTumor ,  " already exists")

Directory  non-tumor  Created 


In [0]:
# Moving images from test directory into class-wise sub-directories

for f in test:
  if (f.split('.')[0].split('-')[-1] == 'True'):
    src = f
    dest = 'tumor/' + f
    shutil.move(src, dest)
  else:
    if (f.split('.')[0].split('-')[-1] == 'False'):
      src = f
      dest = 'non-tumor/' + f
      shutil.move(src, dest)
    else:
      print('Failed!')
    
print('success')

#### Image Preprocessing

In [28]:
os.getcwd()

'/content/drive/My Drive/Tumor Project/training'

In [29]:
# Preprocessing the images and performing augmentation 
# which will help in case of imbalanced dataset

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(rescale = 1./255,                                   
                                   zoom_range = 0.2,
                                   horizontal_flip = True,
                                   zca_whitening=True)


test_datagen= ImageDataGenerator(rescale=1./255)


train = train_datagen.flow_from_directory('train',
                                           target_size=(128, 128),
                                           batch_size = 32,
                                           class_mode='binary')


test = test_datagen.flow_from_directory('test',
                                         target_size=(128, 128),
                                         batch_size = 32,
                                         class_mode='binary')



Found 1805 images belonging to 2 classes.
Found 320 images belonging to 2 classes.


## Training the Model

In [30]:
new_model.fit_generator(train,                  # generated after image preprocessing
                        epochs = 20,            
                        steps_per_epoch = 56,   # int(total images/batchsize)
                        verbose = 2)

Epoch 1/20




 - 360s - loss: 0.7069 - acc: 0.6741
Epoch 2/20
 - 28s - loss: 0.4746 - acc: 0.7019
Epoch 3/20
 - 24s - loss: 0.4128 - acc: 0.7779
Epoch 4/20
 - 24s - loss: 0.3990 - acc: 0.7933
Epoch 5/20
 - 24s - loss: 0.3802 - acc: 0.8114
Epoch 6/20
 - 24s - loss: 0.3603 - acc: 0.8315
Epoch 7/20
 - 24s - loss: 0.3617 - acc: 0.8410
Epoch 8/20
 - 24s - loss: 0.3661 - acc: 0.8404
Epoch 9/20
 - 24s - loss: 0.3317 - acc: 0.8382
Epoch 10/20
 - 24s - loss: 0.3183 - acc: 0.8517
Epoch 11/20
 - 24s - loss: 0.2665 - acc: 0.8711
Epoch 12/20
 - 24s - loss: 0.3000 - acc: 0.8650
Epoch 13/20
 - 24s - loss: 0.2483 - acc: 0.8829
Epoch 14/20
 - 24s - loss: 0.2808 - acc: 0.8828
Epoch 15/20
 - 24s - loss: 0.3240 - acc: 0.8736
Epoch 16/20
 - 24s - loss: 0.2635 - acc: 0.8851
Epoch 17/20
 - 24s - loss: 0.2427 - acc: 0.8962
Epoch 18/20
 - 24s - loss: 0.2335 - acc: 0.8896
Epoch 19/20
 - 24s - loss: 0.2331 - acc: 0.8910
Epoch 20/20
 - 24s - loss: 0.2046 - acc: 0.9119


<keras.callbacks.History at 0x7fb50a966320>

In [0]:
os.getcwd()

'/content/drive/My Drive/Tumor Project/training'

In [0]:
### Saving the Model on Disk ###
new_model.save('my-model.h5')
new_model.save_weights("model-weights.h5")

In [0]:
### Loading the Model from Disk ###

from keras.models import load_model
new_model = load_model('my-model.h5')

In [0]:
scores = new_model.evaluate_generator(test, steps = 56)
print("Accuracy = ", scores[1])

Accuracy =  0.8655133928571429


In [45]:
from sklearn.metrics import confusion_matrix

y_pred = new_model.predict_generator(test, steps = 10)
print(confusion_matrix(y_test, y_pred.round()))

[[ 35  80]
 [ 50 155]]


## Rough Work

In [0]:
print('Size of dataset : ' + str(len(dataset)))
print('Size of train : ' + str(len(train)))
print('Size of test : ' + str(len(test)))

if((len(train) + len(test)) != len(dataset)):
  print('Error!')

Size of dataset : 2125
Size of train : 1805
Size of test : 320


In [0]:
# count of classes in train
# tumor = 1216 | non tumor = 589
## imbalanced ## 

tumor_counter = 0
non_tumor_counter = 0

for f in train:
  if (f.split('.')[0].split('-')[-1] == 'True'):
    tumor_counter += 1
  else:
    if (f.split('.')[0].split('-')[-1] == 'False'): 
      non_tumor_counter += 1
    else:
      print('Unidentified!')
    
print('tumor counter = ' + str(tumor_counter))
print('non tumor counter = ' + str(non_tumor_counter))

tumor counter = 1216
non tumor counter = 589


In [0]:
# count of classes in test

tumor_counter = 0
non_tumor_counter = 0

for f in test:
  if (f.split('.')[0].split('-')[-1] == 'True'):
    tumor_counter += 1
  else:
    if (f.split('.')[0].split('-')[-1] == 'False'): 
      non_tumor_counter += 1
    else:
      print('Unidentified!')
    
print('tumor counter = ' + str(tumor_counter))
print('non tumor counter = ' + str(non_tumor_counter))

tumor counter = 205
non tumor counter = 115
