In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from keras.applications import VGG16

Using TensorFlow backend.


In [2]:
pwd

'/Users/Sivankit/Documents/work/stevens/applied_machine_learning/proj/code'

In [3]:
train_dir = 'xrays/train/'
val_dir = 'xrays/val/'
test_dir = 'xrays/test/'

img_width, img_height = 224, 224  # Default input size for VGG16

In [4]:
# Instantiate convolutional base

conv_base = VGG16(weights='imagenet', 
                  include_top=False,
                  input_shape=(img_width, img_height, 3))

# Show architecture
conv_base.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128

In [5]:
import os, shutil
from keras.preprocessing.image import ImageDataGenerator


In [6]:
#Defining function for feature extraction and label creation 

datagen = ImageDataGenerator(rescale=1./255)
batch_size = 12

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 7, 7, 512))  # Must be equal to the output of the convolutional base
    labels = np.zeros(shape=(sample_count,3))
    # Preprocess data
    generator = datagen.flow_from_directory(directory,
                                            target_size=(img_width,img_height),
                                            batch_size = batch_size,
                                            class_mode='categorical')
    
    # Pass data through convolutional base
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size: (i + 1) * batch_size] = features_batch
        labels[i * batch_size: (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels
    



In [7]:
#Calling the feature extraction function on the train, test and validation datasets
#For running the training model, train_features would be X_train and train_labels would be y_train

train_features, train_labels = extract_features(train_dir, 760)  # Total no.of files in train folder
val_features, val_labels = extract_features(val_dir, 95)
test_features, test_labels = extract_features(test_dir, 95)

Found 760 images belonging to 3 classes.


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Users/Sivankit/anaconda3/envs/tf2/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-8823f6cc752e>", line 4, in <module>
    train_features, train_labels = extract_features(train_dir, 760)  # Total no.of files in train folder
  File "<ipython-input-6-fba8bc6de7bb>", line 18, in extract_features
    features_batch = conv_base.predict(inputs_batch)
  File "/Users/Sivankit/anaconda3/envs/tf2/lib/python3.7/site-packages/keras/engine/training.py", line 1462, in predict
    callbacks=callbacks)
  File "/Users/Sivankit/anaconda3/envs/tf2/lib/python3.7/site-packages/keras/engine/training_arrays.py", line 324, in predict_loop
    batch_outs = f(ins_batch)
  File "/Users/Sivankit/anaconda3/envs/tf2/lib/python3.7/site-packages/tensorflow_core/python/keras/backend.py", line 3727, in __call__
    outputs = self._graph_fn(*converted_inputs)
  F

KeyboardInterrupt: 

In [11]:
print(train_labels[0],'\n',train_labels[281],'\n',train_labels[484])
print('\n',train_features[0])

[0. 1. 0.] 
 [0. 0. 1.] 
 [0. 1. 0.]

 [[[1.17777109 0.         0.         ... 0.         0.6446377  0.        ]
  [1.26192677 0.         0.12195151 ... 0.         0.41675401 0.        ]
  [0.63260186 0.         0.14891648 ... 0.         0.38908049 0.        ]
  ...
  [0.74722111 0.         0.         ... 0.         0.47313413 0.        ]
  [0.40443221 0.         0.         ... 0.         0.03844876 0.        ]
  [0.17732182 0.         0.         ... 0.         0.34867224 0.        ]]

 [[1.40667415 0.         0.14863993 ... 0.         0.6401009  0.        ]
  [1.25331163 0.         0.25017789 ... 0.         0.63116902 0.        ]
  [0.49793351 0.         0.45441389 ... 0.         0.65091127 0.        ]
  ...
  [1.02703035 0.         0.4532491  ... 0.19995858 0.77533519 0.        ]
  [0.73174357 0.         0.         ... 0.         0.47947657 0.        ]
  [0.19872554 0.         0.         ... 0.         0.43520838 0.        ]]

 [[0.43964788 0.         0.         ... 0.         0.7106

In [9]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

import matplotlib.pyplot as plt

In [10]:
#Running the DT model here returns an error as the train_features array is multi dimensional whereas DT expects a 2D array
dt = tree.DecisionTreeClassifier(random_state=42)
dt.fit(train_features, train_labels)

ValueError: Found array with dim 4. Estimator expected <= 2.