In [None]:
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib import layers
from tensorflow.contrib import losses
from tensorflow.examples.tutorials.mnist import input_data
from sklearn import metrics
import numpy as np

In [None]:
from random import shuffle # randomizing ordered data

In [None]:
# Width and height (in pixels) shared by every image in this notebook.
IMG_WIDTH = 100
IMG_HEIGHT = 100

In [None]:
# Raise TensorFlow's logging verbosity to INFO so that detailed log output
# is emitted by the cells that follow.
tf.logging.set_verbosity(tf.logging.INFO)

In [None]:
# Load the two per-class arrays. Every row is later indexed as
# row[0] (image) and row[1] (label); elements of different shapes can only be
# stored as an object array, and NumPy >= 1.16.3 refuses to load object arrays
# unless allow_pickle=True is passed.
# NOTE(review): unpickling executes arbitrary code -- only load .npy files
# that were produced locally / come from a trusted source.
pepperoni_data = np.load('data/pepperoni_rgb_data.npy', allow_pickle=True)
sausage_data = np.load('data/sausage_rgb_data.npy', allow_pickle=True)

In [None]:
# Show the loaded pepperoni array as the cell output (quick sanity check).
pepperoni_data

In [None]:
# Show the loaded sausage array as the cell output (quick sanity check).
sausage_data

In [None]:
# Stack both classes row-wise (np.concatenate joins along axis 0 by default).
full_data = np.concatenate([pepperoni_data, sausage_data])

In [None]:
# Show the shape of the combined array.
full_data.shape

In [None]:
# Shuffle the rows in place so both classes are mixed before splitting.
# random.shuffle must not be used on a multi-dimensional ndarray: its
# element swap (x[i], x[j] = x[j], x[i]) operates on row *views*, which
# duplicates some rows and silently drops others. np.random.shuffle
# permutes along the first axis correctly.
np.random.shuffle(full_data)

In [None]:
# Segregate the data into training, validation and test sets.
# Slices are half-open, so validation has to start at 1500 (not 1501);
# otherwise the row at index 1500 ends up in none of the sets.
train = full_data[0:1500]
validation = full_data[1500:1750]
# The test set is the last 250 rows; with fewer than 2000 rows in total it
# overlaps the slices above.
test = full_data[-250:]

In [None]:
# For Gray Scale Images
# For each split: element 0 of every row is collected as float32 image data
# and reshaped to (-1, IMG_WIDTH, IMG_HEIGHT, 1); element 1 is collected as
# the int32 label array.
# NOTE(review): the RGB cell that follows assigns the same six names, so when
# both cells are run in order these values are replaced -- confirm which of
# the two cells is meant to be executed.
X_train = np.array([i[0] for i in train], dtype='f').reshape(-1,IMG_WIDTH,IMG_HEIGHT,1)
y_train = np.array([i[1] for i in train], dtype='int32')

X_validation = np.array([i[0] for i in validation], dtype='f').reshape(-1,IMG_WIDTH,IMG_HEIGHT,1)
y_validation = np.array([i[1] for i in validation], dtype='int32')

X_test = np.array([i[0] for i in test], dtype='f').reshape(-1, IMG_WIDTH, IMG_HEIGHT, 1)
y_test = np.array([i[1] for i in test], dtype='int32')

In [None]:
# For RGB, 3 channel layout
# Element 0 of each row holds the image, element 1 holds its label.
# Images become float32 tensors of shape (N, IMG_WIDTH, IMG_HEIGHT, 3);
# labels become int32 arrays.
X_train = np.asarray([row[0] for row in train], dtype='float32').reshape((-1, IMG_WIDTH, IMG_HEIGHT, 3))
y_train = np.asarray([row[1] for row in train], dtype='int32')

X_validation = np.asarray([row[0] for row in validation], dtype='float32').reshape((-1, IMG_WIDTH, IMG_HEIGHT, 3))
y_validation = np.asarray([row[1] for row in validation], dtype='int32')

X_test = np.asarray([row[0] for row in test], dtype='float32').reshape((-1, IMG_WIDTH, IMG_HEIGHT, 3))
y_test = np.asarray([row[1] for row in test], dtype='int32')

In [None]:
# Show the shape of the training image tensor.
X_train.shape

In [None]:
# creating custom estimator
def model_function_2d(features, targets, mode):
    """Model function for a 2-D CNN over single-channel images.

    Architecture: conv(32 filters, 5x5) -> max-pool(2x2, stride 2)
    -> conv(64 filters, 5x5) -> max-pool(2x2, stride 2)
    -> fully connected (100, 20) -> 2 logits.

    Args:
        features: image tensor; reshaped here to
            [-1, IMG_WIDTH, IMG_HEIGHT, 1].
        targets: label tensor handed to softmax cross-entropy together with
            the 2 logits (presumably one-hot, shape [batch, 2] -- confirm
            against the caller).
        mode: accepted to satisfy the model_fn signature; not used.

    Returns:
        Tuple ``(predictions, loss, train_op)`` where ``predictions`` is a dict
        with the softmax probabilities under 'probs' and the arg-max class
        index under 'labels'.
    """
    # Input layer: 4-D tensor [batch_size, width, height, channels].
    # -1 lets TensorFlow infer the batch size.
    input_layer = tf.reshape(features, [-1, IMG_WIDTH, IMG_HEIGHT, 1])
    kernel_size_1 = [5, 5]

    # Convolutional Layer #1: 32 features, 5x5 filter, ReLU.
    # The original called tf.layers.conv3d with contrib-style keyword names
    # (num_of_outputs / stride / activation_fn) and a 3-D kernel, which that
    # function does not accept; a 2-D model needs the contrib conv2d layer.
    # "SAME" padding preserves width and height.
    conv1 = layers.conv2d(
      inputs=input_layer,
      num_outputs=32,
      kernel_size=kernel_size_1,
      stride=1,
      padding="SAME",
      activation_fn=tf.nn.relu)

    # Pooling Layer #1: 2x2 max pooling with stride 2 halves width and height.
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolutional Layer #2: 64 features, 5x5 filter, ReLU, "SAME" padding.
    conv2 = layers.conv2d(
      inputs=pool1,
      num_outputs=64,
      kernel_size=[5, 5],
      stride=1,
      padding="SAME",
      activation_fn=tf.nn.relu)

    # Pooling Layer #2: halves width and height once more.
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten to [batch_size, (IMG_WIDTH/4) * (IMG_HEIGHT/4) * 64]:
    # two stride-2 poolings quarter each spatial dimension, 64 feature maps.
    pool2_flat = tf.reshape(pool2, [-1, (IMG_WIDTH // 4) * (IMG_HEIGHT // 4) * 64])

    # Fully connected layers with 100 and 20 neurons, then 2 raw logits.
    fclayers = layers.stack(pool2_flat, layers.fully_connected, [100, 20], activation_fn=tf.nn.relu)
    outputs = layers.fully_connected(inputs=fclayers,
                                     num_outputs=2,
                                     activation_fn=None)

    # Loss: softmax cross-entropy between the logits and the targets.
    loss = losses.softmax_cross_entropy(outputs, targets)

    # Training op: plain SGD minimising the loss.
    optimizer = layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.8,
      optimizer="SGD")
    probs = tf.nn.softmax(outputs)

    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer

In [None]:
# For VIDEOS
# creating custom estimator
def model_function_3d_for_videos(features, targets, mode):
    """Model function for a 3-D CNN (intended for video input).

    Architecture: conv3d(32 filters, 5x5x5) -> max-pool3d(2x2x2, stride 2)
    -> conv3d(64 filters, 5x5x5) -> max-pool3d(2x2x2, stride 2)
    -> fully connected (100, 20) -> 2 logits.

    Args:
        features: input tensor; reshaped here to
            [-1, IMG_WIDTH, IMG_HEIGHT, 3].
        targets: label tensor handed to softmax cross-entropy together with
            the 2 logits (presumably one-hot, shape [batch, 2] -- confirm
            against the caller).
        mode: accepted to satisfy the model_fn signature; not used.

    Returns:
        Tuple ``(predictions, loss, train_op)`` where ``predictions`` is a dict
        with the softmax probabilities under 'probs' and the arg-max class
        index under 'labels'.
    """
    # NOTE(review): tf.layers.conv3d operates on 5-D input
    # [batch, depth, height, width, channels], but this reshape yields a 4-D
    # tensor. The intended depth (frame count) cannot be determined from this
    # notebook, so the reshape and the flatten size below are left unchanged
    # -- confirm the video layout before using this model.
    input_layer = tf.reshape(features, [-1, IMG_WIDTH, IMG_HEIGHT, 3])
    kernel_size_1 = [5, 5, 5]
    kernel_size_2 = [5, 5, 5]

    # Convolutional Layer #1: 32 features, 5x5x5 filter, ReLU, "SAME" padding.
    conv1 = tf.layers.conv3d(
      inputs=input_layer,
      filters=32,
      kernel_size=kernel_size_1,
      strides=(1,1,1),
      padding="SAME",
      activation=tf.nn.relu)

    # Pooling Layer #1: 2x2x2 max pooling with stride 2.
    pool1 = tf.layers.max_pooling3d(inputs=conv1, pool_size=[2, 2, 2], strides=(2,2,2))

    # Convolutional Layer #2: 64 features, 5x5x5 filter, ReLU, "SAME" padding.
    # tf.layers.conv3d takes `filters` and `activation`; the contrib-style
    # names `num_outputs` / `activation_fn` used before raise a TypeError.
    conv2 = tf.layers.conv3d(
      inputs=pool1,
      filters=64,
      kernel_size=kernel_size_2,
      strides=(1,1,1),
      padding="SAME",
      activation=tf.nn.relu)

    # Pooling Layer #2: 2x2x2 max pooling with stride 2.
    pool2 = tf.layers.max_pooling3d(inputs=conv2, pool_size=[2, 2, 2], strides=(2,2,2))

    # Flatten tensor into a batch of vectors.
    pool2_flat = tf.reshape(pool2, [-1, int(IMG_WIDTH/4) * int(IMG_HEIGHT/4) * 64 * 3])

    # Fully connected layers with 100 and 20 neurons, then 2 raw logits.
    fclayers = layers.stack(pool2_flat, layers.fully_connected, [100, 20], activation_fn=tf.nn.relu)
    outputs = layers.fully_connected(inputs=fclayers,
                                     num_outputs=2,
                                     activation_fn=None)

    # Loss: softmax cross-entropy between the logits and the targets.
    loss = losses.softmax_cross_entropy(outputs, targets)

    # Training op: plain SGD minimising the loss.
    optimizer = layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.8,
      optimizer="SGD")
    probs = tf.nn.softmax(outputs)

    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer

In [None]:
# For Images
# creating custom estimator
def model_function(features, targets, mode):
    """Model function for the 2-D CNN used on the RGB pizza images.

    Architecture: conv(32 filters, 3x3) -> max-pool(2x2, stride 2)
    -> conv(64 filters, 2x2) -> max-pool(2x2, stride 2)
    -> fully connected (200, 40) -> 2 logits.

    The previous version reshaped the 3-channel input to a single channel,
    which tore every image into three scrambled pseudo-images, and then
    compensated with a ``* 3`` in the flatten size. The input is now kept as
    3 channels and the flatten size matches the real pooled feature map.
    Because variable shapes change, checkpoints written by the old graph
    cannot be restored into this one (use a fresh model_dir).

    Args:
        features: image tensor; reshaped here to
            [-1, IMG_WIDTH, IMG_HEIGHT, 3].
        targets: label tensor handed to sigmoid cross-entropy together with
            the 2 logits (presumably one-hot, shape [batch, 2] -- confirm
            against the caller).
        mode: accepted to satisfy the model_fn signature; not used.

    Returns:
        Tuple ``(predictions, loss, train_op)`` where ``predictions`` is a dict
        with the softmax probabilities under 'probs' and the arg-max class
        index under 'labels'.
    """
    num_channels = 3  # RGB input

    # Input layer: 4-D tensor [batch_size, width, height, channels].
    # -1 lets TensorFlow infer the batch size.
    input_layer = tf.reshape(features, [-1, IMG_WIDTH, IMG_HEIGHT, num_channels])
    kernel_size_1 = [3, 3]

    # Convolutional Layer #1: 32 features, 3x3 filter, ReLU.
    # "SAME" padding preserves width and height.
    conv1 = layers.conv2d(
      inputs=input_layer,
      num_outputs=32,
      kernel_size=kernel_size_1,
      stride=1,
      padding="SAME",
      activation_fn=tf.nn.relu)

    # Pooling Layer #1: 2x2 max pooling with stride 2 halves width and height.
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolutional Layer #2: 64 features, 2x2 filter, ReLU, "SAME" padding.
    conv2 = layers.conv2d(
      inputs=pool1,
      num_outputs=64,
      kernel_size=[2, 2],
      stride=1,
      padding="SAME",
      activation_fn=tf.nn.relu)

    # Pooling Layer #2: halves width and height once more.
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten to [batch_size, (IMG_WIDTH/4) * (IMG_HEIGHT/4) * 64]:
    # two stride-2 poolings quarter each spatial dimension, 64 feature maps.
    pool2_flat = tf.reshape(pool2, [-1, (IMG_WIDTH // 4) * (IMG_HEIGHT // 4) * 64])

    # Fully connected layers with 200 and 40 neurons, then 2 raw logits.
    fclayers = layers.stack(pool2_flat, layers.fully_connected, [200, 40], activation_fn=tf.nn.relu)
    outputs = layers.fully_connected(inputs=fclayers,
                                     num_outputs=2,
                                     activation_fn=None)

    # Loss: sigmoid cross-entropy between the logits and the targets.
    # NOTE(review): the reported probabilities below use softmax while the
    # loss treats the two logits independently -- confirm this is intended.
    loss = losses.sigmoid_cross_entropy(outputs, targets)

    # Training op: plain SGD minimising the loss.
    optimizer = layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.1,
      optimizer="SGD")
    probs = tf.nn.softmax(outputs)

    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer

In [None]:
#create custom estimator
# Wraps model_function in a tf.contrib.learn Estimator, passing model_dir
# as the directory argument.
# NOTE(review): model_dir is an absolute, machine-specific path -- consider
# making it configurable.
nn = learn.Estimator(model_fn=model_function, model_dir="/home/datascience/projects/vsoft/tf_model_new")

In [None]:
#build the model
# Fits the estimator on the training arrays, passing steps=100 and
# batch_size=10.
nn.fit(x=X_train, y=y_train, steps=100, batch_size=10)

In [None]:
# Print every variable of the estimator as "<name>:<value>".
for var_name in nn.get_variable_names():
    var_value = nn.get_variable_value(var_name)
    print("%s:%s" % (var_name, var_value))

In [None]:
# Run the model on the held-out test images, collecting the predicted
# class of each image and printing its probabilities (numbered from 1).
predictions = nn.predict(X_test, as_iterable=True)
y_pred = []
for position, pred in enumerate(predictions, start=1):
    y_pred.append(pred['labels'])
    print("Prediction %s: %s : %s" % (position, pred['probs'], pred['labels']))


In [None]:
# Accuracy on the test split: the true class is the arg-max over axis 1 of
# the label rows.
score = metrics.accuracy_score(y_true=np.argmax(y_test, axis=1), y_pred=y_pred)

In [None]:
# Show the accuracy computed above as the cell output.
score

# Prediction

## Data Preparation

In [None]:
# Import required libraries

import cv2                 # working with, mainly resizing, images
import numpy as np
import os
from random import shuffle # randomizing ordered data
from tqdm import tqdm      # a nice pretty percentage bar for tasks.


In [None]:
# Configuration for the prediction section.

# Width and height (in pixels) every image is resized to.
IMG_WIDTH = 100
IMG_HEIGHT = 100
# Root directory of the unseen test images (one sub-folder per label).
source_dir_name = '/home/datascience/projects/vsoft/data/test'

In [None]:
# One-hot encoding of the target column
def label_img(img, word_label):
    """Return the one-hot target for a class name.

    Args:
        img: unused; kept so existing callers keep working.
        word_label: class name, either 'Pepperoni' or 'Sausage'.

    Returns:
        [1, 0] for 'Pepperoni', [0, 1] for 'Sausage' (a fresh list per call).

    Raises:
        ValueError: if ``word_label`` is not a known class. The previous
            version fell through and returned None, which silently produced
            unusable labels further down the pipeline.
    """
    if word_label == 'Pepperoni':
        return [1, 0]
    if word_label == 'Sausage':
        return [0, 1]
    raise ValueError("Unknown label: %r (expected 'Pepperoni' or 'Sausage')" % (word_label,))

In [None]:
# Create training data
def create_train_data(directory='', label='', num_of_images=1000):
    """Load, resize and label the images stored in ``directory/label``.

    Args:
        directory: root folder that holds one sub-folder per label. When
            empty, the module-level ``TRAIN_DIR`` is used.
            NOTE(review): ``TRAIN_DIR`` is not defined in the visible cells;
            it must exist before this default is relied upon.
        label: name of the sub-folder to read; also passed to ``label_img``
            to build the target.
        num_of_images: maximum number of images to load.

    Returns:
        A list of ``[image_array, label_array]`` pairs. Images are read in
        colour and resized with ``cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))``.
    """
    if directory == '':
        directory = TRAIN_DIR
    label_dir = os.path.join(directory, label)

    training_data = []
    for file_name in tqdm(os.listdir(label_dir)):
        # Check the cap *before* loading: the previous post-increment check
        # (i > num_of_images) let one image too many through.
        if len(training_data) >= num_of_images:
            break

        path = os.path.join(label_dir, file_name)
        image = cv2.imread(path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread returns None when a file cannot be read as an image;
            # passing that on to cv2.resize would abort the whole load.
            print("Skipping unreadable image: %s" % path)
            continue

        image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
        img_label = label_img(file_name, label)
        training_data.append([np.array(image), np.array(img_label)])
    return training_data

In [None]:
# Load the 'Pepperoni' images below source_dir_name, passing 10 as num_of_images.
# NOTE(review): the Sausage cell passes 1000 -- confirm the different caps
# are intentional.
pepperoni_data_test = create_train_data(source_dir_name,'Pepperoni',10)

In [None]:
# Load the 'Sausage' images below source_dir_name, passing 1000 as num_of_images.
sausage_data_test = create_train_data(source_dir_name,'Sausage',1000)

In [None]:
# Join both unseen-data lists row-wise (np.concatenate joins along axis 0 by default).
test_data_unseen = np.concatenate([pepperoni_data_test, sausage_data_test])

In [None]:
# Element 0 of each row is the image, element 1 its label: images become a
# float32 tensor of shape (N, IMG_WIDTH, IMG_HEIGHT, 3), labels an int32 array.
X_test_unseen = np.asarray([row[0] for row in test_data_unseen], dtype='float32').reshape((-1, IMG_WIDTH, IMG_HEIGHT, 3))
y_test_unseen = np.asarray([row[1] for row in test_data_unseen], dtype='int32')

In [None]:
# Run the model on the unseen images, collecting the predicted class of
# each image and printing its probabilities (numbered from 1).
predictions_unseen = nn.predict(X_test_unseen, as_iterable=True)
y_pred_unseen = []
for position, pred in enumerate(predictions_unseen, start=1):
    y_pred_unseen.append(pred['labels'])
    print("Prediction %s: %s : %s" % (position, pred['probs'], pred['labels']))


In [None]:
# Accuracy on the unseen data: the true class is the arg-max over axis 1 of
# the label rows.
score = metrics.accuracy_score(y_true=np.argmax(y_test_unseen, axis=1), y_pred=y_pred_unseen)