In [39]:
import os
import json
from PIL import Image
from skimage.measure import block_reduce
from scipy.misc import imread
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

## AlexNet

#### Helper Functions:

In [53]:
def conv(x, name, filter_height, filter_width, num_filters, strides=[1, 4, 4, 1], padding="SAME", groups=1):
    """Build a 2-D convolution layer followed by bias addition and ReLU.

    Args:
        x: input tensor handed to ``tf.nn.conv2d``; its last dimension is
            read as the number of input channels.
        name: variable scope that owns the layer's ``w`` and ``b`` variables.
        filter_height: kernel height.
        filter_width: kernel width.
        num_filters: number of output channels.
        strides: strides forwarded to ``tf.nn.conv2d`` (read-only here).
        padding: padding mode forwarded to ``tf.nn.conv2d``.
        groups: number of channel groups. With ``groups > 1`` the input and
            the kernel are split along the channel axis, every pair is
            convolved independently and the results are concatenated.

    Returns:
        The ReLU-activated output tensor, named after the variable scope.

    Raises:
        ValueError: if ``groups`` is not positive or does not evenly divide
            both the input channel count and ``num_filters``.
    """
    channels = int(x.shape[-1])
    if groups < 1 or channels % groups or num_filters % groups:
        raise ValueError(
            "groups=%r must be positive and divide both the input channels (%d) "
            "and num_filters (%d)" % (groups, channels, num_filters)
        )

    def convolve(inputs, kernel):
        return tf.nn.conv2d(inputs, kernel, strides=strides, padding=padding)

    with tf.variable_scope(name) as scope:
        # Each filter only sees the channels of its own group.
        w = tf.get_variable("w", shape=[filter_height, filter_width, channels // groups, num_filters])
        b = tf.get_variable("b", shape=[num_filters])

        if groups == 1:
            conv_out = convolve(x, w)
        else:
            # Grouped convolution: slice inputs and filters along the channel
            # axis, convolve pairwise, then stitch the outputs back together.
            input_groups = tf.split(x, num_or_size_splits=groups, axis=3)
            kernel_groups = tf.split(w, num_or_size_splits=groups, axis=3)
            conv_out = tf.concat(
                [convolve(part, kernel) for part, kernel in zip(input_groups, kernel_groups)],
                axis=3,
            )

        # bias_add keeps the shape of its input, so no reshape is needed; the
        # previous reshape to the static shape failed for a batch size of None.
        output = tf.nn.bias_add(conv_out, b)
        return tf.nn.relu(output, name=scope.name)

In [54]:
def fully_connected(x, dim_in, dim_out, name, relu=True):
    """Build a dense layer computing ``x @ w + b``, optionally followed by ReLU.

    Args:
        x: input tensor multiplied on the left of the ``[dim_in, dim_out]``
            weight matrix.
        dim_in: number of input features (rows of ``w``).
        dim_out: number of output units (columns of ``w``, length of ``b``).
        name: variable scope that owns the layer's ``w`` and ``b`` variables.
        relu: when True (the default, matching the previous behaviour) the
            affine output is passed through ReLU; pass False to get the raw
            affine output, e.g. for a final layer that must emit logits.

    Returns:
        The layer output tensor.
    """
    with tf.variable_scope(name):
        w = tf.get_variable("w", shape=[dim_in, dim_out], trainable=True)
        b = tf.get_variable("b", [dim_out], trainable=True)

        output = tf.matmul(x, w) + b
        # Clamping the last layer with ReLU would zero out negative logits,
        # so the activation has to be optional.
        return tf.nn.relu(output) if relu else output

In [55]:
def max_pool(x, filter_height, filter_width, stride_y, stride_x, name, padding='SAME'):
    """Max-pool ``x`` with a ``filter_height`` x ``filter_width`` window.

    The window size and the stride are both 1 along the first and last axes,
    so only the two middle axes are pooled. ``padding`` and ``name`` are
    forwarded to ``tf.nn.max_pool`` unchanged.
    """
    window = [1, filter_height, filter_width, 1]
    step = [1, stride_y, stride_x, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding=padding, name=name)

In [56]:
def lrn(x, name, radius=5, alpha=0.0001, beta=0.75, bias=2.0):
    """Apply local response normalization to ``x``.

    ``radius`` is passed as ``depth_radius``; ``alpha``, ``beta``, ``bias``
    and ``name`` are forwarded to ``tf.nn.local_response_normalization``
    under their own names.
    """
    norm_options = dict(depth_radius=radius, alpha=alpha, beta=beta, bias=bias, name=name)
    return tf.nn.local_response_normalization(x, **norm_options)

In [57]:
def dropout(x, keep_prob):
    """Apply dropout to ``x``; both arguments go positionally to ``tf.nn.dropout``."""
    dropped = tf.nn.dropout(x, keep_prob)
    return dropped

#### Graph:

In [61]:
# graph hyper-parameters
n_classes = 1000  # number of target classes (second dimension of the label placeholder)
n_dim = (224, 224, 3)  # shape handed to the image input placeholder

In [67]:
# Network inputs: a batch of images and the matching class targets.
# tf.nn.conv2d needs a 4-D input, so the image placeholder gets a leading
# batch axis (None = any batch size), matching the batch axis Y already has.
X = tf.placeholder(tf.float32, shape=(None,) + tuple(n_dim))
Y = tf.placeholder(tf.float32, shape=[None, n_classes])

## REST OF GRAPH HERE #
#
#
#

## Preprocessing CAPTCHA images

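As with the ImageNet data, we need virtually no preprocessing, save for downsampling the images (currently `100x100`) to fit the dimensions of our input layer (`64x64`). Note that the cell below averages each `2x2` pixel block, which halves a `100x100` image to `50x50`, so its output does not yet match `64x64`.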

In [19]:
%matplotlib inline

# iterate through recaptcha dataset
RECAPTCHA_DIR = "recapt_offline"

for folder in os.listdir(RECAPTCHA_DIR):
    folder_path = os.path.join(RECAPTCHA_DIR, folder)
    # Only directories are walked; this also skips stray files such as
    # ".DS_Store", which would otherwise make the inner listdir fail.
    if not os.path.isdir(folder_path):
        continue

    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)

        if file == "info.txt":
            # The context manager closes the handle (it used to be leaked).
            with open(file_path, "r") as info_file:
                json_dict = json.load(info_file)  # convert json to dict

            correct = json_dict["correct_answer"]  # array of numbers containing indices (1-9) of correct images
            target = json_dict["desc"]["keyword"]  # string representing target image (e.g. "cake")

        if "cand" in file:
            # Read the pixels while the file is open; convert to RGB so the
            # array always has the three channels block_size=(2, 2, 1) expects.
            with Image.open(file_path, "r") as img:
                pixels = np.array(img.convert("RGB"))

            # Average each 2x2 pixel block per channel (halves height and width).
            downsampled = block_reduce(pixels, block_size=(2, 2, 1), func=np.mean)

            # np.mean yields float64; PIL needs uint8 data to build an RGB
            # image, otherwise the raw float bytes are read as pixel values.
            new = Image.fromarray(np.rint(downsampled).astype(np.uint8))
            # To preview a result: plt.figure(); plt.imshow(new); plt.show()