## Using Dato's Graphlab-Create to classify lunar images

#### Load images from test and train directories, re-size just in case there are odd-sized images.

In [4]:
import graphlab as gl
WORKING_DIR = '/home/wilber/work/Galvanize/gcp-data/iForest/loonie'
train_sf = gl.image_analysis.load_images(WORKING_DIR + '/train', \
                                         random_order=True)
train_sf['image'] = gl.image_analysis.resize(train_sf['image'], 256, 256)
test_sf = gl.image_analysis.load_images(WORKING_DIR + '/test', \
                                        random_order=True)
test_sf['image'] = gl.image_analysis.resize(test_sf['image'], 256, 256)

#### Generate labels

In [5]:
train_sf['label'] = train_sf['path'].apply(lambda x: 1 if (x.split('/')[-1].startswith('h')
                                           or x.split('/')[-1].startswith('nth')
                                           or x.split('/')[-1].startswith('wsh')) else 0)
test_sf['label'] = test_sf['path'].apply(lambda x: 1 if (x.split('/')[-1].startswith('h')
                                         or x.split('/')[-1].startswith('nth')
                                         or x.split('/')[-1].startswith('wsh')) else 0)
print train_sf['label'].head(40)
print test_sf['label'].head(40)
train_sf.save(WORKING_DIR + '/sframe/train_shuffle')
test_sf.save(WORKING_DIR + '/sframe/test_shuffle')

[1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1]
[1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1]


In [7]:
gl.canvas.set_target('ipynb')
train = gl.SFrame(WORKING_DIR + '/sframe/train_shuffle')
train.head()

path,image,label
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,0
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,0
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/iForest/l ...,Height: 256 Width: 256,1


#### Subtract mean image from training data:

In [9]:
mean_image = train['image'].mean()
gl.SArray([mean_image]).save(WORKING_DIR + '/sframe/mean_image')
mean_image = gl.SArray(WORKING_DIR + '/sframe/mean_image')[0]
mean_image

Height: 256px
Width: 256px
Channels: 3

#### Obtain a NerualNet object from the builtin networks in the deeplearing toolkit.
#### The "imagenet" NeuralNet is derived from Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton, 2012.

In [10]:
net = gl.deeplearning.get_builtin_neuralnet('imagenet')
net


### network layers ###
layer[0]: ConvolutionLayer
  init_random = gaussian
  padding = 0
  stride = 4
  num_channels = 96
  num_groups = 1
  kernel_size = 11
layer[1]: RectifiedLinearLayer
layer[2]: MaxPoolingLayer
  padding = 0
  stride = 2
  kernel_size = 3
layer[3]: LocalResponseNormalizationLayer
  alpha = 0.001
  beta = 0.75
  knorm = 1
  local_size = 5
layer[4]: ConvolutionLayer
  init_random = gaussian
  padding = 2
  stride = 1
  num_channels = 256
  num_groups = 2
  kernel_size = 5
layer[5]: RectifiedLinearLayer
layer[6]: MaxPoolingLayer
  padding = 0
  stride = 2
  kernel_size = 3
layer[7]: LocalResponseNormalizationLayer
  alpha = 0.001
  beta = 0.75
  knorm = 1
  local_size = 5
layer[8]: ConvolutionLayer
  init_random = gaussian
  padding = 1
  stride = 1
  num_channels = 384
  num_groups = 1
  kernel_size = 3
layer[9]: RectifiedLinearLayer
layer[10]: ConvolutionLayer
  init_random = gaussian
  padding = 1
  stride = 1
  num_channels = 384
  num_groups = 2
  kernel_size = 

In [11]:
net.layers[-2].num_hidden_units = 2.
net.verify()

True

#### Train!

In [12]:
import time
start = time.time()
m = gl.neuralnet_classifier.create(train[['image', 'label']],                      
                                   target='label',
#                                  input_shape=[100, 100, 3],
#                                  output_shape=2,
                                   network=net,                                    
                                   mean_image=mean_image,                          
                                   metric=['accuracy','precision'],                 
                                   max_iterations=35,                              
                                   model_checkpoint_path=WORKING_DIR
                                     + '/result/model_checkpoint',
                                   model_checkpoint_interval=5,                    
                                   batch_size=50)
totsecs = time.time() - start
hours = int(totsecs/3600)
mins = int((totsecs - 3600.*hours)/60)
secs = totsecs - 3600.*hours - 60.*mins
print "Elapsed time = {0} hours, {1} minutes, {2} seconds".format(hours, mins, secs)

Using network:

### network layers ###
layer[0]: ConvolutionLayer
  init_random = gaussian
  padding = 0
  stride = 4
  num_channels = 96
  num_groups = 1
  kernel_size = 11
layer[1]: RectifiedLinearLayer
layer[2]: MaxPoolingLayer
  padding = 0
  stride = 2
  kernel_size = 3
layer[3]: LocalResponseNormalizationLayer
  alpha = 0.001
  beta = 0.75
  knorm = 1
  local_size = 5
layer[4]: ConvolutionLayer
  init_random = gaussian
  padding = 2
  stride = 1
  num_channels = 256
  num_groups = 2
  kernel_size = 5
layer[5]: RectifiedLinearLayer
layer[6]: MaxPoolingLayer
  padding = 0
  stride = 2
  kernel_size = 3
layer[7]: LocalResponseNormalizationLayer
  alpha = 0.001
  beta = 0.75
  knorm = 1
  local_size = 5
layer[8]: ConvolutionLayer
  init_random = gaussian
  padding = 1
  stride = 1
  num_channels = 384
  num_groups = 1
  kernel_size = 3
layer[9]: RectifiedLinearLayer
layer[10]: ConvolutionLayer
  init_random = gaussian
  padding = 1
  stride = 1
  num_channels = 384
  num_groups = 2
 

In [8]:
test = gl.SFrame(WORKING_DIR + '/sframe/test_shuffle')
test.head()

path,image,label
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,0
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1
/home/wilber/work/Galvani ze/gcp-data/lunarPhot ...,Height: 256 Width: 256,1


In [9]:
predictions = m.classify(test)
print predictions

+--------+-------+----------------+
| row_id | class |     score      |
+--------+-------+----------------+
|   0    |   1   |      1.0       |
|   1    |   1   |      1.0       |
|   2    |   1   |      1.0       |
|   3    |   1   |      1.0       |
|   4    |   1   |      1.0       |
|   5    |   0   | 0.999995470047 |
|   6    |   1   |      1.0       |
|   7    |   1   |      1.0       |
|   8    |   1   |      1.0       |
|   9    |   1   |      1.0       |
+--------+-------+----------------+
[4611 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [10]:
pred_top2 = m.predict_topk(test, k=2)
print pred_top2

+--------+-------+-------------------+
| row_id | class |       score       |
+--------+-------+-------------------+
|   0    |   1   |        1.0        |
|   0    |   0   | 2.02496830068e-12 |
|   1    |   1   |        1.0        |
|   1    |   0   | 1.23690831882e-11 |
|   2    |   1   |        1.0        |
|   2    |   0   | 5.29956200451e-12 |
|   3    |   1   |        1.0        |
|   3    |   0   |  4.418005458e-12  |
|   4    |   1   |        1.0        |
|   4    |   0   | 1.39312227119e-12 |
+--------+-------+-------------------+
[9222 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [12]:
result = m.evaluate(test)
print "Accuracy         : %s" % result['accuracy']
print "Confusion Matrix : \n%s" % result['confusion_matrix']

Accuracy         : 0.997614383698
Confusion Matrix : 
+--------------+-----------------+-------+
| target_label | predicted_label | count |
+--------------+-----------------+-------+
|      1       |        1        |  3461 |
|      0       |        1        |   11  |
|      0       |        0        |  1139 |
+--------------+-----------------+-------+
[3 rows x 3 columns]



### Pickle the model

In [13]:
from graphlab.util import gl_pickle

# Setup the GLC pickler
pickler = gl_pickle.GLPickler(filename='IdiotModel1,pkl')
pickler.dump(m)

# The pickler has to be closed to make sure the files get closed.
pickler.close()

with open('IdiotModel1.pkl', 'r') as IM1file:
    model = pickle.load(IM1file)

ImportError: cannot import name gl_pickle

### Can we extract features?