# Load data

In [1]:
from six.moves import cPickle as pickle
pickle_file = 'frogs_all_20x20.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    dataset = save['training_data']
    species_labels = save['training_labels']
    recording_ids = save['recording_ids']
    del save  # hint to help gc free up memory
    print('Dataset', dataset.shape, species_labels.shape, recording_ids.shape)


('Dataset', (7784, 20, 20, 1), (7784,), (7784,))


In [2]:
import numpy as np
import common 
from sklearn.cross_validation import train_test_split
def index(mylist, myvalue):
    start = mylist.tolist().index(myvalue)
    end = len(mylist) - mylist[::-1].tolist().index(myvalue)
    return start, end



def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))/ predictions.shape[0])

num_steps = 200
batch_size = 16
image_size = 20

patch_size = 3
depth = 16
num_hidden = 128
conv_stride = 2



# Labels for the problem
# In this case the 'hylaedactylus' class is chosen to be in target class
# It's also possible to add multiple classes to the target class. For example target = [0, 2, 6] includes 
# AdenomeraAndre, hylaedactylus, LeptodactylusFuscus species.

# ['AdenomeraAndre', 'Ameeregatrivittata', 'hylaedactylus', 'HylaMinuta', 'HypsiboasCinerascens', 
#           'HypsiboasCordobae', 'LeptodactylusFuscus', 'OsteocephalusOophagus', 'Rhinellagranulosa', 'ScinaxRuber']
target = [0, 2, 6]
target = [2]


others = set(species_labels.tolist()) - set(target)
labels = []
for i, label in enumerate(species_labels):
    if label in target:
        labels.append([1, 0])
    else:
        labels.append([0, 1])
labels = np.array(labels)

final_predictions = np.empty((0,2))
final_labels = np.empty((0,2))

# LOOCV
# With each cycle one recording is chosen as test set and all others are used as training set
for i in range(int(np.max(recording_ids)) + 1):
    start_index, end_index = index(recording_ids, i)
    test_set = dataset[start_index:end_index]
    test_labels = labels[start_index:end_index]
    
    train_set = np.vstack((dataset[0:start_index], dataset[end_index:dataset.shape[0]]))
    train_labels = np.concatenate((labels[0:start_index], labels[end_index:labels.shape[0]]))
    train_set, _, train_labels, _ = train_test_split(train_set, train_labels, test_size=0, random_state=0)
    # TODO: randomize training set and labels
    
    test_predictions = common.train_cnn_model(num_steps, batch_size, image_size, train_set, train_labels, 
                                             test_set, verbose=0, seed=719)
    final_predictions = np.vstack((final_predictions, test_predictions))
    final_labels = np.vstack((final_labels, test_labels))
    
    print "Recording " + str(i) + " accuracy: " + str(accuracy(test_predictions, test_labels))    

Recording 0 accuracy: 100.0
Recording 1 accuracy: 100.0
Recording 2 accuracy: 100.0
Recording 3 accuracy: 100.0
Recording 4 accuracy: 100.0
Recording 5 accuracy: 100.0
Recording 6 accuracy: 100.0
Recording 7 accuracy: 100.0
Recording 8 accuracy: 100.0
Recording 9 accuracy: 100.0
Recording 10 accuracy: 100.0
Recording 11 accuracy: 100.0
Recording 12 accuracy: 100.0
Recording 13 accuracy: 97.9899497487
Recording 14 accuracy: 99.348534202
Recording 15 accuracy: 99.3119266055
Recording 16 accuracy: 100.0
Recording 17 accuracy: 88.6075949367
Recording 18 accuracy: 99.1803278689
Recording 19 accuracy: 100.0
Recording 20 accuracy: 99.6610169492
Recording 21 accuracy: 99.5867768595
Recording 22 accuracy: 100.0
Recording 23 accuracy: 100.0
Recording 24 accuracy: 92.8571428571
Recording 25 accuracy: 100.0
Recording 26 accuracy: 100.0
Recording 27 accuracy: 100.0
Recording 28 accuracy: 69.4444444444
Recording 29 accuracy: 100.0
Recording 30 accuracy: 100.0
Recording 31 accuracy: 45.4545454545
Rec

In [3]:
print 'Accuracy: ' + str(accuracy(final_predictions, final_labels))
#print final_predictions
#print final_predictions

from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
print 'AUC score: ' + str(roc_auc_score(final_labels, final_predictions))
print 'F1 Score: ' + str(f1_score(np.argmax(final_labels, 1), np.argmax(final_predictions,1)))

Accuracy: 99.3961973279
AUC score: 0.999520655683
F1 Score: 0.99499733901
