# State Farm Detection using Convolutions

In [1]:
%cd
%cd nbs
%matplotlib inline
from theano.sandbox import cuda
cuda.use('gpu0')

from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=180)
from shutil import copyfile, copy, rmtree, move

import utils; reload(utils)
from utils import *
from IPython.display import FileLink

/home/ubuntu
/home/ubuntu/nbs


Using Theano backend.


# Setup

Make the set of output folders that we will train our classifier on.

In [1]:
%cd
%cd nbs/data/statefarm
%mkdir valid
%cd valid
%mkdir c0 c1 c2 c3 c4 c5 c6 c7 c8 c9

/home/ubuntu
/home/ubuntu/nbs/data/statefarm
/home/ubuntu/nbs/data/statefarm/valid


In [3]:
%cd
%cd nbs/data/statefarm
%ls train/c1 | wc -l

/home/ubuntu
/home/ubuntu/nbs/data/statefarm
2267


In [8]:
for i in range(0,10):
    g = glob('train/c'+ str(i) + '/*')
    shuf = np.random.permutation(g)
    for j in range(0,200): copy(shuf[j], 'valid/c' + str(i) + '/')

In [4]:
%ls valid/c9 | wc -l

200


# Convolutional Neural Network

Now that we've setup the folders and directories for our images, we can start setting up our training, validation, and testing sets to feed into our neural networks.

In [3]:
%cd
%cd nbs/data/statefarm
batch_size = 64
batches = get_batches('train', batch_size = batch_size)
val_batches = get_batches('valid', batch_size = batch_size * 2)
test_batches = get_batches('test', batch_size = batch_size)

/home/ubuntu
/home/ubuntu/nbs/data/statefarm
Found 22424 images belonging to 10 classes.
Found 2000 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [2]:
%cd
%cd nbs/data
(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames, test_filenames) = get_classes('statefarm/')

/home/ubuntu
/home/ubuntu/nbs/data
Found 22424 images belonging to 10 classes.
Found 2000 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [18]:
dir_src = "test/"
dir_dst = "test/unknown"

for filename in os.listdir(dir_src):
    if filename.endswith('.jpg'):
        move(dir_src + filename, dir_dst)

In [15]:
%cd statefarm
%ls testtest | wc -l

/home/ubuntu/nbs/data/statefarm
79726


In [8]:
def conv3(batches):
    model = Sequential([
        BatchNormalization(axis = 1, input_shape = (3, 224, 224)),
        Convolution2D(32, 3, 3, activation = 'relu'),
        BatchNormalization(axis = 1),
        MaxPooling2D(),
        Convolution2D(64, 3, 3, activation = 'relu'),
        BatchNormalization(axis = 1),
        MaxPooling2D(),
        Convolution2D(128, 3, 3, activation= 'relu'),
        BatchNormalization(axis = 1),
        MaxPooling2D(),
        Flatten(),
        Dense(100, activation = 'relu'),
        BatchNormalization(),
        Dense(100, activation = 'relu'),
        BatchNormalization(),
        Dropout(.5),
        Dense(10, activation = 'softmax')
    ])
    
    model.compile(Adam(lr = .001), loss = 'categorical_crossentropy', metrics = ['accuracy'])
    model.fit_generator(batches, batches.nb_sample, nb_epoch = 1, validation_data = val_batches, 
                       nb_val_samples = val_batches.nb_sample)
    return model

Data Augmentation

In [17]:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches = get_batches('train', gen_t, batch_size = batch_size)

Found 22424 images belonging to 10 classes.


In [5]:
model = conv3(batches)

Epoch 1/1

KeyboardInterrupt: 

In [20]:
model.optimizer.lr = .0001
model.fit_generator(batches, batches.nb_sample, nb_epoch = 1, validation_data = val_batches, 
                    nb_val_samples = val_batches.nb_sample)

Epoch 1/1


<keras.callbacks.History at 0x7f664ad9ef50>

In [21]:
model.optimizer.lr = .00001
model.fit_generator(batches, batches.nb_sample, nb_epoch = 2, validation_data = val_batches, 
                    nb_val_samples = val_batches.nb_sample)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f664ad9ef10>

In [22]:
model.save_weights('models/conv3.0')

In [15]:
model.load_weights('models/conv3.0')

In [60]:
test_feat = model.predict_generator(test_batches, test_batches.nb_sample)

In [71]:
val_feat = model.predict_generator(val_batches, val_batches.nb_sample)

In [74]:
train_feat = model.predict_generator(batches, batches.nb_sample)

In [69]:
save_array('results/test_feat.dat', test_feat)

In [72]:
save_array('results/val_feat.dat', val_feat)

In [75]:
save_array('results/train_feat.dat', train_feat)

In [2]:
%cd ~/nbs/data/statefarm

/home/ubuntu/nbs/data/statefarm


In [16]:
test_feat = load_array('results/test_feat.dat')

In [17]:
val_feat = load_array('results/val_feat.dat')

In [18]:
train_feat = load_array('results/train_feat.dat')

In [8]:
test_feat.shape

(79726, 10)

In [9]:
val_feat.shape

(2000, 10)

In [10]:
train_feat.shape

(22424, 10)

# Vgg16 Pretrained Model

Using a pretrained vgg model for its convolutional layers, and then setting up our own dense network to train more specifically on the statefarm driver images.

In [24]:
vgg = Vgg16()
model = vgg.model
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx + 1]

In [25]:
conv_model = Sequential(conv_layers)

In [13]:
batches = get_batches('train', batch_size = batch_size, shuffle = False)

Found 22424 images belonging to 10 classes.


In [11]:
%cd
%cd nbs/data/
(val_classes, trn_classes, val_labels, trn_labels, 
    val_filenames, filenames, test_filenames) = get_classes('statefarm/')

/home/ubuntu
/home/ubuntu/nbs/data
Found 22424 images belonging to 10 classes.
Found 2000 images belonging to 10 classes.
Found 79726 images belonging to 1 classes.


In [20]:
conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)

In [22]:
save_array('results/conv_val_feat.dat', conv_val_feat)
save_array('results/conv_test_feat.dat', conv_test_feat)
save_array('results/conv_feat.dat', conv_feat)

In [21]:
%cd ~/nbs/data/statefarm
conv_val_feat = load_array('results/conv_val_feat.dat')
#conv_test_feat = load_array('results/conv_test_feat.dat')
conv_feat = load_array('results/conv_feat.dat')

/home/ubuntu/nbs/data/statefarm


In [39]:
conv_val_feat.shape
#conv_test_feat.shape
conv_feat.shape

(22424, 512, 14, 14)

In [26]:
def vggmodel(p):
    model = Sequential([
            MaxPooling2D(input_shape = conv_layers[-1].output_shape[1:]),
            Flatten(),
            Dropout(p/2),
            Dense(128, activation = 'relu'),
            BatchNormalization(),
            Dropout(p/2),
            Dense(128, activation = 'relu'),
            BatchNormalization(),
            Dropout(p),
            Dense(10, activation = 'softmax')
            ])
    
    return model

In [27]:
p = 0.8

In [28]:
vggmodel = vggmodel(p)

In [15]:
import gc
gc.collect()

0

Checks the amount of memory our data is using.

In [10]:
import sys

# These are the usual ipython objects, including this one you are creating
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

# Get a sorted list of the objects and their sizes
sorted([(x, sys.getsizeof(globals().get(x))) for x in dir() if not x.startswith('_') and x not in sys.modules and x not in ipython_vars], key=lambda x: x[1], reverse=True)

[('conv_data_feat', 45005865104),
 ('conv_feat', 9001173136),
 ('Activation', 904),
 ('Adam', 904),
 ('AtrousConv1D', 904),
 ('AtrousConv2D', 904),
 ('AtrousConvolution1D', 904),
 ('AtrousConvolution2D', 904),
 ('AveragePooling1D', 904),
 ('AveragePooling2D', 904),
 ('AveragePooling3D', 904),
 ('BatchNormalization', 904),
 ('Bidirectional', 904),
 ('Conv1D', 904),
 ('Conv2D', 904),
 ('Conv3D', 904),
 ('Convolution1D', 904),
 ('Convolution2D', 904),
 ('Convolution3D', 904),
 ('Cropping1D', 904),
 ('Cropping2D', 904),
 ('Cropping3D', 904),
 ('Deconv2D', 904),
 ('Deconvolution2D', 904),
 ('Dense', 904),
 ('Dropout', 904),
 ('Embedding', 904),
 ('FileLink', 904),
 ('Flatten', 904),
 ('GRU', 904),
 ('GlobalAveragePooling2D', 904),
 ('InputSpec', 904),
 ('LSTM', 904),
 ('Lambda', 904),
 ('Layer', 904),
 ('MaxPooling1D', 904),
 ('MaxPooling2D', 904),
 ('MaxPooling3D', 904),
 ('MixIterator', 904),
 ('Model', 904),
 ('OneHotEncoder', 904),
 ('OrderedDict', 904),
 ('RMSprop', 904),
 ('Reshape', 

In [39]:
vggmodel.compile(Adam(lr = .00001), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [40]:
vggmodel.fit(conv_feat, trn_labels, batch_size = 64, nb_epoch = 3, validation_data = (conv_val_feat, val_labels))

Train on 22424 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fb20e4a38d0>

In [15]:
vggmodel.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
maxpooling2d_6 (MaxPooling2D)    (None, 512, 7, 7)     0           maxpooling2d_input_1[0][0]       
____________________________________________________________________________________________________
flatten_2 (Flatten)              (None, 25088)         0           maxpooling2d_6[0][0]             
____________________________________________________________________________________________________
dropout_3 (Dropout)              (None, 25088)         0           flatten_2[0][0]                  
____________________________________________________________________________________________________
dense_4 (Dense)                  (None, 128)           3211392     dropout_3[0][0]                  
___________________________________________________________________________________________

In [48]:
vggmodel.optimizer.lr = .1
vggmodel.fit(conv_feat, trn_labels, batch_size = 64, nb_epoch = 5)#, validation_data = (conv_val_feat, val_labels))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fb47f208610>

In [50]:
%cd ~/nbs/data/statefarm/
vggmodel.save_weights('models/vggmodel1')

/home/ubuntu/nbs/data/statefarm


In [37]:
vggmodel.load_weights('models/vggmodel1')

Data Augmentation with our Vgg model. Now we have to recreate extra sets of training data, and also add an equivalent amount of training labels for their respective data.

In [14]:
%cd ~/nbs/data/statefarm/

/home/ubuntu/nbs/data/statefarm


In [54]:
vgg_data =  image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
batches_data = get_batches('train/', vgg_data, batch_size= 64, shuffle=False)

Found 22424 images belonging to 10 classes.


In [56]:
conv_data_feat = conv_model.predict_generator(batches_data, batches_data.nb_sample * 5)

In [57]:
save_array('results/conv_data_feat.dat', conv_data_feat)

In [5]:
conv_data_feat = load_array('results/conv_data_feat.dat')

In [9]:
conv_data_feat = np.concatenate([conv_data_feat, conv_feat])

MemoryError: 

At this point, I was going to go through a vgg model while using data augmentation on some of our validation data. However our system ran out of memory with all of the necessary components we need to have. So unfortunately I could not run the rest of the models due to lack of memory space.

In [None]:
trn_data_labels = np.concatenate([trn_labels] * 6)

Let's make a separate model that depicts this new data augmentation batch.

In [None]:
def vggdatamodel(p):
    model = Sequential([
            MaxPooling2D(input_shape = conv_layers[-1].output_shape[1:]),
            Flatten(),
            Dropout(p/4),
            Dense(128, activation = 'relu'),
            BatchNormalization(),
            Dropout(p/2),
            Dense(64, activation = 'relu'),
            BatchNormalization(),
            Dropout(p/4),
            Dense(10, activation = 'softmax')
            ])
    
    return model

In [None]:
vggdatamodel = vggdatamodel(0.8)

In [None]:
vggdatamodel.compile(Adam(0.01), loss = 'categorical_crossentropy', metrics = ['accuracy'])
vggdatamodel.fit(conv_data_feat, trn_data_labels, batch_size = batch_size, nb_epoch = 1, validation_data = (conv_val_feat, val_labels))

In [None]:
vggdatamodel.optimizer.lr = 0.001
vggdatamodel.fit(conv_data_feat, trn_data_labels, batch_size = batch_size, nb_epoch = 1, validation_data = (conv_val_feat, val_labels))

In [None]:
vggdatamodel.save_weights('models/vggdatamodel')

# Pseudo Labeling some more training data to get better results.

This part also suffers from lack of memory, in which I could not successfully transfer all of the data for the training sets.

In [31]:
val_pseudo = vggmodel.predict(conv_val_feat, batch_size = 64)

In [32]:
save_array('results/val_pseudo.dat', val_pseudo)

In [33]:
comb_pseudo = np.concatenate([trn_labels, val_pseudo])

In [34]:
comb_feat = np.concatenate([conv_feat, conv_val_feat])

In [38]:
vggmodel.compile(Adam(lr = 0.000001), loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [None]:
vggmodel.fit(comb_feat, comb_pseudo, nb_epoch = 3, batch_size = 64)#, validation_data = (conv_val_feat, val_labels))

Epoch 1/3

In [None]:
vggmodel.optimizer.lr = 0.0001
vggmodel.fit(comb_feat, comb_pseudo, nb_epoch = 1, batch_size = 64)#, validation_data = (conv_val_feat, val_labels))

In [None]:
vggmodel.save_weights('results/modelpseudo')

# Kaggle Submission Version 1

In [150]:
import bcolz
def save_array(fname, arr): c = bcolz.carray(arr, rootdir = fname, mode = 'w'); c.flush()
def load_array(fname): return bcolz.open(fname)[:]

In [151]:
%cd
%cd nbs/data/statefarm/
test_pred_other = load_array('results/test_feat.dat')

/home/ubuntu
/home/ubuntu/nbs/data/statefarm


In [152]:
subm_other = np.clip(test_pred_other, .07/9, .93)

In [153]:
subm_name = 'results/subm.gz'

In [154]:
classes = sorted(batches.class_indices, key = batches.class_indices.get)

In [155]:
submission = pd.DataFrame(subm_other, columns = classes)
submission.insert(0, 'img', img_filenames)
submission.head()

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_81601.jpg,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778
1,img_14887.jpg,0.299819,0.007778,0.007778,0.085893,0.063129,0.007778,0.007778,0.007778,0.1176,0.42119
2,img_62885.jpg,0.007778,0.007778,0.045387,0.007778,0.019743,0.007778,0.88504,0.007778,0.046638,0.007778
3,img_45125.jpg,0.584869,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.043578,0.232616,0.13616
4,img_22633.jpg,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.04507,0.007778


In [160]:
submission[:5]

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,img_81601.jpg,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778
1,img_14887.jpg,0.299819,0.007778,0.007778,0.085893,0.063129,0.007778,0.007778,0.007778,0.1176,0.42119
2,img_62885.jpg,0.007778,0.007778,0.045387,0.007778,0.019743,0.007778,0.88504,0.007778,0.046638,0.007778
3,img_45125.jpg,0.584869,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.043578,0.232616,0.13616
4,img_22633.jpg,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.04507,0.007778


In [175]:
print(submission[:3])

             img        c0        c1        c2        c3        c4        c5  \
0  img_81601.jpg  0.007778  0.930000  0.007778  0.007778  0.007778  0.007778   
1  img_14887.jpg  0.299819  0.007778  0.007778  0.085893  0.063129  0.007778   
2  img_62885.jpg  0.007778  0.007778  0.045387  0.007778  0.019743  0.007778   

         c6        c7        c8        c9  
0  0.007778  0.007778  0.007778  0.007778  
1  0.007778  0.007778  0.117600  0.421190  
2  0.885040  0.007778  0.046638  0.007778  


In [167]:
np.savetxt('results/test_submission', submission, fmt = '%.6f' * 11, header = '', comments = '')

TypeError: Mismatch between array dtype ('object') and format specifier ('%.6f%.6f%.6f%.6f%.6f%.6f%.6f%.6f%.6f%.6f%.6f')

In [168]:
submission.to_csv(subm_name, index = False, encoding = 'utf-8')

In [157]:
FileLink(subm_name)

# Kaggle Submission Version 2

In [64]:
def do_clip(arr, max): return np.clip(arr, (1 - max)/9, max)

In [24]:
%cd 
%cd nbs/data/statefarm/
test_feat = load_array('results/test_feat.dat')

/home/ubuntu
/home/ubuntu/nbs/data/statefarm


ValueError: too many values to unpack

In [192]:
test_pred = np.clip(test_feat, .01/9, .99)

In [28]:
test_pred[:5]

array([[ 0.0078,  0.93  ,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078],
       [ 0.2998,  0.0078,  0.0078,  0.0859,  0.0631,  0.0078,  0.0078,  0.0078,  0.1176,  0.4212],
       [ 0.0078,  0.0078,  0.0454,  0.0078,  0.0197,  0.0078,  0.885 ,  0.0078,  0.0466,  0.0078],
       [ 0.5849,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0436,  0.2326,  0.1362],
       [ 0.93  ,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0078,  0.0451,  0.0078]], dtype=float32)

In [30]:
test_filenames[:5]

['unknown/img_81601.jpg',
 'unknown/img_14887.jpg',
 'unknown/img_62885.jpg',
 'unknown/img_45125.jpg',
 'unknown/img_22633.jpg']

In [67]:
test_classes = img + test_classes

In [32]:
img_filenames = [(f[8:]) for f in test_filenames]

In [33]:
img_filenames[:5]

['img_81601.jpg',
 'img_14887.jpg',
 'img_62885.jpg',
 'img_45125.jpg',
 'img_22633.jpg']

In [120]:
image_filenames = np.array(img_filenames)

In [121]:
image_filenames[:5]

array(['img_81601.jpg', 'img_14887.jpg', 'img_62885.jpg', 'img_45125.jpg', 'img_22633.jpg'], 
      dtype='|S14')

In [193]:
subm = np.column_stack((image_filenames, test_pred))

In [181]:
subm2 = np.column_stack((image_filenames, test_feat))

In [185]:
full_subm2 = np.row_stack((test_classes, subm2))

In [126]:
subm[:5]

array([['img_81601.jpg', '0.00777777796611', '0.930000007153', '0.00777777796611',
        '0.00777777796611', '0.00777777796611', '0.00777777796611', '0.00777777796611',
        '0.00777777796611', '0.00777777796611', '0.00777777796611'],
       ['img_14887.jpg', '0.299818694592', '0.00777777796611', '0.00777777796611',
        '0.0858930796385', '0.0631291419268', '0.00777777796611', '0.00777777796611',
        '0.00777777796611', '0.117599993944', '0.421190172434'],
       ['img_62885.jpg', '0.00777777796611', '0.00777777796611', '0.0453870147467',
        '0.00777777796611', '0.0197426471859', '0.00777777796611', '0.885040462017',
        '0.00777777796611', '0.0466382727027', '0.00777777796611'],
       ['img_45125.jpg', '0.584869027138', '0.00777777796611', '0.00777777796611',
        '0.00777777796611', '0.00777777796611', '0.00777777796611', '0.00777777796611',
        '0.0435781218112', '0.232615575194', '0.13616040349'],
       ['img_22633.jpg', '0.930000007153', '0.007777777

In [130]:
test_classes = np.array(test_classes)

In [131]:
test_classes[:]

array(['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'], 
      dtype='|S3')

In [66]:
img = ['img']

In [194]:
full_subm = np.row_stack((test_classes, subm))

In [195]:
full_subm[:5]

array([['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'],
       ['img_81601.jpg', '0.00111111113802', '0.990000009537', '0.00111111113802',
        '0.00111111113802', '0.00111111113802', '0.00111111113802', '0.00111111113802',
        '0.00111111113802', '0.00111111113802', '0.00111111113802'],
       ['img_14887.jpg', '0.299818694592', '0.00209501828067', '0.00111111113802',
        '0.0858930796385', '0.0631291419268', '0.00259108375758', '0.00761960726231',
        '0.00111111113802', '0.117599993944', '0.421190172434'],
       ['img_62885.jpg', '0.00111111113802', '0.00111111113802', '0.0453870147467',
        '0.00111111113802', '0.0197426471859', '0.00111364515033', '0.885040462017',
        '0.00111111113802', '0.0466382727027', '0.00116757268552'],
       ['img_45125.jpg', '0.584869027138', '0.00111111113802', '0.00111111113802',
        '0.00111111113802', '0.00111111113802', '0.00111111113802', '0.00111111113802',
        '0.0435781218112', '0.232615575194

In [186]:
full_subm2[:5]

array([['img', 'c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9'],
       ['img_81601.jpg', '4.08481209888e-05', '0.999704658985', '9.58340297075e-06',
        '1.87718032976e-05', '5.32808144271e-07', '2.11346701917e-07', '4.7516565246e-05',
        '2.66900224233e-06', '2.06583695217e-06', '0.000173020656803'],
       ['img_14887.jpg', '0.299818694592', '0.00209501828067', '4.4994885684e-05',
        '0.0858930796385', '0.0631291419268', '0.00259108375758', '0.00761960726231',
        '1.82215462701e-05', '0.117599993944', '0.421190172434'],
       ['img_62885.jpg', '0.000131585198687', '1.12135458039e-05', '0.0453870147467',
        '1.84712334885e-05', '0.0197426471859', '0.00111364515033', '0.885040462017',
        '0.000749175960664', '0.0466382727027', '0.00116757268552'],
       ['img_45125.jpg', '0.584869027138', '0.000525393290445', '0.000197202665731',
        '0.000274943362456', '0.00110527721699', '0.000214307307033', '0.000459816626972',
        '0.0435781218112

In [196]:
import csv
with open('results/please_subm_againplsagain.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerows(full_subm)

In [134]:
%cd
%cd nbs
import bcolz
np.savetxt('data/statefarm/results/test_pred5.csv', subm, delimiter = ' ',
           header = 'img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9', comments = '')
FileLink('data/statefarm/results/test_pred5.csv')

/home/ubuntu
/home/ubuntu/nbs


TypeError: Mismatch between array dtype ('|S32') and format specifier ('%.18e %.18e %.18e %.18e %.18e %.18e %.18e %.18e %.18e %.18e %.18e')

In [17]:
subm_name = 'results/subm.gz'

In [20]:
classes = sorted(batches.class_indices, key = batches.class_indices.get)

In [21]:
submission = pd.DataFrame(subm, columns = classes)
submission.insert(0, 'img', [a[4:] for a in test_filenames])
submission.head()

Unnamed: 0,img,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9
0,own/img_81601.jpg,0.007778,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778
1,own/img_14887.jpg,0.299819,0.007778,0.007778,0.085893,0.063129,0.007778,0.007778,0.007778,0.1176,0.42119
2,own/img_62885.jpg,0.007778,0.007778,0.045387,0.007778,0.019743,0.007778,0.88504,0.007778,0.046638,0.007778
3,own/img_45125.jpg,0.584869,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.043578,0.232616,0.13616
4,own/img_22633.jpg,0.93,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.007778,0.04507,0.007778


In [22]:
submission.to_csv(subm_name, index=False, compression = 'gzip')