In [1]:
import scipy.io as sio
import numpy as np

## Load SAT-6 training and test data
### sat-6-full.mat contains the following variables:
```
train_x        --------------    28x28x4x324000 uint8  (containing 324000 training samples of 28x28 images each with 4 channels - R, G, B and NIR)
train_y        --------------    6x324000       double (containing 6x1 vectors having labels for the 324000 training samples)
test_x         --------------    28x28x4x81000  uint8  (containing 81000 test samples of 28x28 images each with 4 channels - R, G, B and NIR)
test_y         --------------    6x81000        double (containing 6x1 vectors having labels for the 81000 test samples)
annotations    --------------    6x2            cell   (containing the class label annotations for the 6 classes of SAT-6)
```

In [2]:
data = sio.loadmat('/home/gadiraju/data/bl-slums/sat-6-full.mat')
train_X = data['train_x'].transpose()
train_Y = data['train_y'].transpose()
test_X = data['test_x'].transpose()
test_Y = data['test_y'].transpose()
annotations = data['annotations']

print train_X.shape, train_Y.shape, test_X.shape, test_Y.shape, annotations

train_X = train_X.reshape(train_X.shape[0], 28,28,4)
test_X = test_X.reshape(test_X.shape[0],28,28,4)full 

print train_X.shape, test_X.shape

(324000, 4, 28, 28) (324000, 6) (81000, 4, 28, 28) (81000, 6) [[array([u'100000'],
      dtype='<U6')
  array([u'building'],
      dtype='<U8')]
 [array([u'010000'],
      dtype='<U6')
  array([u'barren land'],
      dtype='<U11')]
 [array([u'001000'],
      dtype='<U6')
  array([u'trees'],
      dtype='<U5')]
 [array([u'000100'],
      dtype='<U6')
  array([u'grassland'],
      dtype='<U9')]
 [array([u'000010'],
      dtype='<U6') array([u'road'],
      dtype='<U4')]
 [array([u'000001'],
      dtype='<U6')
  array([u'water'],
      dtype='<U5')]]
(324000, 28, 28, 4) (81000, 28, 28, 4)


## Get class proportions in training and test in SAT-6

In [3]:
per_class_counts_train = np.sum(train_Y, axis =0)
per_class_counts_test = np.sum(test_Y, axis=0)

print per_class_counts_train, per_class_counts_test

[ 14923  73397  56809  50347   8192 120332] [ 3714 18367 14185 12596  2070 30068]


## Divide training set into train and validation
### Also reshape the arrays to be suitable with tflearn

In [20]:
P_TRAIN = 0.8
P_VAL = 1 - P_TRAIN

train_X_indices = dict.fromkeys(range(6))
print train_X_indices

for k in range(6):
    train_X_indices[k] = []

for i in range(train_X.shape[0]):#train_X.shape[0]
    current_class = train_Y[i]
    k, = np.where(current_class==1)
    #print k[0]
    train_X_indices[k[0]].append(i)


trainX=[]
trainY=[]
testX=[]
testY=[]
valX=[]
valY=[]

for k in range(6):
    n_train = int(P_TRAIN*len(train_X_indices[k]))
    per_class_indices = train_X_indices[k]
    train_indices = per_class_indices[0:n_train]
    val_indices = per_class_indices[n_train+1:len(train_X_indices[k])]
    print len(per_class_indices), len(train_indices), len(val_indices)
    for ind in train_indices:
        trainX.append(train_X[ind,:,:,:])
        trainY.append(train_Y[ind,:])
    for ind in val_indices:
        valX.append(train_X[ind,:,:,:])
        valY.append(train_Y[ind,:])

trainX = np.asarray(trainX)
trainY = np.asarray(trainY)
valX = np.asarray(valX)
valY = np.asarray(valY)

print trainX.shape, trainY.shape, valX.shape, valY.shape

{0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
14923 11938 2984
73397 58717 14679
56809 45447 11361
50347 40277 10069
8192 6553 1638
120332 96265 24066
(259197, 28, 28, 4) (259197, 6) (64797, 28, 28, 4) (64797, 6)


In [21]:
print valY

[[1 0 0 0 0 0]
 [1 0 0 0 0 0]
 [1 0 0 0 0 0]
 ..., 
 [0 0 0 0 0 1]
 [0 0 0 0 0 1]
 [0 0 0 0 0 1]]


## Write train and validation files to disk

In [22]:
import pickle

f = open('/home/gadiraju/data/bl-slums/trainSATX','w')
pickle.dump(trainX,f)
f.close()

f = open('/home/gadiraju/data/bl-slums/trainSATY','w')
pickle.dump(trainY,f)
f.close()

f = open('/home/gadiraju/data/bl-slums/valSATX','w')
pickle.dump(valX,f)
f.close()

f = open('/home/gadiraju/data/bl-slums/valSATY','w')
pickle.dump(valY,f)
f.close()

f = open('/home/gadiraju/data/bl-slums/testSATX','w')
pickle.dump(testX,f)
f.close()

f = open('/home/gadiraju/data/bl-slums/testSATY','w')
pickle.dump(testY,f)
f.close()