In [38]:
!git clone https://github.com/pruvi007/ML_Datasets.git

fatal: destination path 'ML_Datasets' already exists and is not an empty directory.


In [39]:
!pip install split-folders



In [0]:
import split_folders
split_folders.ratio('ML_Datasets/UCMerced_LandUse/Images', output="output", seed=1337, ratio=(.8, .2)) 

In [0]:
# !rm -rf output

In [0]:

# Show pictures
import os, random,shutil
import matplotlib.pyplot as plt
import numpy as np
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

train_dir = 'output/train'
validation_dir = 'output/val' 
test_dir = 'output/test'

train_size, validation_size, test_size = 1680, 420, 210

img_width, img_height = 224, 224  # Default input size for Mobile

In [0]:

# Instantiate convolutional base
from keras.applications import MobileNet

conv_base = MobileNet(weights='imagenet', 
                  include_top=False,
                  input_shape=(img_width, img_height, 3))  # 3 = number of channels in RGB pictures

In [43]:
# Check architecture
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
__________

In [45]:
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 32

def extract_features(directory, sample_count):
    features = np.zeros(shape=(sample_count, 7, 7, 1024))  # Must be equal to the output of the convolutional base
    labels = np.zeros(shape=(sample_count,21))
    # Preprocess data
    generator = datagen.flow_from_directory(directory,
                                            target_size=(img_width,img_height),
                                            batch_size = batch_size,
                                            class_mode='categorical')
    # Pass data through convolutional base
    i = 0
    for inputs_batch, labels_batch in generator:
        features_batch = conv_base.predict(inputs_batch)
        features[i * batch_size: (i + 1) * batch_size] = features_batch
        labels[i * batch_size: (i + 1) * batch_size] = labels_batch
        i += 1
        if i * batch_size >= sample_count:
            break
    return features, labels
    
train_features, train_labels = extract_features(train_dir, train_size)
validation_features, validation_labels = extract_features(validation_dir, validation_size)

Found 1680 images belonging to 21 classes.
Found 420 images belonging to 21 classes.


In [0]:
labels_train = []
labels_validation = []
labels_test = []
for i in range(len(train_labels)):
  labels_train.append(np.argmax(train_labels[i]))
for i in range(len(validation_labels)):
  labels_validation.append(np.argmax(validation_labels[i]))
  
#for i in range(len(test_labels)):
 # labels_test.append(np.argmax(test_labels[i]))  
  

In [0]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC 
from scipy.ndimage import convolve
from sklearn import linear_model, datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline
from sklearn.base import clone

In [0]:
def scale(X, eps = 0.001):
	# scale the data points s.t the columns of the feature space
	# (i.e the predictors) are within the range [0, 1]
	return (X - np.min(X, axis = 0)) / (np.max(X, axis = 0) + eps)

In [51]:
X_train, y_train = train_features.reshape(1680,7*7*1024), labels_train
X_train = scale(X_train)
X_test = scale(validation_features.reshape(420,7*7*1024))
y_test = labels_validation
X_test.shape

(420, 50176)

In [0]:
# Models we will use
logistic = linear_model.LogisticRegression(solver='lbfgs', max_iter=10000,
                                           multi_class='multinomial')
rbm = BernoulliRBM(random_state=0, verbose=True)

rbm_features_classifier = Pipeline(
    steps=[('rbm', rbm), ('logistic', logistic)])

# #############################################################################
# Training

# Hyper-parameters. These were set by cross-validation,
# using a GridSearchCV. Here we are not performing cross-validation to
# save time.
rbm.learning_rate = 0.06
rbm.n_iter = 10
# More components tend to give better prediction performance, but larger
# fitting time
rbm.n_components = 200#acc =70
logistic.C = 6000

In [53]:
# Training RBM-Logistic Pipeline
rbm_features_classifier.fit(X_train, y_train)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -12670.82, time = 48.87s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -11277.81, time = 50.87s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -10909.00, time = 50.84s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -10573.44, time = 51.50s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -10433.13, time = 49.75s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -10310.42, time = 50.45s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -10238.72, time = 50.37s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -10242.41, time = 49.36s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -10138.05, time = 50.19s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -9989.02, time = 49.98s


Pipeline(memory=None,
     steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.06, n_components=200, n_iter=10,
       random_state=0, verbose=True)), ('logistic', LogisticRegression(C=6000, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=10000, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False))])

In [54]:
Y_pred = rbm_features_classifier.predict(X_test)
print("logistic using RBM features:\n%s\n" % (
    metrics.classification_report(y_test,Y_pred)))

logistic using RBM features:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        20
           1       0.77      1.00      0.87        20
           2       0.65      0.75      0.70        20
           3       1.00      1.00      1.00        20
           4       0.32      0.35      0.33        20
           5       1.00      1.00      1.00        20
           6       0.41      0.35      0.38        20
           7       1.00      1.00      1.00        20
           8       1.00      0.85      0.92        20
           9       0.70      0.80      0.74        20
          10       0.87      1.00      0.93        20
          11       0.71      0.60      0.65        20
          12       0.75      0.60      0.67        20
          13       0.61      0.55      0.58        20
          14       0.80      1.00      0.89        20
          15       0.95      0.95      0.95        20
          16       0.89      0.85      0.87        2

In [0]:
svm = SVC(C=1)
rbm = BernoulliRBM(random_state=0, verbose=True)
rbm_classifier = Pipeline(
    steps=[('rbm', rbm), ('svm', svm)])

rbm.learning_rate = 0.06
rbm.n_iter = 10
# More components tend to give better prediction performance, but larger
# fitting time
rbm.n_components = 200#acc =70

In [56]:
# Training RBM-Logistic Pipeline
rbm_classifier.fit(X_train, y_train)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -12670.82, time = 48.23s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -11277.81, time = 49.46s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -10909.00, time = 50.23s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -10573.44, time = 50.92s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -10433.13, time = 50.43s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -10310.42, time = 51.00s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -10238.72, time = 50.39s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -10242.41, time = 49.42s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -10138.05, time = 50.15s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -9989.02, time = 49.83s




Pipeline(memory=None,
     steps=[('rbm', BernoulliRBM(batch_size=10, learning_rate=0.06, n_components=200, n_iter=10,
       random_state=0, verbose=True)), ('svm', SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))])

In [57]:
Y_pred = rbm_classifier.predict(X_test)
print("SVM using RBM features:\n%s\n" % (
    metrics.classification_report(y_test,Y_pred)))

SVM using RBM features:
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        20
           1       0.63      0.95      0.76        20
           2       0.52      0.80      0.63        20
           3       0.91      1.00      0.95        20
           4       0.29      0.20      0.24        20
           5       1.00      1.00      1.00        20
           6       0.50      0.15      0.23        20
           7       1.00      1.00      1.00        20
           8       0.89      0.80      0.84        20
           9       0.83      0.50      0.62        20
          10       0.87      1.00      0.93        20
          11       0.45      0.50      0.48        20
          12       0.54      0.65      0.59        20
          13       0.59      0.65      0.62        20
          14       0.62      0.90      0.73        20
          15       1.00      1.00      1.00        20
          16       0.88      0.75      0.81        20
   