In [1]:
import numpy as np
import pandas as pd
import pickle
from os import listdir, getcwd
import tensorflow as tf
from keras.applications import VGG16
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import preprocess_input
from keras.models import Model

Using TensorFlow backend.


In [2]:
def extractFeatures(filename, model):
    """Run one image file through ``model`` and return its output vector.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis of size 1 and passed through the VGG16 ``preprocess_input``
    before prediction.

    Parameters
    ----------
    filename : path of the image file to load.
    model : object exposing ``predict(batch, verbose=0)``.

    Returns
    -------
    The first (and only) row of ``model.predict`` for the single-image batch.
    """
    pixels = img_to_array(load_img(filename, target_size=(224, 224)))
    # Prepend the batch axis: (h, w, c) -> (1, h, w, c).
    batch = pixels.reshape((1,) + pixels.shape[:3])
    # Per the Keras docs, the VGG16 preprocess_input converts RGB -> BGR and
    # zero-centers each channel on the ImageNet means; it does not rescale
    # the pixel values to [-1, 1].
    batch = preprocess_input(batch)
    return model.predict(batch, verbose=0)[0]

def FC3(imgs_folder):
    """Extract VGG16 last-layer outputs for every image under ``imgs_folder``.

    ``imgs_folder`` is expected to contain one sub-folder per class, each
    holding that class's image files. Entries of ``imgs_folder`` that are not
    directories, and entries of a class folder that are not regular files,
    are skipped instead of raising. Folders and files are visited in sorted
    order so the row order of the result is reproducible across runs and
    platforms.

    Parameters
    ----------
    imgs_folder : path of the dataset root directory.

    Returns
    -------
    pandas.DataFrame with one row per image and two columns:
    ``'Images'`` (the name of the class folder the image came from) and
    ``'Features'`` (the vector returned by ``extractFeatures``).
    """
    # Local import: the notebook's import cell only pulls `listdir` and
    # `getcwd` out of `os`, so `os.path` is not otherwise in scope.
    import os

    # load VGG16 model
    base_model = VGG16()
    # FC3 is the last layer in model
    model = Model(inputs=base_model.inputs, outputs=base_model.layers[-1].output)

    Images = []
    Features = []
    for class_folder in sorted(os.listdir(imgs_folder)):
        class_path = os.path.join(imgs_folder, class_folder)
        if not os.path.isdir(class_path):
            # Stray file in the dataset root (readme, archive, ...): listing
            # it as a directory would raise.
            continue
        for file in sorted(os.listdir(class_path)):
            filename = os.path.join(class_path, file)
            if not os.path.isfile(filename):
                continue
            # get FC image features
            Features.append(extractFeatures(filename, model))
            Images.append(class_folder)

    return pd.DataFrame({'Images': Images, 'Features': Features})

In [4]:
# Dataset root; FC3() treats every sub-folder of it as one class.
all_images_folder = r'C:\Users\Shelby\Desktop\UMKC\Academics\Fall20\ComputerVision\Project\NWPU-RESISC45'

# Runs the network once per image file found under the root.
FC3_features = FC3(imgs_folder=all_images_folder)

KeyboardInterrupt: 

In [5]:
# Load the FC3 features from the on-disk cache. If the cache file does not
# exist yet, create it from the in-memory FC3_features produced by FC3(),
# so no line has to be commented in or out between runs.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
features_pkl_path = r'C:\Users\Shelby\Desktop\UMKC\Academics\Fall20\ComputerVision\Project\FC3_features.pkl'
try:
    # `with` guarantees the handle is closed even if unpickling fails.
    with open(features_pkl_path, 'rb') as features_pkl:
        FC3_features = pickle.load(features_pkl)
except FileNotFoundError:
    with open(features_pkl_path, 'wb') as features_pkl:
        pickle.dump(FC3_features, features_pkl)

In [6]:
# Length of the feature vector stored under row label 1, then a preview
# of the first rows of the frame.
print(len(FC3_features['Features'][1]))
FC3_features.head(5)

1000


Unnamed: 0,Images,Features
0,airplane,"[4.2372054e-08, 8.7819e-07, 1.695985e-05, 6.50..."
1,airplane,"[1.9635292e-05, 1.6506528e-05, 0.0007205153, 0..."
2,airplane,"[9.1684065e-09, 1.8724259e-07, 1.8019435e-05, ..."
3,airplane,"[3.087039e-06, 2.9423143e-05, 0.0045943004, 0...."
4,airplane,"[9.787228e-07, 7.053377e-05, 3.2243755e-05, 1...."


In [7]:
# Expand column of list into one column per feature in list
# https://stackoverflow.com/questions/44663903/pandas-split-column-of-lists-of-unequal-length-into-multiple-columns

# The guard makes the cell safe to re-run: once FC3_features has been
# expanded it no longer has a 'Features' column and nothing is left to do.
if 'Features' in FC3_features.columns:
    labels = FC3_features['Images']
    # Reuse the original index so join() pairs every feature row with its
    # own label even when the index is not the default 0..n-1 range.
    feature_columns = pd.DataFrame(
        FC3_features['Features'].tolist(),
        index=FC3_features.index,
    ).add_prefix('feature_')
    FC3_features = feature_columns.join(labels)

In [8]:
# Preview the first five rows of the expanded frame.
FC3_features.head(n=5)

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,...,feature_991,feature_992,feature_993,feature_994,feature_995,feature_996,feature_997,feature_998,feature_999,Images
0,4.237205e-08,8.7819e-07,1.7e-05,6.5e-05,0.004288,1.019812e-06,8.448081e-07,2.044057e-06,2.057535e-07,5.690772e-08,...,2.593912e-06,1.107361e-07,3.157517e-08,5.962774e-08,2.381319e-07,6.128194e-07,7.562264e-08,1.819394e-07,0.000189,airplane
1,1.963529e-05,1.650653e-05,0.000721,0.000378,0.004588,0.0004207369,0.001248385,6.391029e-05,7.595973e-05,6.513813e-05,...,7.955892e-06,4.233636e-06,2.685417e-06,3.055096e-06,1.4331e-05,0.0001386072,1.00312e-05,3.931384e-05,0.002462,airplane
2,9.168406e-09,1.872426e-07,1.8e-05,7e-06,0.00039,5.015593e-08,2.600988e-08,1.162638e-07,2.59788e-08,5.229147e-08,...,3.921651e-09,3.328698e-10,9.699488e-11,1.588696e-09,2.617904e-09,3.557672e-09,6.686002e-11,6.336045e-09,1e-06,airplane
3,3.087039e-06,2.942314e-05,0.004594,0.013314,0.0285,0.000256662,0.0005291572,9.544301e-05,2.569795e-05,2.352959e-06,...,8.031444e-06,9.247121e-07,3.982925e-07,3.173748e-07,2.068906e-06,4.355878e-06,6.715257e-07,3.817584e-06,0.000499,airplane
4,9.787228e-07,7.053377e-05,3.2e-05,1.9e-05,0.000274,1.774996e-05,0.0001707903,7.14962e-07,1.08568e-07,5.605876e-08,...,1.004713e-06,5.986237e-07,1.156408e-07,6.43733e-07,1.048917e-07,1.047089e-06,2.892936e-07,1.618696e-07,0.000412,airplane


In [9]:
# Dimensionality reduction: inputs for the standardization + LDA cells below
# (NOTE(review): the original header said "PCA + LDA", but no PCA step
# appears in the cells shown here).
# https://stackabuse.com/implementing-lda-in-python-with-scikit-learn/

# Select by column name instead of the hard-coded positions 0:1000 / 1000,
# so the split stays correct if the number of feature columns changes.
features = FC3_features.drop(columns='Images').values
labels = FC3_features['Images'].values

In [10]:
# Hold out a test set. test_size is an int, which scikit-learn interprets
# as an absolute number of samples (100), not a fraction.
# NOTE(review): the split is not stratified, so class balance in the
# 100-sample test set is not guaranteed; confirm this is intended.

from sklearn.model_selection import train_test_split

(X_train, X_test,
 y_train, y_test) = train_test_split(
    features,
    labels,
    random_state=17,
    test_size=100,
)

In [11]:
# Feature standardization (this cell was labelled "PCA" but only scales).
# The scaler is fitted on the training split alone and then applied to both
# splits, so the test data does not influence the scaling statistics.

from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
# Number of columns per training sample after scaling.
X_train.shape[1]

1000

In [13]:
# LDA: fit the supervised projection on the training split and its labels,
# then project both splits with the fitted model.

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA().fit(X_train, y_train)
X_train = lda.transform(X_train)
X_test = lda.transform(X_test)

In [17]:
# Display the training matrix; in file order this is the output of
# lda.fit_transform from the LDA cell.
# NOTE(review): this cell's execution count (17) is higher than those of the
# cells that follow it (15, 16), so the cells were run out of order —
# confirm the notebook still works under Restart & Run All.
X_train

array([[ 5.14383279,  2.34454657,  4.57886251, ..., -0.13911935,
         1.05453482,  0.06987746],
       [-2.13665383, -0.17864926, -0.33512668, ..., -1.60200868,
         2.78501045, -1.47134957],
       [-0.16326032, -1.96182248, -0.53698065, ..., -0.29803185,
         0.1143716 , -0.35348575],
       ...,
       [ 4.3465568 , -4.59166212,  4.1882672 , ..., -0.03230894,
         0.90518121,  0.24791668],
       [-2.06793268,  0.47737815, -0.34726663, ...,  0.46451799,
         0.83214859,  0.74334924],
       [-2.04842416,  0.63712416,  0.70920258, ..., -1.23209085,
        -3.44861441,  0.5168884 ]])

In [15]:
# Saving reduced data
# Order inside the pickled list: X_train, X_test, y_train, y_test.
data = [X_train, X_test, y_train, y_test]
# `with` closes (and therefore flushes) the file deterministically instead of
# relying on garbage collection of an anonymous handle.
with open(r'C:\Users\Shelby\Desktop\UMKC\Academics\Fall20\ComputerVision\Project\reduced_data.pkl', 'wb') as reduced_pkl:
    pickle.dump(data, reduced_pkl)

In [16]:
# Export the same four arrays as a MATLAB .mat file.
# The file name is relative, so it is written to the current working directory.
# https://stackoverflow.com/questions/25859968/in-matlab-how-to-read-python-pickle-file
import numpy
import scipy.io

scipy.io.savemat(
    'reduced_data.mat',
    mdict={
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    },
)