In [1]:
import numpy as np
from sklearn.decomposition import PCA
import scipy.io as sio
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import os
import random
from random import shuffle
from skimage.transform import rotate
import scipy.ndimage
from spectral import *
import spectral.io.envi as envi
from PIL import Image

In [8]:
def loadDataset(header_path='../../hyperImage/buidling_maskedout.hdr',
                data_path='../../hyperImage/buidling_maskedout',
                label_path='../../hyperImage/shapefile/roadclass.tif'):
    """Load an ENVI image and its label raster.

    Parameters
    ----------
    header_path : str
        Path of the ENVI header file passed to ``envi.open``.
    data_path : str
        Path of the ENVI data file passed to ``envi.open``.
    label_path : str
        Path of the label image opened with PIL.

    Returns
    -------
    (data, labels)
        ``data`` is whatever ``image.load()`` returns for the ENVI image and
        ``labels`` is the label image converted with ``np.array``.

    The defaults reproduce the paths that were previously hard-coded, so
    ``loadDataset()`` behaves as before.
    """
    image = envi.open(header_path, data_path)
    data = image.load()
    # Convert inside the context manager so the pixel data is read before the
    # underlying file handle is released.
    with Image.open(label_path) as label_data:
        labels = np.array(label_data)
    return data, labels

def splitData(X, y, size= 0.2):
    """Split samples and labels into stratified train and test subsets.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    y : array-like
        Labels aligned with ``X``; also used as the stratification key.
    size : float
        Value forwarded as ``test_size`` to ``train_test_split``.

    Returns
    -------
    (X_train, X_test, y_train, y_test)
    """
    # Use the `size` argument: the previous body read a notebook-global
    # `testRatio`, which ignored the caller's value and failed on a fresh kernel.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=size, random_state=345, stratify=y)
    return X_train, X_test, y_train, y_test

def oversampleWeakClasses(X, y):
    """Oversample under-represented classes by whole-sample repetition.

    Every class is repeated ``round(maxCount / classCount)`` times, where
    ``maxCount`` is the size of the largest class. The repeated samples are
    concatenated in ascending label order and then shuffled with a
    permutation drawn after seeding NumPy's global RNG with 42.

    Parameters
    ----------
    X : ndarray
        4-D array of samples, indexed by sample along axis 0.
    y : ndarray
        1-D label array aligned with ``X``.

    Returns
    -------
    (newX, newY)
        The oversampled, shuffled samples and labels.
    """
    classes, counts = np.unique(y, return_counts=True)
    inverse_ratios = np.max(counts) / counts

    x_chunks = []
    y_chunks = []
    for cls, ratio in zip(classes, inverse_ratios):
        selector = y == cls
        times = round(ratio)
        x_chunks.append(X[selector, :, :, :].repeat(times, axis=0))
        y_chunks.append(y[selector].repeat(times, axis=0))
    newX = np.concatenate(x_chunks)
    newY = np.concatenate(y_chunks)

    # Fixed seed: the shuffle is reproducible (this reseeds the global RNG).
    np.random.seed(seed=42)
    order = np.random.permutation(newY.shape[0])
    return newX[order, :, :, :], newY[order]

def standartizeData(X):
    """Standardize a 3-D array along its last axis.

    The array is flattened to ``(-1, X.shape[2])``, a ``StandardScaler`` is
    fitted on and applied to those rows, and the result is reshaped back to
    ``(X.shape[0], X.shape[1], X.shape[2])``.

    Returns
    -------
    (newX, scaler)
        The scaled array and the fitted scaler.
    """
    n_rows, n_cols, n_bands = X.shape[0], X.shape[1], X.shape[2]
    flat = np.reshape(X, (-1, n_bands))
    scaler = preprocessing.StandardScaler().fit(flat)
    scaled = scaler.transform(flat)
    return np.reshape(scaled, (n_rows, n_cols, n_bands)), scaler
def applyPCA(X, numComponents=75):
    """Project the last axis of a 3-D array onto whitened principal components.

    The array is flattened to ``(-1, X.shape[2])``, a ``PCA`` with
    ``n_components=numComponents`` and ``whiten=True`` is fitted on and applied
    to those rows, and the result is reshaped to
    ``(X.shape[0], X.shape[1], numComponents)``.

    Returns
    -------
    (newX, pca)
        The transformed array and the fitted PCA object.
    """
    n_rows, n_cols = X.shape[0], X.shape[1]
    flat = np.reshape(X, (-1, X.shape[2]))
    pca = PCA(n_components=numComponents, whiten=True)
    projected = pca.fit_transform(flat)
    return np.reshape(projected, (n_rows, n_cols, numComponents)), pca

def padWithZeros(X, margin=2):
    """Return a copy of ``X`` surrounded by ``margin`` zeros on its first two axes.

    Parameters
    ----------
    X : ndarray
        3-D array; the third axis is left unpadded.
    margin : int
        Number of zero rows/columns added on each side.

    Returns
    -------
    ndarray
        Array of shape ``(X.shape[0] + 2*margin, X.shape[1] + 2*margin,
        X.shape[2])`` created with ``np.zeros`` (default float dtype).
    """
    n_rows, n_cols, n_bands = X.shape[0], X.shape[1], X.shape[2]
    padded = np.zeros((n_rows + 2 * margin, n_cols + 2 * margin, n_bands))
    # Drop the original data into the centre of the zero canvas.
    padded[margin:margin + n_rows, margin:margin + n_cols, :] = X
    return padded

def createPatches(X, y, windowSize=5, removeZeroLabels = True):
    """Cut a square patch around every pixel of ``X`` and pair it with its label.

    Parameters
    ----------
    X : ndarray
        3-D array; patches are taken over the first two axes after zero
        padding with ``padWithZeros``.
    y : ndarray
        2-D label array indexed as ``y[row, col]`` for each pixel of ``X``.
    windowSize : int
        Side length of each patch. Must be a positive odd number so that the
        patch is centred on its pixel.
    removeZeroLabels : bool
        When true, patches whose label is not greater than 0 are dropped and
        the remaining labels are decremented by one.

    Returns
    -------
    (patchesData, patchesLabels)
        ``patchesData`` has shape ``(n, windowSize, windowSize, X.shape[2])``
        and ``patchesLabels`` has shape ``(n,)``; pixels are visited row by row.

    Raises
    ------
    ValueError
        If ``windowSize`` is not a positive odd number.
    """
    # An even window has no centre pixel: previously windowSize=2 silently
    # broadcast a 1x1 patch into a 2x2 slot and larger even sizes failed with
    # an opaque broadcasting error.
    if windowSize < 1 or windowSize % 2 == 0:
        raise ValueError("windowSize must be a positive odd integer, got %r" % (windowSize,))
    margin = int((windowSize - 1) / 2)
    zeroPaddedX = padWithZeros(X, margin=margin)
    rows, cols, bands = X.shape[0], X.shape[1], X.shape[2]
    # split patches
    patchesData = np.zeros((rows * cols, windowSize, windowSize, bands))
    patchesLabels = np.zeros((rows * cols))
    patchIndex = 0
    for r in range(rows):
        for c in range(cols):
            # In padded coordinates the window centred on pixel (r, c) starts at (r, c).
            patchesData[patchIndex, :, :, :] = zeroPaddedX[r:r + windowSize, c:c + windowSize]
            patchesLabels[patchIndex] = y[r, c]
            patchIndex += 1
    if removeZeroLabels:
        keep = patchesLabels > 0
        patchesData = patchesData[keep, :, :, :]
        patchesLabels = patchesLabels[keep] - 1
    return patchesData, patchesLabels

def createPatches_val(X, windowSize=5):
    """Cut a square patch around every pixel of ``X`` (no labels involved).

    Parameters
    ----------
    X : ndarray
        3-D array; patches are taken over the first two axes after zero
        padding with ``padWithZeros``.
    windowSize : int
        Side length of each patch. Must be a positive odd number so that the
        patch is centred on its pixel.

    Returns
    -------
    ndarray
        Array of shape ``(X.shape[0] * X.shape[1], windowSize, windowSize,
        X.shape[2])``; pixels are visited row by row.

    Raises
    ------
    ValueError
        If ``windowSize`` is not a positive odd number.
    """
    # An even window has no centre pixel: previously windowSize=2 silently
    # broadcast a 1x1 patch into a 2x2 slot and larger even sizes failed with
    # an opaque broadcasting error.
    if windowSize < 1 or windowSize % 2 == 0:
        raise ValueError("windowSize must be a positive odd integer, got %r" % (windowSize,))
    margin = int((windowSize - 1) / 2)
    zeroPaddedX = padWithZeros(X, margin=margin)
    rows, cols, bands = X.shape[0], X.shape[1], X.shape[2]
    # split patches
    patchesData = np.zeros((rows * cols, windowSize, windowSize, bands))
    patchIndex = 0
    for r in range(rows):
        for c in range(cols):
            # In padded coordinates the window centred on pixel (r, c) starts at (r, c).
            patchesData[patchIndex, :, :, :] = zeroPaddedX[r:r + windowSize, c:c + windowSize]
            patchIndex += 1
    return patchesData
    

def AugmentData(X_train):
    """Randomly flip or rotate the first half of the patches, in place.

    For each of the first ``int(X_train.shape[0] / 2)`` samples one of three
    transforms is picked with ``random.randint(0, 2)``:

    * 0 - flip along the first patch axis (``np.flipud``),
    * 1 - flip along the second patch axis (``np.fliplr``),
    * 2 - rotate in the plane of the first two patch axes by an angle drawn
      from ``random.randrange(-180, 180, 30)`` degrees, keeping the shape.

    Parameters
    ----------
    X_train : ndarray
        4-D array indexed by sample along axis 0. It is modified in place.

    Returns
    -------
    ndarray
        The same array object that was passed in.
    """
    for i in range(int(X_train.shape[0] / 2)):
        patch = X_train[i, :, :, :]
        num = random.randint(0, 2)
        if num == 0:
            # Copy: the flip is a view onto the slot about to be overwritten.
            augmented = np.flipud(patch).copy()
        elif num == 1:
            augmented = np.fliplr(patch).copy()
        else:
            angle = random.randrange(-180, 180, 30)
            # scipy.ndimage.rotate is the public name; the
            # scipy.ndimage.interpolation namespace is deprecated.
            augmented = scipy.ndimage.rotate(patch, angle, axes=(1, 0),
                                             reshape=False, output=None, order=3,
                                             mode='constant', cval=0.0, prefilter=False)
        # Write back inside the loop so every visited patch is replaced
        # (previously only the last one was, and an input with fewer than two
        # samples raised because nothing had been assigned yet).
        X_train[i, :, :, :] = augmented

    return X_train


def savePreprocessedData(X_Patches, y_Patches, X_vals_Patches, windowSize, wasPCAapplied = False, numPCAComponents = 0, testRatio = 0.25):
    """Write the three patch arrays to ``.npy`` files.

    With ``wasPCAapplied`` true the files go to the current directory and the
    names encode ``windowSize``, ``numPCAComponents`` and ``testRatio``;
    otherwise they go to ``../preprocessedData/`` and the names encode only
    ``windowSize``.

    Parameters
    ----------
    X_Patches, y_Patches, X_vals_Patches : array-like
        Arrays handed to ``np.save``, written in this order.
    windowSize, numPCAComponents, testRatio
        Values embedded in the file names via ``str``.
    wasPCAapplied : bool
        Selects the naming scheme and output location described above.
    """
    window_tag = str(windowSize)
    if wasPCAapplied:
        tail = window_tag + "PCA" + str(numPCAComponents) + "testRatio" + str(testRatio) + ".npy"
        targets = (
            ("X_Patches_" + tail, X_Patches),
            ("yPatches_" + tail, y_Patches),
            ("X_vals_Patches_" + tail, X_vals_Patches),
        )
    else:
        tail = window_tag + ".npy"
        targets = (
            ("../preprocessedData/XWindowSize" + tail, X_Patches),
            ("../preprocessedData/yWindowSize" + tail, y_Patches),
            ("../preprocessedData/XvalsWindowSize" + tail, X_vals_Patches),
        )

    for path, array in targets:
        with open(path, 'bw') as outfile:
            np.save(outfile, array)
            
            
            

In [9]:
# Load the global values (windowSize, numPCAcomponents, testRatio) from the text file
# global_variables.txt. Each line is expected to look like "<name>=<value>"; the first
# three characters of the name select the variable ("win" -> windowSize,
# "num" -> numPCAcomponents, anything else -> testRatio).
with open('global_variables.txt', 'r') as myFile:  # context manager closes the handle
    file = myFile.readlines()

for line in file:
    entry = line.strip()
    if not entry:
        # Blank lines used to fall into the testRatio branch and crash float().
        continue

    name, separator, value = entry.partition('=')
    if not separator:
        # No '=' on the line: treat the whole line as the value, as before.
        value = entry
    # Strip instead of slicing fixed offsets, so the value survives a missing
    # trailing newline and any amount of spacing around '='.
    value = value.strip()

    if entry[0:3] == "win":
        windowSize = int(value, 10)
    elif entry[0:3] == "num":
        numPCAcomponents = int(value, 10)
    else:
        testRatio = float(value)



In [None]:
# Read the image data into X and the label raster into y using loadDataset's paths.
X, y = loadDataset()

In [17]:
#X, pca = applyPCA(X, numPCAcomponents)

In [19]:
# Cut one windowSize x windowSize patch around every pixel of X (no labels are used).
X_vals_Patches = createPatches_val(X, windowSize=windowSize)
# Display the resulting array shape.
X_vals_Patches.shape

(2508114, 5, 5, 15)

In [20]:
# Run AugmentData over the per-pixel patches; the array is modified in place and rebound.
# NOTE(review): these patches cover every pixel of X and are the ones saved at the end of
# the notebook; augmenting them may be unintended - confirm.
X_vals_Patches = AugmentData(X_vals_Patches)

In [7]:
# Build labelled patches. With the default removeZeroLabels=True, pixels whose label is
# not greater than 0 are dropped and the remaining labels are shifted down by one.
XPatches, yPatches = createPatches(X, y, windowSize=windowSize)


(24101, 5, 5, 15)

In [10]:
#X_train, X_test, y_train, y_test = splitData(XPatches, yPatches, testRatio)

In [11]:
#X_train, y_train = oversampleWeakClasses(X_train, y_train)

In [12]:
#X_train = AugmentData(X_train)

In [8]:
# Repeat the samples of smaller classes (round(maxCount / classCount) times each) and
# shuffle the result; the function seeds NumPy's global RNG with 42 before shuffling.
XPatches, yPatches = oversampleWeakClasses(XPatches, yPatches)

In [9]:
# Run AugmentData over the oversampled patches; the array is modified in place and rebound.
XPatches = AugmentData(XPatches)

# Validation Dataset

In [28]:
# Open the ENVI image used as the validation scene and load its data.
val_image = envi.open('../../hyperImage/08_buildingmaskedout.hdr', '../../hyperImage/08_buildingmaskedout')
val_img = val_image.load()
# Display the shape of the loaded array.
val_img.shape

(931, 2763, 186)

In [29]:
# Fit a new whitened PCA on the validation image and keep numPCAcomponents components.
# NOTE(review): the matching applyPCA call on X is commented out earlier in the notebook,
# and this PCA is fitted on the validation image itself, so the band count and projection
# may not match the training patches - confirm.
X_val, pca_val = applyPCA(val_img, numPCAcomponents)

In [33]:
# Cut one windowSize x windowSize patch around every pixel of the PCA-reduced validation image.
XPatches_val = createPatches_val(X_val, windowSize=windowSize)

In [34]:
# AugmentData modifies XPatches_val in place and returns the same array, so after this
# line X_val and XPatches_val refer to one array and X_val no longer holds the PCA image.
# NOTE(review): augmenting validation patches may be unintended - confirm.
X_val = AugmentData(XPatches_val)

In [37]:
#savePreprocessedData(X_train, X_test, y_train, y_test, XPatches, yPatches, X_val, windowSize = windowSize, 
                     #wasPCAapplied=True, numPCAComponents = numPCAcomponents,testRatio = testRatio)

In [22]:
# Write the training patches, their labels and X_vals_Patches to .npy files.
# wasPCAapplied=True selects the file names that embed numPCAcomponents and testRatio.
# NOTE(review): the applyPCA call on X is commented out earlier, and the array saved here
# is X_vals_Patches (patches cut from X), not the X_val built in the validation section -
# confirm both are intended.
savePreprocessedData(XPatches, yPatches, X_vals_Patches, windowSize = windowSize, 
                     wasPCAapplied=True, numPCAComponents = numPCAcomponents,testRatio = testRatio)