# Augsburg数据集
来自仓库https://github.com/danfenghong/ISPRS_S2FL
训练集和测试集的划分在数据集中已经给出
训练集为TrainImage，测试集为TestImage
注意：这个数据集太大了，所以我把 patch size 设置成了 7 × 7

In [1]:
import scipy.io as scio
import numpy as np

In [2]:

# 1

# Paths of the five .mat files that make up the HS-SAR-DSM Augsburg dataset:
# DSM, hyperspectral, SAR, test label map and train label map (in that order).
a1, a2, a3, a4, a5 = (
    r'HS-SAR-DSM Augsburg/data_DSM.mat',
    r'HS-SAR-DSM Augsburg/data_HS_LR.mat',
    r'HS-SAR-DSM Augsburg/data_SAR_HR.mat',
    r'HS-SAR-DSM Augsburg/TestImage.mat',
    r'HS-SAR-DSM Augsburg/TrainImage.mat',
)
# Load the files in path order; each name is bound to the dict returned by
# scipy.io.loadmat (the arrays are pulled out of these dicts in the next cell).
dsm, hsi, sar, test, train = (
    scio.loadmat(mat_path) for mat_path in (a1, a2, a3, a4, a5)
)

In [3]:
# Replace each loadmat dict with the single array it carries.
hsi, sar = hsi['data_HS_LR'], sar['data_SAR_HR']
test, train = test['TestImage'], train['TrainImage']
# Give the DSM an explicit trailing channel axis so it can go through the same
# (height, width, band) helpers as the hyperspectral and SAR arrays.
dsm = np.expand_dims(dsm['data_DSM'], axis=2)

In [4]:
# Sanity-check the loaded shapes, one per line. Output recorded for this run:
#   hsi   -> (332, 485, 180)
#   sar   -> (332, 485, 4)
#   test  -> (332, 485)
#   train -> (332, 485)
#   dsm   -> (332, 485, 1)   (after the expand_dims above)
print(*(array.shape for array in (hsi, sar, test, train, dsm)), sep="\n")

(332, 485, 180)
(332, 485, 4)
(332, 485)
(332, 485)
(332, 485, 1)


In [11]:
def choose_true_point(true_data, num_classes):
    """Collect the coordinates of every pixel labelled 0..num_classes.

    Args:
        true_data: label map; compared element-wise against each class id.
        num_classes: highest class id; ids 0 through num_classes inclusive
            are collected (so the 0 label is included here).

    Returns:
        A tuple ``(positions, counts)``. ``positions`` is an int array with
        one row of indices per matching pixel, grouped by class id in
        ascending order. ``counts[k]`` is the number of pixels with label k.
    """
    class_ids = range(num_classes + 1)
    coords_by_class = {cid: np.argwhere(true_data == cid) for cid in class_ids}
    counts = [coords_by_class[cid].shape[0] for cid in class_ids]

    # Stack the per-class coordinate blocks in class order.
    blocks = [coords_by_class[0]] + [coords_by_class[cid] for cid in class_ids[1:]]
    positions = np.concatenate(blocks, axis=0).astype(int)
    return positions, counts


def choose_img_point(height, width):
    """Return the (row, col) index of every pixel of a height x width grid.

    Args:
        height: number of rows.
        width: number of columns.

    Returns:
        An integer array of shape ``(height * width, 2)`` in row-major order
        (the column index varies fastest). A grid with no pixels yields an
        empty ``(0, 2)`` array rather than a 1-D float array, so callers can
        always index the two columns.
    """
    # Build both coordinate columns in NumPy instead of a Python-level double
    # loop: each row index is repeated `width` times, the column range is
    # tiled `height` times.
    row_idx = np.repeat(np.arange(height), width)
    col_idx = np.tile(np.arange(width), height)
    total_pos_true = np.column_stack((row_idx, col_idx))
    return total_pos_true


# 1
def chooose_point(test_data, num_classes):
    """Collect the coordinates of every pixel labelled 1..num_classes.

    Label 0 is never matched, so pixels carrying it are left out.

    Args:
        test_data: label map; compared element-wise against each class id.
        num_classes: number of classes; ids 1 through num_classes are used.

    Returns:
        A tuple ``(positions, counts)``. ``positions`` is an int array with
        one row of indices per matching pixel, grouped by class id in
        ascending order. ``counts[k]`` is the number of pixels whose label
        is ``k + 1``.
    """
    slots = range(num_classes)
    # Slot k holds the coordinates of class id k + 1.
    coords_by_slot = {k: np.argwhere(test_data == (k + 1)) for k in slots}
    counts = [coords_by_slot[k].shape[0] for k in slots]

    # Stack the per-class coordinate blocks in class order.
    blocks = [coords_by_slot[0]] + [coords_by_slot[k] for k in slots[1:]]
    positions = np.concatenate(blocks, axis=0).astype(int)
    return positions, counts


def mirror_hsi(input_normalize, patch=5):
    """Pad an image cube by mirroring its borders so edge pixels get full patches.

    ``patch // 2`` rows/columns are added on every side of the two spatial
    axes; the band axis is left untouched. The border is an edge-inclusive
    reflection (the outermost row/column is repeated first), which is what
    ``np.pad(..., mode="symmetric")`` produces. Using ``np.pad`` also handles
    a padding wider than the image, which the previous hand-written loops
    rejected with an IndexError.

    Args:
        input_normalize: array of shape (height, width, band).
        patch: side length of the square patch that will be cut later.

    Returns:
        A new float32 array of shape
        ``(height + 2 * (patch // 2), width + 2 * (patch // 2), band)``.

    Raises:
        ValueError: if the input is not 3-D, or if a spatial axis is empty
            while the padding is non-zero (an empty axis cannot be mirrored).
    """
    padding = patch // 2
    # Cast first so the padded copy is allocated directly as float32.
    source = np.asarray(input_normalize, dtype=np.float32)
    padded = np.pad(
        source,
        ((padding, padding), (padding, padding), (0, 0)),
        mode="symmetric",
    )

    print("**************************************************")
    print("patch is : {}".format(patch))
    print("mirror_image shape : [{0},{1},{2}]".format(padded.shape[0], padded.shape[1], padded.shape[2]))
    print("**************************************************")
    return padded


def gain_neighborhood_pixel(mirror_image, point, i, patch=5):
    """Cut the patch x patch window whose top-left corner is ``point[i]``.

    Args:
        mirror_image: 3-D image array to cut from.
        point: 2-D array of coordinates; columns 0 and 1 of row ``i`` are
            used as the row and column offsets into ``mirror_image``.
        i: index of the coordinate row to use.
        patch: side length of the window.

    Returns:
        The slice ``mirror_image[r:r + patch, c:c + patch, :]``.
    """
    top, left = point[i, 0], point[i, 1]
    row_window = slice(top, top + patch)
    col_window = slice(left, left + patch)
    return mirror_image[row_window, col_window, :]




def get_data(mirror_image, test_point, patch=5):
    """Cut one patch per coordinate out of a padded image.

    Args:
        mirror_image: 3-D (height, width, band) image to cut from.
        test_point: 2-D array with one (row, col) coordinate per sample.
        patch: side length of each square patch.

    Returns:
        A float32 array of shape ``(len(test_point), patch, patch, band)``
        whose k-th entry is the window returned by
        ``gain_neighborhood_pixel`` for coordinate k.
    """
    _, _, band = mirror_image.shape
    sample_count = test_point.shape[0]
    x_test = np.zeros((sample_count, patch, patch, band), dtype=np.float32)

    for k in range(sample_count):
        x_test[k] = gain_neighborhood_pixel(mirror_image, test_point, k, patch)

    print("x_test  shape = {}, type = {}".format(x_test.shape, x_test.dtype))
    print("**************************************************")
    return x_test


def get_label(number_test, num_classes):
    """Build the label vector that matches class-grouped sample positions.

    Args:
        number_test: per-class sample counts; entry k belongs to label k + 1.
        num_classes: how many leading entries of ``number_test`` to use.

    Returns:
        A 1-D array holding label ``k + 1`` repeated ``number_test[k]`` times,
        for k = 0..num_classes-1 in that order.
    """
    repeated = [
        class_slot + 1
        for class_slot in range(num_classes)
        for _ in range(number_test[class_slot])
    ]
    y_test = np.array(repeated)

    print("y_test: shape = {} ,type = {}".format(y_test.shape, y_test.dtype))
    print("**************************************************")
    return y_test

def norm(data):
    """Min-max scale every band of an image cube to the range [0, 1].

    Each band (index along axis 2) is scaled independently with its own
    minimum and maximum.

    Args:
        data: array of shape (height, width, band), any numeric dtype.

    Returns:
        A float64 array of the same shape. A band whose values are all equal
        has no range to scale by and is returned as zeros instead of NaN.
    """
    input_normalize = np.zeros(data.shape)

    for i in range(data.shape[2]):
        # Work in float64 so integer inputs (e.g. int16) cannot overflow when
        # the minimum is subtracted or the range is computed.
        band = data[:, :, i].astype(np.float64)
        input_min = band.min()
        band_range = band.max() - input_min
        # A zero range would divide by zero; leave that band at 0.
        if band_range != 0:
            input_normalize[:, :, i] = (band - input_min) / band_range
    return input_normalize

# 归一化和镜像填充（patch 就是 patch size，这里是 7）

In [12]:
# Labels 1..num_classes are collected below; chooose_point never matches 0.
num_classes = 7
# Side length of the square patch cut around every sample.
patch = 7
# Scale every hyperspectral band to [0, 1] and save that un-padded cube.
hsi = norm(hsi)
scio.savemat(r'HSI_norm.mat',{'Data':hsi})
# Mirror-pad by patch // 2 on each side. From here on `hsi` is the padded
# cube, so it must not be normalised or saved as "HSI_norm" again.
hsi = mirror_hsi(hsi,patch)
# Sample coordinates (grouped by class) and per-class counts for both splits.
# The same coordinates are reused for the SAR and DSM cells below.
total_pos_test, number_test = chooose_point(test, num_classes)
total_pos_train, number_train = chooose_point(train, num_classes)

**************************************************
patch is : 7
mirror_image shape : [338,491,180]
**************************************************


# 测试集和训练集的样例个数

In [13]:
# Notebook display: per-class sample counts of the test split (labels 1..num_classes).
number_test

[13361, 30065, 3830, 26609, 523, 1638, 1507]

In [14]:
# Notebook display: per-class sample counts of the train split (labels 1..num_classes).
number_train

[146, 264, 21, 248, 52, 7, 23]

In [15]:
# Cut and save the hyperspectral patches one split at a time. Each array is
# deleted right after it is written — presumably to keep peak memory down,
# since the test array alone is (77533, 7, 7, 180) float32.
HSI_Te = get_data(hsi, total_pos_test,patch=patch,)
scio.savemat("HSI_Te.mat",{"Data":HSI_Te})
del HSI_Te
HSI_Tr = get_data(hsi, total_pos_train,patch=patch,)
scio.savemat("HSI_Tr.mat",{"Data":HSI_Tr})
del HSI_Tr
# The padded hyperspectral cube is not used by any later cell.
del  hsi


x_test  shape = (77533, 7, 7, 180), type = float32
**************************************************
x_test  shape = (761, 7, 7, 180), type = float32
**************************************************


In [16]:
# Same pipeline as for the hyperspectral data, applied to the SAR array:
# normalise per band, save the un-padded result, then mirror-pad.
sar = norm(sar)
scio.savemat(r'SAR_norm.mat',{'Data':sar})
sar = mirror_hsi(sar,patch)

# Reuse the sample coordinates computed from the label maps so that SAR
# patches line up index-for-index with the HSI patches.
SAR_Te = get_data(sar, total_pos_test,patch=patch,)
SAR_Tr = get_data(sar, total_pos_train,patch=patch,)
scio.savemat("SAR_Te.mat",{"Data":SAR_Te})
scio.savemat("SAR_Tr.mat",{"Data":SAR_Tr})



**************************************************
patch is : 7
mirror_image shape : [338,491,4]
**************************************************
x_test  shape = (77533, 7, 7, 4), type = float32
**************************************************
x_test  shape = (761, 7, 7, 4), type = float32
**************************************************


In [17]:
# Same pipeline for the single-band DSM array: normalise, save the un-padded
# result, mirror-pad, then cut patches at the shared sample coordinates.
dsm = norm(dsm)
scio.savemat(r'DSM_norm.mat',{'Data':dsm})
dsm = mirror_hsi(dsm,patch)
DSM_Te = get_data(dsm, total_pos_test,patch=patch,)
DSM_Tr = get_data(dsm, total_pos_train,patch=patch,)
scio.savemat("DSM_Te.mat",{"Data":DSM_Te})
scio.savemat("DSM_Tr.mat",{"Data":DSM_Tr})

**************************************************
patch is : 7
mirror_image shape : [338,491,1]
**************************************************
x_test  shape = (77533, 7, 7, 1), type = float32
**************************************************
x_test  shape = (761, 7, 7, 1), type = float32
**************************************************


In [18]:

# Build the label vectors from the per-class counts. This relies on the
# patches being grouped by class in ascending order, which is the order in
# which chooose_point concatenates the coordinates.
TeLabel = get_label(number_test, num_classes)
TrLabel = get_label(number_train, num_classes)
scio.savemat("TeLabel.mat",{"Data":TeLabel})
scio.savemat("TrLabel.mat",{"Data":TrLabel})

y_test: shape = (77533,) ,type = int64
**************************************************
y_test: shape = (761,) ,type = int64
**************************************************


In [19]:
# Notebook display: the test label vector (values 1..num_classes, grouped by class).
TeLabel

array([1, 1, 1, ..., 7, 7, 7])