In [0]:
# Run this cell to mount your Google Drive.
from google.colab import drive
drive.mount('GoogleDrive')

In [0]:
# !fusermount -u GoogleDrive

In [0]:
!cat /proc/cpuinfo | grep model\ name

model name	: Intel(R) Xeon(R) CPU @ 2.30GHz
model name	: Intel(R) Xeon(R) CPU @ 2.30GHz


## 函数定义

In [0]:
import matplotlib.pyplot as plt
import numpy.linalg as LA
import scipy.io as scio
import numpy as np
import xlrd,random
import os,time,sys
import pywt
import warnings
warnings.filterwarnings('ignore')

In [0]:
def norm(x):
    assert type(x) == np.ndarray, "x must be a numpy.ndarray array!"
    return (x - x.min()) / (x.max() - x.min())

In [0]:
def print_progress(num, max_num):
    
    progress = num / max_num
    # Define the length of bar
    barLength = 50

    # Ceck the input!
    assert type(progress) is float, "id is not a float: %r" % id
    assert 0 <= progress <= 1, "variable should be between zero and one!"

    # Empty status while processing.
    status = ""

    # This part is to make a new line when the process is finished.
    if progress >= 1:
        progress = 1
        status = "\r\n"

    # Where we are in the progress!
    indicator = int(round(barLength * progress))

    # Print the appropriate progress phase!
    list = ["#" * indicator , ">" * (barLength - indicator), progress * 100]
    text = "\r{0[0]} {0[1]} {0[2]:.2f}% completed.{1}".format(list, status)
    sys.stdout.write(text)
    sys.stdout.flush()

In [0]:
def get_xlsxlist(xlsx_path, mode):
    return [os.path.join(xlsx_path, f) for f in os.listdir(xlsx_path) if f.endswith(mode)]

In [0]:
# 一维数据固定采样数
def sample_num(x, num=3000):
    dim = x.flatten().shape[0]
    interval = int(dim / num)
    return np.array([x[interval * i] for i in range(num)])

In [0]:
# 一维数据随机采样（经纬度信息及速占比信息）
def random_sample(x, z, num_dim=1000, num_sample=10):
    dim = x.flatten().shape[0]
    init_sp = np.zeros(num_dim)
    init_sd = np.zeros(num_dim)
    for i in range(num_sample):
        ls = random.sample(range(dim), num_dim)
        ls.sort()
        init_sp = np.vstack([init_sp, x[ls]])
        init_sd = np.vstack([init_sd, z[ls]])
    return init_sp[1:], init_sd[1:]
    

In [0]:
def HIST(sample_sd, num_bin, max_speed):
    sample_sd[sample_sd >= max_speed] = max_speed
    init_n = np.zeros(num_bin)
    for z in sample_sd:
        n, bins = np.histogram(z, num_bin, normed=True)
        init_n = np.vstack([init_n, n])
    return init_n[1:]

In [0]:
#@markdown **特征提取函数**
def feature_extr(NAME_list, feature_dim=1000, num_bin=100, max_speed=25, perc=1.):
    print('Start to extract the features, {:.2f}% features are used...'.format(perc*100))
    init_m = np.zeros(feature_dim) # 轨迹图特征初始化
    init_n = np.zeros(num_bin) # 速占比初始化
    # inv = 10 #@param {type:"integer"}
    for num, name in enumerate(NAME):
        max_num = len(NAME)
        print_progress(num + 1, max_num)
        excel_trawl = xlrd.open_workbook(name)
        sheet = excel_trawl.sheet_by_index(0)
        num_f = int((sheet.nrows - 1) * perc)
        x = sheet.col_values(1)[1:num_f]
        y = sheet.col_values(2)[1:num_f]
        data_position = np.vstack([np.array(x).reshape(1, -1), np.array(y).reshape(1, -1)])
        data_cov = np.cov(data_position)
        w, v = LA.eig(data_cov)
        data_pca = np.dot(v[:, 0].T, data_position).flatten()
    #     data_pca = np.dot(v[:, 1].T, data_position).flatten()
#         sample_data = sample_num(data_pca, feature_dim)
        z = np.array(sheet.col_values(3)[1:num_f])
        sample_sp, sample_sd = random_sample(data_pca, z, feature_dim, 50)
        init_m = np.vstack([init_m, sample_sp]) # pca特征采样
        n = HIST(sample_sd, num_bin, max_speed)
        init_n = np.vstack([init_n, n])
        
#         fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 6))
#         n, bins, patches = plt.hist(z_lim, num_bin, normed=True, rwidth=0.8)
    
    #@markdown - **提取的刺网样本1000维特征**
    gill_net_sample = init_m[1:]
    gill_net_sample = (gill_net_sample - gill_net_sample.min()) / (gill_net_sample.max() - gill_net_sample.min())
    
    #@markdown - **速占比归一化特征提取**
    speed_feature = init_n[1:]
    speed_feature_g = speed_feature / speed_feature.max()
    
    print('Feature extraction completed.')
    return (gill_net_sample, speed_feature_g)
    

## 小波特征提取

In [0]:
def wavelet_f(features, wavelet_name='db4', level=4, mode='symmetric'):
    wavelet = pywt.Wavelet(wavelet_name)
    coeffs_t = pywt.wavedec(features[0], wavelet.name, mode, level)
    w_f = np.zeros((features.shape[0], len(coeffs_t[0])))
    for i, f in enumerate(features):
        coeffs = pywt.wavedec(f, wavelet.name, mode, level)
        w_f[i] = coeffs[0]
#         w_f[i] = np.hstack([coeffs[0], coeffs[1]])
    return w_f

In [31]:
#@markdown - **载入特征数据**
data_path = './GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features'
data_name = os.path.join(data_path, 'feature_rd50_full_v1.mat')
feature_data = scio.loadmat(data_name)
feature_v = feature_data['feature_v']
feature_s = feature_data['feature_s']
feature_v.shape

(47550, 1000)

In [0]:
# pywt.families()
pywt.wavelist('coif')

In [46]:
s_t = time.time()
feature_dwt = wavelet_f(feature_v, wavelet_name='db4', level=5)
d_t = time.time()
print(d_t - s_t)
feature_dwt.shape

4.927726745605469


(47550, 38)

In [0]:
# gill_net_sample: 173, net_sample: 77, trawl: 445, cage: 127, fishing rod: 6, mixed: 61, seine: 62

# label[17300:25000], label[25000:69500], label[69500:82200], label[82200:82800], label[82800:88900], label[88900:]= range(1, 7)
n = feature_dwt.shape[1]
x = np.linspace(0, n, n)
Y = np.vstack([feature_dwt[1000], feature_dwt[10000], feature_dwt[25000], 
              feature_dwt[35000], feature_dwt[41200], feature_dwt[42000], feature_dwt[45000]])
NAME = ['gill_net', 'net', 'trawl', 'cage', 'fishing_rod', 'mixed', 'seine']
COLOR = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
plt.figure(1, (12, 9))
for y, name, color in zip(Y, NAME, COLOR):
    plt.plot(x, y, c=color, label=name)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.legend(loc='center right', fontsize=14)
plt.show()

In [33]:
data_path = './GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features'
data_name = os.path.join(data_path, 'feature_rd50_dwt_v1.mat')

data_name

'./GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features/feature_rd50_dwt_v1.mat'

In [0]:
scio.savemat(data_name, {'feature_v': feature_dwt, 'feature_s': feature_s})


## 刺网特征采样（1000维）

In [0]:
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/trawl', 'xlsx') # trawl
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/net', 'xlsx') # net
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/gill_net', 'xlsx') # gill net
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

173

In [0]:
start_time = time.time()
gill_net_sample, speed_feature_g = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Gill net samples:\n', gill_net_sample)
print('Speed_features:\n', speed_feature_g)


Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 631.8006s.
Gill net samples:
 [[0.94690873 0.94690869 0.94690888 ... 0.94834862 0.94794636 0.94794539]
 [0.94690927 0.94690876 0.94690869 ... 0.94797395 0.94796314 0.94794431]
 [0.94690888 0.94690868 0.94690862 ... 0.94816735 0.94818371 0.94817963]
 ...
 [0.08577738 0.08577995 0.08572185 ... 0.08086261 0.0830942  0.08320004]
 [0.08556817 0.08559063 0.08568213 ... 0.08100469 0.08247423 0.08268454]
 [0.08577406 0.08577378 0.08577283 ... 0.08081814 0.08088292 0.08191494]]
Speed_features:
 [[0.57923597 0.11844726 0.07655737 ... 0.00144448 0.00433344 0.00144448]
 [0.55300148 0.11810333 0.06252529 ... 0.         0.         0.00138945]
 [0.39185189 0.12064175 0.02899143 ... 0.         0.         0.00093521]
 ...
 [0.29907932 0.08510387 0.06686733 ... 0.         0.         0.00121577]
 [0.3226941  0.09396457 0.07047342 ...

In [0]:
gill_net_sample.shape

(8650, 1000)

In [0]:
speed_feature_g.shape

(8650, 100)

## 张网特征采样（1000维）

In [0]:
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/trawl', 'xlsx') # trawl
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/net', 'xlsx') # net
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/gill_net', 'xlsx') # gill net
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

77

In [0]:
start_time = time.time()
net_sample, speed_feature_n = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Net samples:\n', net_sample)
print('Speed_features:\n', speed_feature_n)


Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 215.5246s.
Net samples:
 [[0.96300957 0.96300976 0.96300956 ... 0.96143097 0.96143085 0.96142971]
 [0.96300974 0.96300952 0.96300973 ... 0.96143057 0.9614308  0.96142971]
 [0.9630094  0.96300955 0.96300969 ... 0.96143048 0.96143008 0.96143078]
 ...
 [0.96085523 0.96085543 0.96085485 ... 0.96085236 0.96085265 0.96085257]
 [0.96085568 0.96085602 0.96085658 ... 0.96085249 0.96085258 0.96085229]
 [0.96085558 0.96085565 0.9608555  ... 0.96085273 0.96085255 0.96085248]]
Speed_features:
 [[0.47133527 0.12626996 0.06857765 ... 0.         0.         0.00108853]
 [0.6118068  0.16457961 0.1019678  ... 0.         0.         0.00178891]
 [0.4602122  0.21883289 0.12201592 ... 0.         0.         0.00132626]
 ...
 [0.60422783 0.16911333 0.11098062 ... 0.         0.0017616  0.0017616 ]
 [0.48782863 0.22930867 0.07156767 ... 0.  

In [0]:
net_sample.shape

(3850, 1000)

In [0]:
speed_feature_n.shape

(3850, 100)

## 拖网特征采样（1000维）

In [0]:
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/trawl', 'xlsx') # trawl
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/net', 'xlsx') # net
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/gill_net', 'xlsx') # gill net
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

445

In [0]:
start_time = time.time()
trawl_sample, speed_feature_t = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Trawl samples:\n', trawl_sample)
print('Speed_features:\n', speed_feature_t)


Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 1342.6659s.
Trawl samples:
 [[0.98696792 0.98727605 0.98735087 ... 0.98108719 0.98109477 0.98095969]
 [0.98688436 0.98714077 0.98724631 ... 0.98089135 0.98091415 0.98096117]
 [0.98706197 0.98707342 0.98735055 ... 0.98098534 0.98107359 0.98110125]
 ...
 [0.83217512 0.83226602 0.83230713 ... 0.83136913 0.8314039  0.83140397]
 [0.83243294 0.83229488 0.83222621 ... 0.83149618 0.83150425 0.83150659]
 [0.83216475 0.83210516 0.83208985 ... 0.83155967 0.83152315 0.83135789]]
Speed_features:
 [[0.09191255 0.02362239 0.01417343 ... 0.         0.         0.0004295 ]
 [0.10512821 0.06923077 0.05726496 ... 0.         0.         0.0008547 ]
 [0.13852048 0.02947244 0.01031536 ... 0.         0.         0.00147362]
 ...
 [0.23173848 0.10573619 0.02819632 ... 0.00088113 0.         0.00088113]
 [0.35585141 0.03944773 0.01150559 ... 0

In [0]:
trawl_sample.shape

(22250, 1000)

In [0]:
speed_feature_t.shape

(22250, 100)

## 笼壶特征采样（1000维）

In [0]:
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/cage', 'xlsx') # cage
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/fishing_rod', 'xlsx') # fishing_rod
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//mixed', 'xlsx') # mixed
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//seine', 'xlsx') # seine
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

127

In [0]:
start_time = time.time()
cage_sample, speed_feature_c = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Cage samples:\n', cage_sample)
print('Speed_features:\n', speed_feature_c)

Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 337.9824s.
Cage samples:
 [[0.87901739 0.87906452 0.87899938 ... 0.85661694 0.85646591 0.85658804]
 [0.87901958 0.87903211 0.87888803 ... 0.85664031 0.85644786 0.85665907]
 [0.87900482 0.87898996 0.87887501 ... 0.85659306 0.85647517 0.85668229]
 ...
 [0.98400471 0.98395612 0.98390776 ... 0.98742219 0.98745123 0.98741706]
 [0.98393703 0.9840228  0.98401123 ... 0.98742874 0.98731889 0.98723598]
 [0.98400812 0.98400864 0.98409107 ... 0.9872605  0.98741706 0.98740559]]
Speed_features:
 [[0.13913043 0.06956522 0.05217391 ... 0.         0.         0.00124224]
 [0.19367589 0.08102767 0.06324111 ... 0.         0.00197628 0.00197628]
 [0.09178744 0.08695652 0.02898551 ... 0.         0.         0.00161031]
 ...
 [0.18032458 0.08174714 0.05049088 ... 0.         0.         0.00120216]
 [0.2300582  0.10065046 0.06573091 ... 0. 

In [0]:
cage_sample.shape

(6350, 1000)

In [0]:
speed_feature_c.shape

(6350, 100)

## 钓具特征采样（1000维）

In [0]:
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/cage', 'xlsx') # cage
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/fishing_rod', 'xlsx') # fishing_rod
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//mixed', 'xlsx') # mixed
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//seine', 'xlsx') # seine
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

6

In [0]:
start_time = time.time()
fr_sample, speed_feature_f = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Fishing rod samples:\n', fr_sample)
print('Speed_features:\n', speed_feature_f)

Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 27.3762s.
Fishing rod samples:
 [[0.5369906  0.53899597 0.50622573 ... 0.10596972 0.10875864 0.1473042 ]
 [0.537093   0.53710947 0.5371269  ... 0.19473275 0.20460268 0.21850948]
 [0.50617574 0.50619483 0.47032945 ... 0.09211271 0.13001973 0.16695549]
 ...
 [0.6602016  0.66776015 0.67700783 ... 0.73127702 0.73116448 0.7308604 ]
 [0.6842346  0.68012022 0.68041128 ... 0.73132671 0.73088786 0.73085979]
 [0.67742597 0.68255397 0.679402   ... 0.73294913 0.73448606 0.73104726]]
Speed_features:
 [[0.86262857 0.09471429 0.06702857 ... 0.         0.         0.00145714]
 [0.77436735 0.20816327 0.09325714 ... 0.         0.00499592 0.00166531]
 [0.867      0.07431429 0.05537143 ... 0.         0.         0.00145714]
 ...
 [0.458592   0.145248   0.177888   ... 0.         0.004896   0.003264  ]
 [0.45296327 0.14488163 0.11157551 .

In [0]:
fr_sample.shape

(300, 1000)

In [0]:
speed_feature_f.shape

(300, 100)

## 杂渔具特征采样（1000维）

In [0]:
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/cage', 'xlsx') # cage
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/fishing_rod', 'xlsx') # fishing_rod
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//mixed', 'xlsx') # mixed
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//seine', 'xlsx') # seine
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

61

In [0]:
start_time = time.time()
mixed_sample, speed_feature_m = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Mixed samples:\n', mixed_sample)
print('Speed_features:\n', speed_feature_m)

Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 209.8324s.
Mixed samples:
 [[0.01893458 0.01858993 0.01899375 ... 0.01602981 0.01603903 0.01614386]
 [0.01875615 0.01856601 0.0190012  ... 0.01614152 0.0161438  0.0161445 ]
 [0.01869587 0.01860901 0.01888443 ... 0.01583591 0.01585504 0.01586357]
 ...
 [0.00344679 0.00340063 0.00324312 ... 0.01006116 0.01014195 0.01006874]
 [0.00373959 0.00263367 0.00263327 ... 0.01010066 0.01012681 0.01004838]
 [0.00359005 0.00357119 0.00332068 ... 0.01011293 0.01002378 0.01008221]]
Speed_features:
 [[0.45804989 0.20357773 0.17225808 ... 0.         0.         0.00195748]
 [0.48642466 0.24321233 0.16619509 ... 0.         0.         0.00202677]
 [0.48057693 0.17458459 0.15768931 ... 0.         0.         0.00187725]
 ...
 [0.53219465 0.14664187 0.18783341 ... 0.         0.         0.00164766]
 [0.36552381 0.09160998 0.09252608 ... 0.

In [0]:
mixed_sample.shape

(3050, 1000)

In [0]:
speed_feature_m.shape

(3050, 100)

## 围网特征采样（1000维）

In [0]:
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/cage', 'xlsx') # cage
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS/fishing_rod', 'xlsx') # fishing_rod
# xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//mixed', 'xlsx') # mixed
xlls = get_xlsxlist('./GoogleDrive/My Drive/Colab Notebooks/VMS//seine', 'xlsx') # seine
NAME = xlls[:]
file_path = './GoogleDrive/My Drive/Colab Notebooks/VMS'
num_bin = 100
max_speed = 25

In [0]:
len(NAME)

62

In [0]:
start_time = time.time()
seine_sample, speed_feature_s = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Seine samples:\n', seine_sample)
print('Speed_features:\n', speed_feature_s)

Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 199.4093s.
Seine samples:
 [[0.19726793 0.19726808 0.1972681  ... 0.19721563 0.19748262 0.19730804]
 [0.19726823 0.19726796 0.19726818 ... 0.19719502 0.19727604 0.19713674]
 [0.19726775 0.19726838 0.19726834 ... 0.19747343 0.19748517 0.19744973]
 ...
 [0.95176561 0.95180364 0.95180509 ... 0.9654513  0.96544111 0.96544007]
 [0.95176565 0.95177643 0.95179599 ... 0.96544416 0.96544453 0.96547532]
 [0.9517657  0.95180261 0.95173935 ... 0.96544905 0.96544474 0.96547081]]
Speed_features:
 [[0.31401099 0.08571429 0.03461538 ... 0.00082418 0.         0.00082418]
 [0.29081633 0.0773362  0.05558539 ... 0.         0.         0.00080559]
 [0.32063492 0.07857143 0.0968254  ... 0.00079365 0.         0.00079365]
 ...
 [0.29054054 0.12644788 0.09555985 ... 0.         0.         0.00096525]
 [0.30326877 0.10078692 0.09170702 ... 0.

In [0]:
seine_sample.shape

(3100, 1000)

In [0]:
speed_feature_s.shape

(3100, 100)

## PCA降维

In [0]:
gill_net_sample
gill_net_sample.shape

(8650, 1000)

In [0]:
speed_feature_g
speed_feature_g.shape

(8650, 100)

In [0]:
net_sample
net_sample.shape

(3850, 1000)

In [0]:
speed_feature_n
speed_feature_n.shape

(3850, 100)

In [0]:
trawl_sample
trawl_sample.shape

(22250, 1000)

In [0]:
speed_feature_t
speed_feature_t.shape

(22250, 100)

In [0]:
cage_sample
cage_sample.shape

(6350, 1000)

In [0]:
speed_feature_c
speed_feature_c.shape

(6350, 100)

In [0]:
fr_sample
fr_sample.shape

(300, 1000)

In [0]:
speed_feature_f
speed_feature_f.shape

(300, 100)

In [0]:
mixed_sample
mixed_sample.shape

(3050, 1000)

In [0]:
speed_feature_m
speed_feature_m.shape

(3050, 100)

In [0]:
seine_sample
seine_sample.shape

(3100, 1000)

In [0]:
speed_feature_s
speed_feature_s.shape

(3100, 100)

In [0]:
# data_path = './GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features'
# weights_name = os.path.join(data_path, 'pca_weights.mat')
# pca_weights = scio.loadmat(weights_name)['pca_weights']
# pca_weights.shape

(36, 1000)

In [0]:
#@markdown - **特征的PCA降维（显示第一项）**
feature_all = np.vstack([gill_net_sample, net_sample, trawl_sample, 
                        cage_sample, fr_sample, mixed_sample, seine_sample])
# feature_all_cov = np.cov(feature_all.T)
# w, v = LA.eig(feature_all_cov)

# feature_all_pca = np.dot(v[:, :36].T, feature_all.T)
# # feature_all_pca = np.dot(pca_weights, feature_all.T)
# feature_all_pca.T[:, 0].real
feature_all.shape

(47550, 1000)

In [0]:
feature_all.max()

1.0

In [0]:
feature_all.min()

0.0

In [0]:
# feature_v = (feature_all - feature_all.min()) / (feature_all.max() - feature_all.min())
feature_v = feature_all

In [0]:
# feature_v.max()

1.0

In [0]:
# feature_v.min()

0.0

In [0]:
#@markdown - **特征归一化**
feature_v = feature_all_pca.T[:, 1:].real
feature_v = feature_v / feature_v.max()
# feature_v = (feature_v - feature_v.min()) / (feature_v.max() - feature_v.min())
feature_v.shape

(95100, 35)

## 特征合并及存储

In [0]:
feature_s = np.vstack([speed_feature_g, speed_feature_n, speed_feature_t, 
                      speed_feature_c, speed_feature_f, speed_feature_m, speed_feature_s])
feature_s.shape

(47550, 100)

In [0]:
feature_s.max()

1.0

In [0]:
feature_s.min()

0.0

In [0]:
data_path = './GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features'
data_name = os.path.join(data_path, 'feature_rd50_full_v6.mat')
data_name

'./GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features/feature_rd50_full_v6.mat'

In [0]:
scio.savemat(data_name, {'feature_v': feature_v, 'feature_s': feature_s})
# scio.savemat(data_name, {'feature_v': feature_v, 'feature_s': feature_s, 'label': label})

In [0]:
feature = np.c_[feature_v, feature_s]
feature.shape

(9510, 1100)

In [0]:
feature

array([[-5.79633246e-02,  1.18536046e-01,  3.46046754e-02, ...,
         1.37398142e-03,  0.00000000e+00,  1.37398142e-03],
       [-5.95206511e-02,  1.15811546e-01,  3.56636666e-02, ...,
         0.00000000e+00,  0.00000000e+00,  1.07310957e-03],
       [-5.87282929e-02,  1.10644291e-01,  3.97041524e-02, ...,
         0.00000000e+00,  0.00000000e+00,  9.67210603e-04],
       ...,
       [ 6.55300672e-01, -1.84903056e-01, -3.50209221e-02, ...,
         0.00000000e+00,  0.00000000e+00,  4.95867769e-04],
       [ 6.98863598e-01, -1.91597820e-01,  2.11584027e-02, ...,
         1.11681930e-03,  1.11681930e-03,  1.11681930e-03],
       [ 6.75386609e-01, -1.96252806e-01,  6.26829182e-02, ...,
         0.00000000e+00,  0.00000000e+00,  6.42315762e-04]])

In [0]:
data_path = './GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features'
data_name = os.path.join(data_path, 'feature_rd100.mat')
data_name

'./GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features/feature_rd100.mat'

In [0]:
# gill_net_sample: 173, net_sample: 77, trawl: 445, cage: 127, fishing rod: 6, mixed: 61, seine: 62
label = np.zeros(9510)
label[1730:2500], label[2500:6950], label[6950:8220], label[8220:8280], label[8280:8890], label[8890:]= range(1, 7)
label.shape

(9510,)

In [0]:
scio.savemat(data_name, {'feature': feature, 'label': label})

In [0]:
feature_rd100_full = os.path.join(data_path, 'feature_rd100_full.mat')
scio.savemat(feature_rd100_full, {'feature': feature_all})

In [0]:
pca_weights

'./GoogleDrive/My Drive/Colab Notebooks/VMS/partial_features/pca_weights_rd100.mat'

In [0]:
pca_weights = os.path.join(data_path, 'pca_weights_rd100.mat')

In [0]:
scio.savemat(pca_weights, {'pca_weights': v[:, :36].T})