In [0]:
# Run this cell to mount your Google Drive.
# Colab-only: uses google.colab.drive and mounts at the relative mount point 'GoogleDrive'.
from google.colab import drive
drive.mount('GoogleDrive')

In [0]:
# !fusermount -u GoogleDrive

In [0]:
# Show the CPU model of the runtime by filtering the 'model name' lines of /proc/cpuinfo.
!cat /proc/cpuinfo | grep model\ name

model name	: Intel(R) Xeon(R) CPU @ 2.30GHz
model name	: Intel(R) Xeon(R) CPU @ 2.30GHz


## 函数定义

In [0]:
import matplotlib.pyplot as plt
import numpy.linalg as LA
import scipy.io as scio
import numpy as np
import xlrd,random
import os,time,sys
import pywt
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical (e.g. divide-by-zero) warnings
# raised by the cells below — consider narrowing the filter.
warnings.filterwarnings('ignore')

In [0]:
def norm(x):
    """Min-max scale an array to the range [0, 1] using its global min and max.

    Parameters
    ----------
    x : numpy.ndarray
        Input array of any shape.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``x``. A constant input
        (``max == min``) yields all zeros instead of the NaNs that a 0/0
        division would produce.

    Raises
    ------
    AssertionError
        If ``x`` is not a ``numpy.ndarray`` (subclasses are accepted).
    """
    assert isinstance(x, np.ndarray), "x must be a numpy.ndarray array!"
    low = x.min()
    span = x.max() - low
    # A zero range would divide 0 by 0; return a well-defined result instead.
    if span == 0:
        return np.zeros(x.shape, dtype=float)
    return (x - low) / span

In [0]:
def print_progress(num, max_num):
    """Render a one-line text progress bar on stdout.

    The line starts with a carriage return so successive calls overwrite each
    other; once the progress reaches 100% a line break is appended.

    Parameters
    ----------
    num : int or float
        Number of completed steps.
    max_num : int or float
        Total number of steps. Zero is not handled specially (a plain-int zero
        raises ZeroDivisionError from the division).

    Raises
    ------
    AssertionError
        If ``num / max_num`` falls outside the interval [0, 1].
    """
    # Length of the bar in characters.
    bar_length = 50

    # Coerce to a built-in float so numpy scalars (e.g. np.int64 counters)
    # are accepted as well as plain Python numbers.
    progress = float(num / max_num)

    # Check the input.
    assert 0 <= progress <= 1, "variable should be between zero and one!"

    # Finish the line only when the process is complete.
    status = "\r\n" if progress >= 1 else ""

    # How many bar cells are already filled.
    indicator = int(round(bar_length * progress))

    done = "#" * indicator
    todo = ">" * (bar_length - indicator)
    text = "\r{0} {1} {2:.2f}% completed.{3}".format(done, todo, progress * 100, status)
    sys.stdout.write(text)
    sys.stdout.flush()

In [0]:
def get_xlsxlist(xlsx_path, mode):
    """Return the paths of all entries in a directory whose name ends with ``mode``.

    Parameters
    ----------
    xlsx_path : str
        Directory to scan (not recursive).
    mode : str
        File-name suffix to keep, e.g. ``'xlsx'``.

    Returns
    -------
    list of str
        ``os.path.join(xlsx_path, name)`` for every matching entry, sorted so
        the order is reproducible (``os.listdir`` returns entries in
        arbitrary order).
    """
    return sorted(
        os.path.join(xlsx_path, entry)
        for entry in os.listdir(xlsx_path)
        if entry.endswith(mode)
    )

In [0]:
# Fixed-count sampling of 1-D data
def sample_num(x, num=3000):
    """Pick ``num`` evenly spaced values from the flattened input.

    The stride is ``len(flattened x) // num``; the samples are the elements at
    positions ``0, stride, 2 * stride, ...``.

    Parameters
    ----------
    x : array_like
        Input data; it is flattened before sampling.
    num : int, optional
        Number of samples to return (default 3000).

    Returns
    -------
    numpy.ndarray
        1-D array with exactly ``num`` elements.

    Raises
    ------
    ValueError
        If ``num`` is smaller than 1 or the input has fewer than ``num``
        elements (the stride would be 0 and every sample would be ``x[0]``).
    """
    flat = np.asarray(x).flatten()
    dim = flat.shape[0]
    if num < 1:
        raise ValueError("num must be a positive integer, got %r" % (num,))
    interval = dim // num
    if interval < 1:
        raise ValueError(
            "cannot draw %d samples from an input with only %d elements" % (num, dim)
        )
    # Index the flattened data so multi-dimensional inputs behave consistently
    # with the length used to compute the stride.
    return flat[: interval * num : interval]

In [0]:
# Random sampling of 1-D data (latitude/longitude information and speed-share information)
def random_sample(x, z, num_dim=1000, num_sample=10):
    """Draw several random, order-preserving sub-samples from two aligned 1-D series.

    For every draw, ``num_dim`` distinct positions are chosen with
    ``random.sample`` and sorted, and the same positions are read from both
    ``x`` and ``z``.

    Parameters
    ----------
    x : array_like
        First series; flattened before sampling. Its length defines the index range.
    z : array_like
        Second series, read at the same positions as ``x``; flattened before
        sampling. It must be at least as long as ``x``.
    num_dim : int, optional
        Number of positions per draw (default 1000).
    num_sample : int, optional
        Number of independent draws (default 10).

    Returns
    -------
    tuple of numpy.ndarray
        ``(samples_x, samples_z)``, each of shape ``(num_sample, num_dim)``.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when ``num_dim`` exceeds the length of ``x``.
    """
    x = np.asarray(x).ravel()
    z = np.asarray(z).ravel()
    dim = x.shape[0]

    # Pre-allocate the outputs instead of growing them with vstack on every
    # draw. The dtype matches what stacking onto a float64 row would produce.
    init_sp = np.empty((num_sample, num_dim), dtype=np.result_type(np.float64, x.dtype))
    init_sd = np.empty((num_sample, num_dim), dtype=np.result_type(np.float64, z.dtype))
    for row in range(num_sample):
        # Sorted positions keep the original ordering of the series.
        ls = sorted(random.sample(range(dim), num_dim))
        init_sp[row] = x[ls]
        init_sd[row] = z[ls]
    return init_sp, init_sd
    

In [0]:
def HIST(sample_sd, num_bin, max_speed):
    """Compute a density-normalised histogram for every row of ``sample_sd``.

    Values greater than or equal to ``max_speed`` are clipped to ``max_speed``
    before binning. The input array is left untouched.

    Parameters
    ----------
    sample_sd : array_like
        Rows of values to histogram; iterated along the first axis.
    num_bin : int
        Number of equal-width bins per histogram.
    max_speed : float
        Upper clipping value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number of rows, num_bin)`` holding the density values.

    Notes
    -----
    ``density=True`` replaces the ``normed=True`` keyword, which has been
    removed from ``numpy.histogram``.

    NOTE(review): no ``range`` is passed, so bin edges are derived from each
    row's own min/max and bins of different rows need not cover the same
    intervals. Pass ``range=(0, max_speed)`` if aligned bins are intended.
    """
    # Clip on a copy so the caller's array is not modified.
    clipped = np.minimum(np.asarray(sample_sd, dtype=float), max_speed)
    rows = [np.histogram(z, num_bin, density=True)[0] for z in clipped]
    if not rows:
        return np.empty((0, num_bin))
    return np.vstack(rows)

In [0]:
#@markdown **Feature extraction function**
def feature_extr(NAME_list, feature_dim=1000, num_bin=100, max_speed=25, perc=1., num_sample=50):
    """Extract track and speed-histogram features from a list of Excel workbooks.

    For every workbook, columns 1 and 2 of the first sheet (presumably the
    longitude/latitude track — confirm against the data files) are projected
    onto one eigenvector of their 2x2 covariance matrix. The projected series
    and column 3 (presumably speed) are then randomly sub-sampled
    ``num_sample`` times, and each speed sub-sample is turned into a histogram.

    Parameters
    ----------
    NAME_list : sequence of str
        Paths of the workbooks to process.
    feature_dim : int, optional
        Number of points per random sub-sample (default 1000).
    num_bin : int, optional
        Number of histogram bins for the speed feature (default 100).
    max_speed : float, optional
        Clipping value passed to ``HIST`` (default 25).
    perc : float, optional
        Fraction of each sheet's rows to use (default 1.0).
    num_sample : int, optional
        Number of random sub-samples drawn per workbook (default 50).

    Returns
    -------
    tuple of numpy.ndarray
        ``(track_features, speed_features)``. The track features are min-max
        normalised over the whole matrix; the speed features are divided by
        their global maximum. Each workbook contributes ``num_sample`` rows.

    Raises
    ------
    ValueError
        If ``NAME_list`` is empty.
    """
    max_num = len(NAME_list)
    if max_num == 0:
        raise ValueError('NAME_list must contain at least one workbook path.')

    print('Start to extract the features, {:.2f}% features are used...'.format(perc*100))
    track_rows = []   # per-workbook track samples
    speed_rows = []   # per-workbook speed histograms
    # Iterate over the argument, not a notebook-level global, so the function
    # does not depend on hidden kernel state.
    for num, name in enumerate(NAME_list):
        print_progress(num + 1, max_num)
        # NOTE(review): xlrd 2.0 and later no longer read .xlsx files — confirm
        # the installed xlrd version supports the workbooks being opened.
        excel_trawl = xlrd.open_workbook(name)
        sheet = excel_trawl.sheet_by_index(0)
        num_f = int((sheet.nrows - 1) * perc)
        # Row 0 is skipped (presumably a header row).
        # NOTE(review): the slice [1:num_f] yields num_f - 1 rows, so with
        # perc=1 the last sheet row is not used — confirm whether
        # [1:num_f + 1] was intended.
        x = sheet.col_values(1)[1:num_f]
        y = sheet.col_values(2)[1:num_f]
        data_position = np.vstack([np.array(x).reshape(1, -1), np.array(y).reshape(1, -1)])
        data_cov = np.cov(data_position)
        # NOTE(review): numpy.linalg.eig does not order its eigenvalues, so
        # column 0 is not guaranteed to be the principal direction — confirm.
        _, v = LA.eig(data_cov)
        data_pca = np.dot(v[:, 0].T, data_position).flatten()
        z = np.array(sheet.col_values(3)[1:num_f])
        sample_sp, sample_sd = random_sample(data_pca, z, feature_dim, num_sample)
        track_rows.append(sample_sp)  # sampled projection feature
        speed_rows.append(HIST(sample_sd, num_bin, max_speed))

    #@markdown - **Extracted gill-net track features (feature_dim columns)**
    gill_net_sample = np.vstack(track_rows)
    gill_net_sample = (gill_net_sample - gill_net_sample.min()) / (gill_net_sample.max() - gill_net_sample.min())

    #@markdown - **Normalised speed-share feature**
    speed_feature = np.vstack(speed_rows)
    speed_feature_g = speed_feature / speed_feature.max()

    print('Feature extraction completed.')
    return (gill_net_sample, speed_feature_g)
    

## 小波特征提取

In [0]:
def wavelet_f(features, wavelet_name='db4', level=4, mode='symmetric'):
    """Return the level-``level`` DWT approximation coefficients of every row.

    Parameters
    ----------
    features : numpy.ndarray
        2-D array with one signal per row.
    wavelet_name : str, optional
        Name of the wavelet understood by ``pywt.Wavelet`` (default ``'db4'``).
    level : int, optional
        Decomposition level (default 4).
    mode : str, optional
        Signal extension mode passed to ``pywt.wavedec`` (default ``'symmetric'``).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(features.shape[0], k)`` where ``k`` is the
        length of the coarsest approximation coefficient vector.
    """
    wavelet = pywt.Wavelet(wavelet_name)
    # Decompose all rows in one call along the last axis instead of looping in
    # Python (the loop also decomposed row 0 twice just to learn the output
    # width). This holds the coefficients of all rows in memory at once.
    coeffs = pywt.wavedec(features, wavelet, mode, level, axis=-1)
    # coeffs[0] holds the approximation coefficients; the details are discarded.
    return np.array(coeffs[0], dtype=float)

In [31]:
#@markdown - **Load feature data**
# Read a previously saved .mat file and pull out the two arrays stored under
# the keys 'feature_v' and 'feature_s'. The path components are placeholders.
data_path = './My_file_path'
data_name = os.path.join(data_path, 'My_data_name')
feature_data = scio.loadmat(data_name)
feature_v = feature_data['feature_v']
feature_s = feature_data['feature_s']
feature_v.shape

(47550, 1000)

In [0]:
# pywt.families()
# List the wavelet names available in the 'coif' family.
pywt.wavelist('coif')

In [46]:
# Time the DWT feature extraction over all rows of feature_v (db4 wavelet, 5 levels)
# and show the shape of the result.
s_t = time.time()
feature_dwt = wavelet_f(feature_v, wavelet_name='db4', level=5)
d_t = time.time()
print(d_t - s_t)
feature_dwt.shape

4.927726745605469


(47550, 38)

In [0]:
# Workbooks per class — gill_net_sample: 173, net_sample: 77, trawl: 445, cage: 127, fishing rod: 6, mixed: 61, seine: 62

# label[17300:25000], label[25000:69500], label[69500:82200], label[82200:82800], label[82800:88900], label[88900:]= range(1, 7)

# Plot one DWT feature row per class. The row indices below presumably fall
# inside each class's block of rows (consistent with 50 rows per workbook and
# the counts above) — confirm if the sampling count changes.
# NOTE: this cell binds the notebook-level name NAME to the class labels; a
# later cell rebinds NAME to the list of workbook paths.
n = feature_dwt.shape[1]
x = np.linspace(0, n, n)
Y = np.vstack([
    feature_dwt[1000], feature_dwt[10000], feature_dwt[25000],
    feature_dwt[35000], feature_dwt[41200], feature_dwt[42000], feature_dwt[45000],
])
NAME = ['gill_net', 'net', 'trawl', 'cage', 'fishing_rod', 'mixed', 'seine']
COLOR = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
plt.figure(1, (12, 9))
for y, name, color in zip(Y, NAME, COLOR):
    plt.plot(x, y, c=color, label=name)
# Tick and legend styling apply to the whole axes, so set them once after all
# curves are drawn instead of on every loop iteration.
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.legend(loc='center right', fontsize=14)
plt.show()

In [9]:
# Build the output path used by the following savemat cell (placeholder names)
# and display it.
data_path = './My_file_path'
data_name = os.path.join(data_path, 'My_file_name')

data_name

In [0]:
# Save the DWT features under the key 'feature_v' together with feature_s.
# NOTE(review): data_name is rebound in several cells of this notebook — make
# sure it points at the intended file before running this cell.
scio.savemat(data_name, {'feature_v': feature_dwt, 'feature_s': feature_s})


## 刺网特征采样（1000维）

In [0]:
# Collect the gill-net workbook paths and define the histogram settings.
# NAME is rebound here to the list of workbook paths.
# NOTE(review): file_path, num_bin and max_speed are not passed to the
# feature_extr call in the cell below, which relies on its own defaults.
xlls = get_xlsxlist('./My_file_path/gill_net', 'xlsx') # gill net
NAME = xlls[:]
file_path = './My_file_path'
num_bin = 100
max_speed = 25

In [0]:
# Number of workbook paths collected in NAME.
len(NAME)

173

In [0]:
# Run the feature extraction on the workbooks in NAME, timing the call, then
# print the elapsed time and both returned feature matrices.
start_time = time.time()
gill_net_sample, speed_feature_g = feature_extr(NAME, feature_dim=1000, perc=1.)
end_time = time.time()

print('The op takes {0:.4f}s.'.format(end_time - start_time))
print('Gill net samples:\n', gill_net_sample)
print('Speed_features:\n', speed_feature_g)


Start to extract the features, 100.00% features are used...
##################################################  100.00% completed.
Feature extraction completed.
The op takes 631.8006s.
Gill net samples:
 [[0.94690873 0.94690869 0.94690888 ... 0.94834862 0.94794636 0.94794539]
 [0.94690927 0.94690876 0.94690869 ... 0.94797395 0.94796314 0.94794431]
 [0.94690888 0.94690868 0.94690862 ... 0.94816735 0.94818371 0.94817963]
 ...
 [0.08577738 0.08577995 0.08572185 ... 0.08086261 0.0830942  0.08320004]
 [0.08556817 0.08559063 0.08568213 ... 0.08100469 0.08247423 0.08268454]
 [0.08577406 0.08577378 0.08577283 ... 0.08081814 0.08088292 0.08191494]]
Speed_features:
 [[0.57923597 0.11844726 0.07655737 ... 0.00144448 0.00433344 0.00144448]
 [0.55300148 0.11810333 0.06252529 ... 0.         0.         0.00138945]
 [0.39185189 0.12064175 0.02899143 ... 0.         0.         0.00093521]
 ...
 [0.29907932 0.08510387 0.06686733 ... 0.         0.         0.00121577]
 [0.3226941  0.09396457 0.07047342 ...

In [0]:
# Shape of the gill-net track feature matrix (rows x feature_dim).
gill_net_sample.shape

(8650, 1000)

In [0]:
# Shape of the gill-net speed-histogram feature matrix (rows x num_bin).
speed_feature_g.shape

(8650, 100)

In [0]:
#@markdown - **PCA dimensionality reduction of the features (show the first component)**
# Stack the per-class track feature matrices row-wise into one matrix.
# NOTE(review): only gill_net_sample is computed in the visible cells; the
# other *_sample arrays must already exist in the kernel.
feature_all = np.vstack([gill_net_sample, net_sample, trawl_sample, 
                        cage_sample, fr_sample, mixed_sample, seine_sample])
# The PCA step itself is currently disabled:
# feature_all_cov = np.cov(feature_all.T)
# w, v = LA.eig(feature_all_cov)

# feature_all_pca = np.dot(v[:, :36].T, feature_all.T)
# # feature_all_pca = np.dot(pca_weights, feature_all.T)
# feature_all_pca.T[:, 0].real
feature_all.shape

(47550, 1000)

In [0]:
# Use the stacked track features as feature_v without further normalisation
# (the min-max variant is kept below, disabled).
# feature_v = (feature_all - feature_all.min()) / (feature_all.max() - feature_all.min())
feature_v = feature_all

In [0]:
#@markdown - **Feature normalization**
# Drop the first PCA component, keep the real part and scale by the global maximum.
# NOTE(review): feature_all_pca is only assigned in commented-out code in the
# visible cells, so this cell fails on a fresh kernel unless that code is restored.
feature_v = feature_all_pca.T[:, 1:].real
feature_v = feature_v / feature_v.max()
# feature_v = (feature_v - feature_v.min()) / (feature_v.max() - feature_v.min())
feature_v.shape

(95100, 35)

## 特征合并及存储

In [0]:
# Stack the per-class speed-histogram features row-wise, in the same class
# order as the track features.
# NOTE(review): only speed_feature_g is computed in the visible cells; the
# other speed_feature_* arrays must already exist in the kernel.
feature_s = np.vstack([speed_feature_g, speed_feature_n, speed_feature_t, 
                      speed_feature_c, speed_feature_f, speed_feature_m, speed_feature_s])
feature_s.shape

(47550, 100)

In [0]:
# Sanity check: largest value in the stacked speed features.
feature_s.max()

1.0

In [0]:
# Sanity check: smallest value in the stacked speed features.
feature_s.min()

0.0

In [8]:
# Build the output path for the separate feature_v / feature_s file
# (placeholder names) and display it.
data_path = './My_file_path'
data_name = os.path.join(data_path, 'feature_name')
data_name

In [0]:
# Save the track features and speed features as two separate keys in one .mat file.
scio.savemat(data_name, {'feature_v': feature_v, 'feature_s': feature_s})
# scio.savemat(data_name, {'feature_v': feature_v, 'feature_s': feature_s, 'label': label})

In [0]:
# Concatenate the track features and the speed-histogram features column-wise
# into one feature matrix and show its shape.
feature = np.c_[feature_v, feature_s]
feature.shape

(9510, 1100)

In [0]:
# Display the combined feature matrix (numpy abbreviates the output).
feature

array([[-5.79633246e-02,  1.18536046e-01,  3.46046754e-02, ...,
         1.37398142e-03,  0.00000000e+00,  1.37398142e-03],
       [-5.95206511e-02,  1.15811546e-01,  3.56636666e-02, ...,
         0.00000000e+00,  0.00000000e+00,  1.07310957e-03],
       [-5.87282929e-02,  1.10644291e-01,  3.97041524e-02, ...,
         0.00000000e+00,  0.00000000e+00,  9.67210603e-04],
       ...,
       [ 6.55300672e-01, -1.84903056e-01, -3.50209221e-02, ...,
         0.00000000e+00,  0.00000000e+00,  4.95867769e-04],
       [ 6.98863598e-01, -1.91597820e-01,  2.11584027e-02, ...,
         1.11681930e-03,  1.11681930e-03,  1.11681930e-03],
       [ 6.75386609e-01, -1.96252806e-01,  6.26829182e-02, ...,
         0.00000000e+00,  0.00000000e+00,  6.42315762e-04]])

In [7]:
# Build the output path for the combined feature/label file (placeholder
# names) and display it.
data_path = './My_file_path'
data_name = os.path.join(data_path, 'feature_name')
data_name

In [0]:
# Build the class label vector: one label per feature row, classes numbered
# 0..6 in the order listed below.
# Workbooks per class — gill_net_sample: 173, net_sample: 77, trawl: 445,
# cage: 127, fishing rod: 6, mixed: 61, seine: 62
CLASS_FILE_COUNTS = [173, 77, 445, 127, 6, 61, 62]
# Rows contributed by each workbook; 951 workbooks * 10 rows = 9510 labels.
# NOTE(review): the feature extraction elsewhere in this notebook draws 50
# samples per workbook (8650 rows for 173 gill-net files) — confirm that this
# value matches the feature matrix being labelled.
SAMPLES_PER_FILE = 10
# Derive the label blocks from the counts instead of hand-computed slice
# boundaries; the dtype stays float64 as with np.zeros.
label = np.repeat(
    np.arange(len(CLASS_FILE_COUNTS), dtype=float),
    np.array(CLASS_FILE_COUNTS) * SAMPLES_PER_FILE,
)
label.shape

(9510,)

In [0]:
# Save the combined feature matrix together with its label vector.
scio.savemat(data_name, {'feature': feature, 'label': label})

In [6]:
# Save the stacked, un-reduced track features (feature_all) under the key 'feature'.
# NOTE(review): the placeholder file name 'feature_name' is the same one used
# by other save cells in this notebook — with real names, make sure the files
# do not overwrite each other.
feature_rd100_full = os.path.join(data_path, 'feature_name')
scio.savemat(feature_rd100_full, {'feature': feature_all})

In [0]:
# Output path for the PCA projection weights. Note that the name pca_weights
# holds a path string here, not the weight matrix.
pca_weights = os.path.join(data_path, 'pca_weights')

In [0]:
# Save the first 36 eigenvector columns (transposed) as the PCA weights.
# NOTE(review): at notebook level, v is only assigned in commented-out code in
# the visible cells, so this cell depends on leftover kernel state.
scio.savemat(pca_weights, {'pca_weights': v[:, :36].T})