In [12]:
import re

import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Raw input text for the notebook: the abstract of a paper on SAR ship
# classification (HOG-ShipCLSNet), stored exactly as pasted.
# NOTE(review): the paste also carries the paper's first-page footnote
# (manuscript dates, funding grants, author affiliations, e-mail addresses,
# DOI) in the middle of the abstract's final sentence ("...outperforms both
# modern" / "CNN-based methods..."). That footnote text is tokenised and
# vectorised together with the abstract -- confirm this is intended.
paragraph = """Ship classification in synthetic aperture radar (SAR)
images is a fundamental and significant step in ocean surveillance. Recently, with the rise of deep learning (DL), modern
abstract features from convolutional neural networks (CNNs)
have hugely improved SAR ship classification accuracy. However,
most existing CNN-based SAR ship classifiers overly rely on
abstract features, but uncritically abandon traditional mature
hand-crafted features, which may incur some challenges for
further improving accuracy. Hence, this article proposes a novel
DL network with histogram of oriented gradient (HOG) feature
fusion (HOG-ShipCLSNet) for preferable SAR ship classification.
In HOG-ShipCLSNet, four mechanisms are proposed to ensure
superior classification accuracy, that is, 1) a multiscale classification mechanism (MS-CLS-Mechanism); 2) a global self-attention
mechanism (GS-ATT-Mechanism); 3) a fully connected balance
mechanism (FC-BAL-Mechanism); and 4) an HOG feature
fusion mechanism (HOG-FF-Mechanism). We perform sufficient
ablation studies to confirm the effectiveness of these four mechanisms. Finally, our experimental results on two open SAR
ship datasets (OpenSARShip and FUSAR-Ship) jointly reveal
that HOG-ShipCLSNet dramatically outperforms both modern
Manuscript received January 27, 2021; revised March 20, 2021 and April 20,
2021; accepted May 10, 2021. Date of publication June 2, 2021; date of
current version January 12, 2022. This work was supported in part by
the National Natural Science Foundation of China under Grant 61571099,
Grant 61501098, and Grant 61671113 and in part by the National Key
Research and Development Program of China under Grant 2017YFB0502700.
(Corresponding author: Xiaoling Zhang.)
Tianwen Zhang, Xiaoling Zhang, Xiao Ke, Xiaowo Xu, Xu Zhan,
Chen Wang, Hao Su, Jun Shi, and Shunjun Wei are with the School of
Information and Communication Engineering, University of Electronic
Science and Technology of China, Chengdu 611731, China (e-mail:
twzhang@std.uestc.edu.cn; xlzhang@uestc.edu.cn; xke@std.uestc.edu.cn;
xuxiaowo@std.uestc.edu.cn; zhanxu@std.uestc.edu.cn; chenwang@std.
uestc.edu.cn; suhao@std.uestc.edu.cn; shijun@uestc.edu.cn; weishunjun@
uestc.edu.cn).
Chang Liu is with the College of Information Science and Technology, Dalian Maritime University, Dalian 116026, China (e-mail:
liuchang@dlmu.edu.cn).
Israr Ahmad is with the State Key Laboratory of Information Engineering in
Surveying, Mapping, and Remote Sensing, Wuhan University, Wuhan 430074,
China (e-mail: israrahmad@whu.edu.cn).
Yue Zhou is with the School of Electronic Information and Electrical
Engineering, Shanghai Jiao Tong University, Shanghai 200240, China (e-mail:
sjtu_zy@sjtu.edu.cn).
Dece Pan is with the Aerospace Information Research Institute,
Chinese Academy of Sciences, Beijing 100194, China (e-mail:
pandece19@mails.ucas.ac.cn).
Jianwei Li is with the 3rd Graduate Student Team, Naval Aviation
University, Yantai 264000, China (e-mail: lgm_jw@163.com).
Digital Object Identifier 10.1109/TGRS.2021.3082759
CNN-based methods and traditional hand-crafted feature
methods."""

In [3]:
# One shared lemmatiser instance for the per-token cleaning that follows.
lemmatizer = WordNetLemmatizer()

# Break the raw text into a list of sentence strings.
sentence = nltk.sent_tokenize(paragraph)

In [4]:
# Build the English stop-word set once instead of once per token.
stop_words = set(stopwords.words('english'))

# Clean every sentence in place: tokenise, drop stop words, lemmatise the
# remaining tokens and re-join them with single spaces.
# The membership test is case-sensitive and punctuation tokens are not
# removed, so capitalised words such as "In" / "We" and symbols such as
# "(" or "," remain in the result.
for i, raw_sentence in enumerate(sentence):
    words = nltk.word_tokenize(raw_sentence)
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    sentence[i] = ' '.join(words)

In [5]:
# Display the sentence list after the in-place stop-word removal and
# lemmatisation performed by the preceding loop.
sentence

['Ship classification synthetic aperture radar ( SAR ) image fundamental significant step ocean surveillance .',
 'Recently , rise deep learning ( DL ) , modern abstract feature convolutional neural network ( CNNs ) hugely improved SAR ship classification accuracy .',
 'However , existing CNN-based SAR ship classifier overly rely abstract feature , uncritically abandon traditional mature hand-crafted feature , may incur challenge improving accuracy .',
 'Hence , article proposes novel DL network histogram oriented gradient ( HOG ) feature fusion ( HOG-ShipCLSNet ) preferable SAR ship classification .',
 'In HOG-ShipCLSNet , four mechanism proposed ensure superior classification accuracy , , 1 ) multiscale classification mechanism ( MS-CLS-Mechanism ) ; 2 ) global self-attention mechanism ( GS-ATT-Mechanism ) ; 3 ) fully connected balance mechanism ( FC-BAL-Mechanism ) ; 4 ) HOG feature fusion mechanism ( HOG-FF-Mechanism ) .',
 'We perform sufficient ablation study confirm effectivenes

# Bag of Words (BoW)

In [6]:
# NOTE(review): `ps` is not used in any visible cell; kept in case a later
# cell stems with it.
ps = PorterStemmer()
wordnet = WordNetLemmatizer()

# Re-tokenise from the raw text: `sentence` was overwritten in place by the
# earlier cleaning loop, so start again from `paragraph`.
sentence = nltk.sent_tokenize(paragraph)

# Build the English stop-word set once instead of once per token.
stop_words = set(stopwords.words('english'))

# One cleaned, lower-cased, space-joined string per sentence.
corpus = []

for i in range(len(sentence)):
    # Replace every character that is not an ASCII letter (digits,
    # punctuation, hyphens, newlines) with a space.
    review = re.sub('[^a-zA-Z]', ' ', sentence[i])
    review = review.lower()
    review = review.split()
    # Drop stop words, then lemmatise what is left.
    review = [wordnet.lemmatize(word) for word in review if word not in stop_words]
    review = ' '.join(review)
    corpus.append(review)

In [7]:
# Second raw sentence (index 1) as produced by sent_tokenize, for comparison
# with its cleaned counterpart in `corpus`.
sentence[1]

'Recently, with the rise of deep learning (DL), modern\nabstract features from convolutional neural networks (CNNs)\nhave hugely improved SAR ship classification accuracy.'

In [8]:
# The same sentence after letter-only filtering, lower-casing, stop-word
# removal and lemmatisation.
corpus[1]

'recently rise deep learning dl modern abstract feature convolutional neural network cnns hugely improved sar ship classification accuracy'

In [9]:
# Bag-of-words model: fit the vectoriser on the cleaned corpus, then convert
# the resulting matrix to a dense array (one row per entry in `corpus`).
cv = CountVectorizer()
bow_matrix = cv.fit_transform(corpus)
x = bow_matrix.toarray()

## TF-IDF

In [17]:
# TF-IDF model over the same cleaned corpus; `h` is the dense form of the
# fitted matrix (one row per entry in `corpus`).
tf_idf = TfidfVectorizer()
tfidf_matrix = tf_idf.fit_transform(corpus)
h = tfidf_matrix.toarray()


In [18]:
# TF-IDF row for the second document, i.e. the vector built from corpus[1].
h[1]

array([0.        , 0.        , 0.22974776, 0.        , 0.        ,
       0.        , 0.20652277, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.18850806, 0.        ,
       0.        , 0.        , 0.26248155, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.26248155, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.26248155, 0.        , 0.        , 0.22974776,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.17378897, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     