In [59]:
import pandas as pd 
from umap import UMAP
from hdbscan import HDBSCAN
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
 
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired
from bertopic.vectorizers import ClassTfidfTransformer

from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk 
from nltk.corpus import wordnet
from nltk.tokenize import TreebankWordTokenizer
from nltk.corpus import stopwords


nltk.download('wordnet')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to /home/ehmindev/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/ehmindev/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/ehmindev/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ehmindev/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [60]:
df = pd.read_excel("savedrecs_vision.xls")

print(df)

    Publication Type                                            Authors   
0                  C  Liu, Z; Lin, YT; Cao, Y; Hu, H; Wei, YX; Zhang...  \
1                  J  Gao, SH; Cheng, MM; Zhao, K; Zhang, XY; Yang, ...   
2                  J  Wang, JD; Sun, K; Cheng, TH; Jiang, BR; Deng, ...   
3                  C  Touvron, H; Cord, M; Douze, M; Massa, F; Sabla...   
4                  C  Radford, A; Kim, JW; Hallacy, C; Ramesh, A; Go...   
..               ...                                                ...   
995                J                                    Wang, J; Lee, S   
996                J  Zhang, LM; Liang, RH; Yin, JW; Zhang, DX; Shao, L   
997                J                                          Xiang, ST   
998                J  Yang, R; Wang, G; Pan, ZR; Lu, HL; Zhang, H; J...   
999                J  Bhatt, PM; Malhan, RK; Rajendran, P; Shah, BC;...   

     Book Authors        Book Editors Book Group Authors   
0             NaN                 NaN  

In [61]:
df = df[["Article Title","Abstract"]]

In [62]:
docs = df["Abstract"].astype(str).tolist()
docs

['This paper presents a new vision Transformer, called Swin Transformer, that capably serves as a general-purpose backbone for computer vision. Challenges in adapting Transformer from language to vision arise from differences between the two domains, such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. To address these differences, we propose a hierarchical Transformer whose representation is computed with Shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. This hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. These qualities of Swin Transformer make it compatible with a broad range of vision tasks, including image classification (87.3 top-1 accuracy on ImageNet-1K) and dense predict

In [63]:
for i in range(len(docs)):
    docs[i] = docs[i].lower()

In [64]:
docs

['this paper presents a new vision transformer, called swin transformer, that capably serves as a general-purpose backbone for computer vision. challenges in adapting transformer from language to vision arise from differences between the two domains, such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. to address these differences, we propose a hierarchical transformer whose representation is computed with shifted windows. the shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. this hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. these qualities of swin transformer make it compatible with a broad range of vision tasks, including image classification (87.3 top-1 accuracy on imagenet-1k) and dense predict

# Tockenization

In [65]:
tokenizer = TreebankWordTokenizer()

for i in range(len(docs)):
    docs[i] = " ".join(tokenizer.tokenize(docs[i]))

In [66]:
docs

['this paper presents a new vision transformer , called swin transformer , that capably serves as a general-purpose backbone for computer vision. challenges in adapting transformer from language to vision arise from differences between the two domains , such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. to address these differences , we propose a hierarchical transformer whose representation is computed with shifted windows. the shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. this hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. these qualities of swin transformer make it compatible with a broad range of vision tasks , including image classification ( 87.3 top-1 accuracy on imagenet-1k ) and dense 

# POS Tagging & Lammatization Effect

In [67]:
def get_wordnet_pos(treebank_tag):
    if treebank_tag.startswith('J'):
        return wordnet.ADJ
    elif treebank_tag.startswith('V'):
        return wordnet.VERB
    elif treebank_tag.startswith('N'):
        return wordnet.NOUN
    elif treebank_tag.startswith('R'):
        return wordnet.ADV
    else:
        return None

In [68]:
tokenizer = TreebankWordTokenizer()

a = tokenizer.tokenize(docs[0])
" ".join(a)

'this paper presents a new vision transformer , called swin transformer , that capably serves as a general-purpose backbone for computer vision. challenges in adapting transformer from language to vision arise from differences between the two domains , such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. to address these differences , we propose a hierarchical transformer whose representation is computed with shifted windows. the shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. this hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. these qualities of swin transformer make it compatible with a broad range of vision tasks , including image classification ( 87.3 top-1 accuracy on imagenet-1k ) and dense p

## Test  Effect

In [69]:
lemmatizer = WordNetLemmatizer()
a = docs[0].split(" ")

print(a)
print([lemmatizer.lemmatize(word) for word in a])
tokens = word_tokenize(docs[0])
print([lemmatizer.lemmatize(word[0],pos = get_wordnet_pos(word[1])) if get_wordnet_pos(word[1]) else word[0]  for word in nltk.pos_tag(tokens)])

['this', 'paper', 'presents', 'a', 'new', 'vision', 'transformer', ',', 'called', 'swin', 'transformer', ',', 'that', 'capably', 'serves', 'as', 'a', 'general-purpose', 'backbone', 'for', 'computer', 'vision.', 'challenges', 'in', 'adapting', 'transformer', 'from', 'language', 'to', 'vision', 'arise', 'from', 'differences', 'between', 'the', 'two', 'domains', ',', 'such', 'as', 'large', 'variations', 'in', 'the', 'scale', 'of', 'visual', 'entities', 'and', 'the', 'high', 'resolution', 'of', 'pixels', 'in', 'images', 'compared', 'to', 'words', 'in', 'text.', 'to', 'address', 'these', 'differences', ',', 'we', 'propose', 'a', 'hierarchical', 'transformer', 'whose', 'representation', 'is', 'computed', 'with', 'shifted', 'windows.', 'the', 'shifted', 'windowing', 'scheme', 'brings', 'greater', 'efficiency', 'by', 'limiting', 'self-attention', 'computation', 'to', 'non-overlapping', 'local', 'windows', 'while', 'also', 'allowing', 'for', 'cross-window', 'connection.', 'this', 'hierarchical'

In [70]:
lemmatizer = WordNetLemmatizer()
tokenizer = TreebankWordTokenizer()

for i in range(len(docs)):
    tokens = word_tokenize(docs[i])
    docs[i] = " ".join([lemmatizer.lemmatize(word[0],pos = get_wordnet_pos(word[1])) if get_wordnet_pos(word[1]) else word[0]  for word in nltk.pos_tag(tokens)])

In [71]:
docs

['this paper present a new vision transformer , call swin transformer , that capably serve as a general-purpose backbone for computer vision . challenge in adapt transformer from language to vision arise from difference between the two domain , such as large variation in the scale of visual entity and the high resolution of pixel in image compare to word in text . to address these difference , we propose a hierarchical transformer whose representation be compute with shifted window . the shifted windowing scheme bring great efficiency by limit self-attention computation to non-overlapping local window while also allow for cross-window connection . this hierarchical architecture have the flexibility to model at various scale and have linear computational complexity with respect to image size . these quality of swin transformer make it compatible with a broad range of vision task , include image classification ( 87.3 top-1 accuracy on imagenet-1k ) and dense prediction task such as objec

# Remove Stop words

In [72]:
stop_words = set(stopwords.words('english')) 

# word_tockens = word_tokenize(docs[0])
# print(word_tockens)

for i in range(len(docs)):
    word_tockens = word_tokenize(docs[i])
    result =[]
    for word in word_tockens:
        if word not in stop_words:
            result.append(word)
    docs[i] = " ".join(result)


In [73]:
docs

['paper present new vision transformer , call swin transformer , capably serve general-purpose backbone computer vision . challenge adapt transformer language vision arise difference two domain , large variation scale visual entity high resolution pixel image compare word text . address difference , propose hierarchical transformer whose representation compute shifted window . shifted windowing scheme bring great efficiency limit self-attention computation non-overlapping local window also allow cross-window connection . hierarchical architecture flexibility model various scale linear computational complexity respect image size . quality swin transformer make compatible broad range vision task , include image classification ( 87.3 top-1 accuracy imagenet-1k ) dense prediction task object detection ( 58.7 box ap 51.1 mask ap coco test-dev ) semantic segmentation ( 53.5 miou ade20k val ) . performance surpass previous state-of-the-art large margin +2.7 box ap +2.6 mask ap coco , +3.2 mio

# MODEL

In [74]:
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
umap_model = UMAP(n_neighbors=15, n_components=5, min_dist=0.0, metric='cosine')
hdbscan_model = HDBSCAN(min_cluster_size=15, metric='euclidean', cluster_selection_method='eom', prediction_data=True)
vectorizer_model = CountVectorizer(stop_words="english")
ctfidf_model = ClassTfidfTransformer()

In [75]:
model = BERTopic(
  embedding_model=embedding_model,        
  umap_model=umap_model,               
  hdbscan_model=hdbscan_model,              
  vectorizer_model=vectorizer_model, 
  ctfidf_model=ctfidf_model,
  nr_topics=50,
  n_gram_range=(1,2)       
)


In [76]:
topics, probabilities = model.fit_transform(docs)

In [77]:
topic_df = model.get_topic_info()
topic_df

Unnamed: 0,Topic,Count,Name
0,-1,218,-1_method_detection_image_propose
1,0,66,0_disease_plant_image_model
2,1,79,1_covid_19_medical_image
3,2,44,2_face_facial_expression_recognition
4,3,72,3_image_method_propose_light
5,4,87,4_human_action_recognition_pose
6,5,17,5_tracking_track_tracker_object
7,6,65,6_defect_crack_inspection_detection
8,7,23,7_text_language_video_word
9,8,29,8_image_gan_generative_gans


In [78]:
get_topic = model.get_topic
get_topic(1)

[('covid', 0.046626820614528074),
 ('19', 0.04544537553135328),
 ('medical', 0.03944371825354722),
 ('image', 0.03482750098591339),
 ('model', 0.030340519873300418),
 ('learning', 0.02670070476878616),
 ('use', 0.026518613764364693),
 ('deep', 0.02588149530084337),
 ('data', 0.021644739385360396),
 ('ct', 0.020789002703773988)]

In [79]:
zero=[]
number=[]
for i in range(len(topic_df)):
  zero.append('0')
  number.append(i-1)

topic_word_df=pd.DataFrame({
    'topic_num':number,
    'topic':topic_df['Name'],
    'w1':zero,
    'w2':zero,
    'w3':zero,
    'w4':zero,
    'w5':zero,
    'w6':zero,
    'w7':zero,
    'w8':zero,
    'w9':zero,
    'w10':zero
})

# 각 토픽별 10개 단어 정리
col = topic_word_df.columns
for i in range(len(topic_word_df)):
  for j in range(10):
    topic_word_df[col[j+2]][i]=get_topic(i-1)[j][0]

In [80]:
topic_word_df

Unnamed: 0,topic_num,topic,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10
0,-1,-1_method_detection_image_propose,method,detection,image,propose,feature,task,vision,model,learning,computer
1,0,0_disease_plant_image_model,disease,plant,image,model,use,leaf,crop,accuracy,classification,deep
2,1,1_covid_19_medical_image,covid,19,medical,image,model,learning,use,deep,data,ct
3,2,2_face_facial_expression_recognition,face,facial,expression,recognition,feature,use,propose,emotion,fer,mask
4,3,3_image_method_propose_light,image,method,propose,light,enhancement,shadow,dehazing,network,denoising,color
5,4,4_human_action_recognition_pose,human,action,recognition,pose,use,hand,feature,video,model,motion
6,5,5_tracking_track_tracker_object,tracking,track,tracker,object,mot,network,siamese,challenge,rgbt,target
7,6,6_defect_crack_inspection_detection,defect,crack,inspection,detection,damage,image,use,surface,method,detect
8,7,7_text_language_video_word,text,language,video,word,natural,model,caption,scene,method,visual
9,8,8_image_gan_generative_gans,image,gan,generative,gans,adversarial,synthesis,network,data,model,generate


In [81]:
model.get_document_info(docs)

Unnamed: 0,Document,Topic,Name,Top_n_words,Probability,Representative_document
0,"paper present new vision transformer , call sw...",12,12_attention_transformer_semantic_vision,attention - transformer - semantic - vision - ...,1.000000,False
1,represent feature multiple scale great importa...,11,11_network_pruning_architecture_cnn,network - pruning - architecture - cnn - layer...,0.844634,False
2,high-resolution representation essential posit...,-1,-1_method_detection_image_propose,method - detection - image - propose - feature...,0.000000,False
3,"recently , neural network purely base attentio...",12,12_attention_transformer_semantic_vision,attention - transformer - semantic - vision - ...,1.000000,False
4,sota computer vision system train predict fixe...,10,10_learning_supervised_data_label,learning - supervised - data - label - deep - ...,1.000000,False
...,...,...,...,...,...,...
995,increase manufacturing productivity automated ...,6,6_defect_crack_inspection_detection,defect - crack - inspection - detection - dama...,1.000000,False
996,accurately recognize different category scener...,12,12_attention_transformer_semantic_vision,attention - transformer - semantic - vision - ...,1.000000,False
997,many problem computer graphic computer vision ...,-1,-1_method_detection_image_propose,method - detection - image - propose - feature...,0.000000,False
998,synthetic aperture radar ( sar ) ship detectio...,-1,-1_method_detection_image_propose,method - detection - image - propose - feature...,0.000000,False


In [82]:
model.visualize_barchart()

In [83]:
model.visualize_heatmap()

In [84]:
model.visualize_topics()