In [None]:
import pandas as pd
import numpy as np
import pickle
from tqdm.auto import tqdm
import re
import time

In [None]:
# 2022 라이덴 클러스터 포함 논문 (06-22) Pub Text 불러오기
with open('F:\cluster_2022_run\LEIDEN2022_JYLee\leiden_cluster_text_info_202230_pickle파일들\leiden_cluster_text_info_202230.pkl', 'rb') as f:
    df_text_full_20_full = pickle.load(f)
df_text_full_20_full['aukey_modified'] = df_text_full_20_full['aukey'].str.replace(';', ' ')

In [None]:
df_text_full_20_full.columns

Index(['uid', 'cluster_id', 'pubyear2', 'item', 'aukey', 'abstract',
       'aukey_modified'],
      dtype='object')

In [None]:
# OR로만 이루어진 단순한 검색식의 경우 사용할 함수 선언
def search_and_organize(technology, search_texts, df_text_full_20_full,technology_search_results):
    technology_search_results = technology_search_results

    # aukey 컬럼에서 검색
    results_aukey = []
    for text in tqdm(search_texts, desc=f"search_in_aukey for {technology}"):
        result = df_text_full_20_full[df_text_full_20_full['aukey_modified'].str.contains(text, case=False, regex=True)]
        results_aukey.append(result)
    concatenated_results = pd.concat(results_aukey).drop_duplicates().reset_index(drop=True)

    # abstract 컬럼에서 검색
    results_abstract = []
    for text in tqdm(search_texts, desc=f"search_in_abstract for {technology}"):
        result_abs = df_text_full_20_full[df_text_full_20_full['abstract'].str.contains(text, case=False, regex=True)]
        results_abstract.append(result_abs)
    concatenated_results_abs = pd.concat(results_abstract).drop_duplicates().reset_index(drop=True)

    # 검색어를 포함하는 행의 수 계산
    shapes = [result.shape[0] for result in results_aukey]
    shapes_abs = [result_abs.shape[0] for result_abs in results_abstract]
    npubs_contain_searchterms = pd.DataFrame([shapes, shapes_abs]).T
    npubs_contain_searchterms.columns = ['aukey_npubs','abstract_npubs']
    npubs_contain_searchterms.index = search_texts
    npubs_contain_searchterms.loc['total_npubs'] = [concatenated_results.shape[0],concatenated_results_abs.shape[0]]

    # 결과를 딕셔너리에 저장
    technology_search_results[technology] = {
        'concatenated_results': concatenated_results,
        'concatenated_results_abs': concatenated_results_abs,
        'npubs_contain_searchterms': npubs_contain_searchterms
    }

    return technology_search_results

In [None]:
# 최초 딕셔너리 선언
technology_search_results = {}

In [None]:
# 단순 OR 검색식부터 실행
technology = '6.무선 전력 전송'
search_texts = ["wireless power transfer",  "wireless power transmission", "wireless energy transmission", "Wireless Energy Transfer"]
# OR단순검색식 탐색 함수 실행
technology_search_results = search_and_organize(technology, search_texts, df_text_full_20_full, technology_search_results)

search_in_aukey for 6.무선 전력 전송:   0%|          | 0/4 [00:00<?, ?it/s]

search_in_abstract for 6.무선 전력 전송:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# 기술명 선언
technology = '8. 면역세포치료'
# 검색식 입력
search_texts = ["Immunotherapy","Immune cell therap.*"]
# OR단순검색식 탐색 함수 실행
technology_search_results = search_and_organize(technology, search_texts, df_text_full_20_full, technology_search_results)

search_in_aukey for 8. 면역세포치료:   0%|          | 0/2 [00:00<?, ?it/s]

search_in_abstract for 8. 면역세포치료:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# 기술명 선언
technology = '10. 상변화 소재 활용 열에너지 저장'
# 검색식 입력
search_texts = ["Phase change material.*","thermal energy storage.*"]
# OR단순검색식 탐색 함수 실행
technology_search_results = search_and_organize(technology, search_texts, df_text_full_20_full, technology_search_results)

search_in_aukey for 10. 상변화 소재 활용 열에너지 저장:   0%|          | 0/2 [00:00<?, ?it/s]

search_in_abstract for 10. 상변화 소재 활용 열에너지 저장:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
## OR 조회 (임시)

In [None]:
technology_search_results['6.무선 전력 전송']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
wireless power transfer,2221,2396
wireless power transmission,417,458
wireless energy transmission,23,56
Wireless Energy Transfer,188,271
total_npubs,2826,3130


In [None]:
# 2단계 조회 그룹

In [None]:
# AND가 2개의 OR검색식을 연결한 경우 사용할 함수 선언
def search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results):

    # OR단순검색식 탐색 함수 1회 실행
    technology_search_results = search_and_organize(technology, search_texts_1st, df_text_full_20_full, technology_search_results)

    # 2차 검색식 정의
    search_texts_2nd = search_texts_2nd
    # 2차 검색 범위 정의
    concatenated_results1 = technology_search_results[technology]['concatenated_results']
    concatenated_results_abs1 = technology_search_results[technology]['concatenated_results_abs']
    npubs_contain_searchterms = technology_search_results[technology]['npubs_contain_searchterms']

    # aukey 컬럼에서 검색
    results_aukey = []
    for text in tqdm(search_texts_2nd, desc=f"search_in_aukey for {technology}"):
        result = concatenated_results1[concatenated_results1['aukey_modified'].str.contains(text, case=False, regex=True)]
        results_aukey.append(result)
    concatenated_results = pd.concat(results_aukey).drop_duplicates().reset_index(drop=True)

    # abstract 컬럼에서 검색
    results_abstract = []
    for text in tqdm(search_texts_2nd, desc=f"search_in_abstract for {technology}"):
        result_abs = concatenated_results_abs1[concatenated_results_abs1['abstract'].str.contains(text, case=False, regex=True)]
        results_abstract.append(result_abs)
    concatenated_results_abs = pd.concat(results_abstract).drop_duplicates().reset_index(drop=True)

    # 검색어를 포함하는 행의 수 계산
    shapes = [result.shape[0] for result in results_aukey]
    shapes_abs = [result_abs.shape[0] for result_abs in results_abstract]
    npubs_contain_searchterms1 = pd.DataFrame([shapes, shapes_abs]).T
    npubs_contain_searchterms1.columns = ['aukey_npubs','abstract_npubs']
    npubs_contain_searchterms1.index = search_texts_2nd
    npubs_contain_searchterms1.loc['total_npubs'] = [concatenated_results.shape[0],concatenated_results_abs.shape[0]]

    # 결과를 딕셔너리에 저장
    technology_search_results[technology] = {
        'concatenated_results': concatenated_results,
        'concatenated_results_abs': concatenated_results_abs,
        'npubs_contain_searchterms': pd.concat([npubs_contain_searchterms, npubs_contain_searchterms1])
    }

    return technology_search_results

In [None]:
## 수동, AND 존재 검색식. "(Nanofluid* or “Nano-Fluid*” “Nano Fluid*”) and (Energy or thermal* or Heat*) "
# 기술명 선언
technology = '9. 나노유체 이용 에너지 효율화'
# 1차 검색식 입력
search_texts_1st = ["Nano.*fluid.*"]
# 2차 검색식 입력
search_texts_2nd = ["energy", "thermal.*", "heat.*"]

In [None]:
technology_search_results = search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results)


search_in_aukey for 9. 나노유체 이용 에너지 효율화:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_abstract for 9. 나노유체 이용 에너지 효율화:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_aukey for 9. 나노유체 이용 에너지 효율화:   0%|          | 0/3 [00:00<?, ?it/s]

search_in_abstract for 9. 나노유체 이용 에너지 효율화:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
technology_search_results['9. 나노유체 이용 에너지 효율화']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
Nano.*fluid.*,13650,42193
total_npubs,13650,42193
energy,588,5673
thermal.*,2705,10888
heat.*,3760,11868
total_npubs,5913,18807


In [None]:
# 기술명 선언
technology = '4. 암 진단/예측 바이오마커'
# 1차 검색식 입력
search_texts_1st = ["bio.*marker.*"]
# 2차 검색식 입력
search_texts_2nd = ["cancer", "tumor.*", "oncology"]


In [None]:
technology_search_results = search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results)


search_in_aukey for 4. 암 진단/예측 바이오마커:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_abstract for 4. 암 진단/예측 바이오마커:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_aukey for 4. 암 진단/예측 바이오마커:   0%|          | 0/3 [00:00<?, ?it/s]

search_in_abstract for 4. 암 진단/예측 바이오마커:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
technology_search_results['4. 암 진단/예측 바이오마커']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
bio.*marker.*,72107,310756
total_npubs,72107,310756
cancer,13947,79674
tumor.*,2867,57858
oncology,300,3082
total_npubs,15742,98204


In [None]:
# 기술명 선언
technology = '3. 휴먼 마이크로바이옴'
# 1차 검색식 입력
search_texts_1st = ["microbiome", "microbiota", "metagenomics", "metagenome", "second Genome"]
# 2차 검색식 입력
search_texts_2nd = ["human", "Gut", "second Genome"]

In [None]:
technology_search_results = search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results)


search_in_aukey for 3. 휴먼 마이크로바이옴:   0%|          | 0/5 [00:00<?, ?it/s]

search_in_abstract for 3. 휴먼 마이크로바이옴:   0%|          | 0/5 [00:00<?, ?it/s]

search_in_aukey for 3. 휴먼 마이크로바이옴:   0%|          | 0/3 [00:00<?, ?it/s]

search_in_abstract for 3. 휴먼 마이크로바이옴:   0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
technology_search_results['3. 휴먼 마이크로바이옴']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
microbiome,11463,22604
microbiota,18458,43034
metagenomics,3969,4122
metagenome,1363,4181
second Genome,0,74
total_npubs,32543,63180
human,1337,17175
Gut,9613,27238
second Genome,0,74
total_npubs,10506,35681


In [None]:
# 기술명 선언
technology = '2. 신축성 전자소자'
# 1차 검색식 입력
#(stretchable or  wearable or flexible or “self-healing” or “self healing” or selfhealing)
search_texts_1st = ['stretchable','wearable','flexible','self.?healing']
# 2차 검색식 입력
# (electronic* or material* or substrate* or device* or electrode* or interconnector)
search_texts_2nd = ['electronic.*','material.*','substrate.*','device.*','electrode.*','interconnector']

In [None]:
technology_search_results = search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results)


search_in_aukey for 2. 신축성 전자소자:   0%|          | 0/4 [00:00<?, ?it/s]

search_in_abstract for 2. 신축성 전자소자:   0%|          | 0/4 [00:00<?, ?it/s]

search_in_aukey for 2. 신축성 전자소자:   0%|          | 0/6 [00:00<?, ?it/s]

search_in_abstract for 2. 신축성 전자소자:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
technology_search_results['2. 신축성 전자소자']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
stretchable,1818,5404
wearable,7643,20369
flexible,24990,190066
self.?healing,4256,9029
total_npubs,37537,215260
electronic.*,4084,21956
material.*,1722,36965
substrate.*,1419,20797
device.*,3029,36331
electrode.*,1875,14999


In [None]:
## 다시 OR검색만

In [None]:
# 기술명 선언
technology = '5. 유전자 편집'
# 1차 검색식 입력
search_texts = ["genome editing", "Gene Editing", "ZFN", "zinc finger nuclease"
                    , "TALEN", "Transcriptor Activator.*Like Effector Nuclease ", "Transcription Activator.*Like Effector Nuclease"
                    , "CRISPR", "CRISPR.*Cas9", "Clustered Regularly Interspaced Short Palindromic Repeat.*"]


In [None]:
# OR단순검색식 탐색 함수 실행
technology_search_results = search_and_organize(technology, search_texts, df_text_full_20_full, technology_search_results)

search_in_aukey for 5. 유전자 편집:   0%|          | 0/10 [00:00<?, ?it/s]

search_in_abstract for 5. 유전자 편집:   0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
technology_search_results['5. 유전자 편집']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
genome editing,1971,5729
Gene Editing,1051,3143
ZFN,177,822
zinc finger nuclease,189,601
TALEN,2663,11325
Transcriptor Activator.*Like Effector Nuclease,0,0
Transcription Activator.*Like Effector Nuclease,99,822
CRISPR,6439,16923
CRISPR.*Cas9,3822,11856
Clustered Regularly Interspaced Short Palindromic Repeat.*,163,3017


In [None]:
# 기술명 선언
technology = '7. 초대용량 데이터 대응 광통신 기술'
# 1차 검색식 입력
search_texts = ["Multi.*core fiber", "multi.*mode fiber", "Few.*Mode Fiber", "Spacial Multiplexing", "Spatial Division Multiplexing", "Space Division Multiplexing", "PAM-4", "pulse.*amplitude modulation", "High.*Speed Optical Communications", "High.*Speed Optical Transmission", "Frequency Division Multiplexing"
                , "Time Division Multiplexing", "Code Division Multiplexing", "Wavelength Division Multiplexing"]

In [None]:
# OR단순검색식 탐색 함수 실행
technology_search_results = search_and_organize(technology, search_texts, df_text_full_20_full, technology_search_results)

search_in_aukey for 7. 초대용량 데이터 대응 광통신 기술:   0%|          | 0/14 [00:00<?, ?it/s]

search_in_abstract for 7. 초대용량 데이터 대응 광통신 기술:   0%|          | 0/14 [00:00<?, ?it/s]

In [None]:
technology_search_results['7. 초대용량 데이터 대응 광통신 기술']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
Multi.*core fiber,374,984
multi.*mode fiber,490,3750
Few.*Mode Fiber,257,734
Spacial Multiplexing,0,0
Spatial Division Multiplexing,66,113
Space Division Multiplexing,233,217
PAM-4,79,300
pulse.*amplitude modulation,410,1475
High.*Speed Optical Communications,0,46
High.*Speed Optical Transmission,14,47


In [None]:
### 재실행!
## case3 가장 복잡한 경우
# 기술명 선언
technology = '1-1. 금속 3D 프린팅(A-set)2'
# 1차 검색식 입력
search_texts_1st = [r'\bmetal\b']

# 2차 검색식 입력
search_texts_2nd = ['3D print.*', 'Additive Manufactur.*', r'\bAM\b', 'Rapid Prototyp.*', r'\bRP\b', r'\bSLS\b'
                    , 'Selective Laser Sintering', r'\bSLM\b', 'Selective Laser Melting', r'\bSLA\b', 'Stereo.?lithograph.*'
                    , r'\bPBF\b', 'Powder Bed Fusion', r'\bDED\b', 'Direct Ejection Deposition', r'\bFDM\b'
                    , 'Fused deposition modeling', r'\bFFF\b', 'Fused filament fabrication']

In [None]:
technology_search_results = search_and_organize_1and(technology, search_texts_1st, search_texts_2nd, df_text_full_20_full,technology_search_results)


search_in_aukey for 1-1. 금속 3D 프린팅(A-set)2:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_abstract for 1-1. 금속 3D 프린팅(A-set)2:   0%|          | 0/1 [00:00<?, ?it/s]

search_in_aukey for 1-1. 금속 3D 프린팅(A-set)2:   0%|          | 0/19 [00:00<?, ?it/s]

search_in_abstract for 1-1. 금속 3D 프린팅(A-set)2:   0%|          | 0/19 [00:00<?, ?it/s]

In [None]:
technology_search_results['1-1. 금속 3D 프린팅(A-set)2']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
\bmetal\b,131488,607421
total_npubs,131488,607421
3D print.*,194,774
Additive Manufactur.*,697,2311
\bAM\b,69,3458
Rapid Prototyp.*,62,297
\bRP\b,12,465
\bSLS\b,11,260
Selective Laser Sintering,37,166
\bSLM\b,42,639


In [None]:
# 3차 검색식 입력
search_texts_3rd = [r'\bDMP\b', 'Direct Metal Printing', r'\bDMLS\b', 'Direct Metal Laser Sintering'
                    , r'\bDMLM\b', 'Direct Metal Laser Melting']

In [None]:
# OR단순검색식 탐색 함수 실행
technology = '1-2. 금속 3D 프린팅(B-set)'

In [None]:
technology_search_results = search_and_organize(technology, search_texts_3rd, df_text_full_20_full , technology_search_results)

search_in_aukey for 1-2. 금속 3D 프린팅(B-set):   0%|          | 0/6 [00:00<?, ?it/s]

search_in_abstract for 1-2. 금속 3D 프린팅(B-set):   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
technology_search_results['1-1. 금속 3D 프린팅(A-set)']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
metal,260466,959811
total_npubs,260466,959811
3D print.*,285,1130
Additive Manufactur.*,917,3387
\bAM\b,95,5266
Rapid Prototyp.*,84,400
\bRP\b,21,758
\bSLS\b,15,403
Selective Laser Sintering,55,244
\bSLM\b,56,1138


In [None]:
technology_search_results['1-2. 금속 3D 프린팅(B-set)']['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
\bDMP\b,218,3187
Direct Metal Printing,10,12
\bDMLS\b,130,441
Direct Metal Laser Sintering,193,374
\bDMLM\b,1,17
Direct Metal Laser Melting,1,14
total_npubs,489,3758


In [None]:
## 최종 파일 피클 저장 후 세부 내용 체크
df_technology_search_results = pd.DataFrame(technology_search_results)
df_technology_search_results.to_pickle('technology_search_results_2018top18_240319.pkl')

In [None]:
df_technology_search_results

Unnamed: 0,9. 나노유체 이용 에너지 효율화,4. 암 진단/예측 바이오마커,3. 휴먼 마이크로바이옴,2. 신축성 전자소자,5. 유전자 편집,7. 초대용량 데이터 대응 광통신 기술,1-1. 금속 3D 프린팅(A-set),1-2. 금속 3D 프린팅(B-set),6.무선 전력 전송,8. 면역세포치료,10. 상변화 소재 활용 열에너지 저장
concatenated_results,uid cluster_id pubyear2 ...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2...,uid cluster_id pubyear2 ...
concatenated_results_abs,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2 ...,uid cluster_id pubyear2...,uid cluster_id pubyear2...
npubs_contain_searchterms,aukey_npubs abstract_npubs Nan...,aukey_npubs abstract_npubs bio...,aukey_npubs abstract_npubs mic...,aukey_npubs abstract_npubs st...,...,aukey_npub...,aukey_npubs abstr...,aukey_npubs abs...,aukey_npubs abs...,aukey_npubs abstract_np...,aukey_npubs abstrac...


In [None]:
# 뽑힌 데이터 저장
# 데이터 저장을 위한 데스트 작업 먼저

In [None]:
sorted(list(technology_search_results.keys()))

['1-1. 금속 3D 프린팅(A-set)',
 '1-1. 금속 3D 프린팅(A-set)2',
 '1-2. 금속 3D 프린팅(B-set)',
 '10. 상변화 소재 활용 열에너지 저장',
 '2. 신축성 전자소자',
 '3. 휴먼 마이크로바이옴',
 '4. 암 진단_예측 바이오마커',
 '5. 유전자 편집',
 '6.무선 전력 전송',
 '7. 초대용량 데이터 대응 광통신 기술',
 '8. 면역세포치료',
 '9. 나노유체 이용 에너지 효율화']

In [None]:
techs = sorted(list(technology_search_results.keys()))

In [None]:
technology_search_results[list(technology_search_results.keys())[0]].keys()

dict_keys(['concatenated_results', 'concatenated_results_abs', 'npubs_contain_searchterms'])

In [None]:
for tech in techs:
    print(tech)
    technology_search_results[tech]['concatenated_results'].to_csv(f'{tech}_search_results_from_aukey.csv')
    technology_search_results[tech]['concatenated_results_abs'].to_csv(f'{tech}_search_results_from_abstract.csv')
    technology_search_results[tech]['npubs_contain_searchterms'].to_csv(f'{tech}_npubs_contain_searchterms.csv')

1-1. 금속 3D 프린팅(A-set)
1-1. 금속 3D 프린팅(A-set)2
1-2. 금속 3D 프린팅(B-set)
10. 상변화 소재 활용 열에너지 저장
2. 신축성 전자소자
3. 휴먼 마이크로바이옴
4. 암 진단_예측 바이오마커
5. 유전자 편집
6.무선 전력 전송
7. 초대용량 데이터 대응 광통신 기술
8. 면역세포치료
9. 나노유체 이용 에너지 효율화


In [None]:
sorted(list(technology_search_results.keys()))[0]

'1-1. 금속 3D 프린팅(A-set)'

In [None]:
tech = sorted(list(technology_search_results.keys()))[9]

In [None]:
tech

'7. 초대용량 데이터 대응 광통신 기술'

In [None]:
technology_search_results[tech]['npubs_contain_searchterms']

Unnamed: 0,aukey_npubs,abstract_npubs
Multi.*core fiber,374,984
multi.*mode fiber,490,3750
Few.*Mode Fiber,257,734
Spacial Multiplexing,0,0
Spatial Division Multiplexing,66,113
Space Division Multiplexing,233,217
PAM-4,79,300
pulse.*amplitude modulation,410,1475
High.*Speed Optical Communications,0,46
High.*Speed Optical Transmission,14,47


In [None]:
termlist =  list(technology_search_results[tech]['npubs_contain_searchterms'].index)

In [None]:
df = technology_search_results[tech]['concatenated_results']

In [None]:
df[df['aukey_modified'].str.contains(f".*{termlist[0]}.*", regex=True)]

Unnamed: 0,uid,cluster_id,pubyear2,item,aukey,abstract,aukey_modified
8,WOS:000350216400013,120,2015,"Two-Dimensional, 37-Channel, High-Bandwidth, U...",Multicore fiber fan-out;optical fiber couplers...,"We demonstrate a hexagonal, monolithic optical...",Multicore fiber fan-out optical fiber couplers...
9,WOS:000351806300009,120,2015,TCF-MMF-TCF fiber structure based interferomet...,Multimode fiber;Thinned core fiber;in-fiber MZ...,A liquid refractive index (RI) sensor based on...,Multimode fiber Thinned core fiber in-fiber MZ...
11,WOS:000375860700008,120,2016,Low-temperature-sensitive relative humidity se...,Multimode interference;No-core fiber;SiO2 nano...,A low-temperature-sensitive relative humidity ...,Multimode interference No-core fiber SiO2 nano...
14,WOS:000393764000001,120,2017,Spatially Arrayed Long Period Gratings in Mult...,Multicore fiber;fiber grating;spatial division...,"Based on electrical arc discharges mechanisms,...",Multicore fiber fiber grating spatial division...
16,WOS:000396442800013,120,2017,Refractive index sensor based on tapered multi...,Fiber optics sensors;Refractive index;Multicor...,A novel refractive index (RI) sensor based on ...,Fiber optics sensors Refractive index Multicor...
...,...,...,...,...,...,...,...
359,WOS:000538151700002,2671,2020,"Digital Back Propagation in Long-Haul, MIMO-Su...",MIMO communication;Gain;Q-factor;Crosstalk;Mul...,This work presents an experimental evaluation ...,MIMO communication Gain Q-factor Crosstalk Mul...
361,WOS:000554904400033,2671,2020,Spatial Density and Splicing Characteristic Op...,Optical fiber networks;Multicore processing;Sp...,Few-mode fiber design suitable for long-haul d...,Optical fiber networks Multicore processing Sp...
362,WOS:000565710400001,2671,2020,Efficient Channel Model for Homogeneous Weakly...,Channel models;Couplings;Optical fibers;Multic...,"In recent years, several channel models for ho...",Channel models Couplings Optical fibers Multic...
368,WOS:000594923000017,2671,2020,High-Speed Performance Evaluation of Graded-In...,Multicore fiber;multimode fiber;fiber design a...,A comprehensive study on the optical character...,Multicore fiber multimode fiber fiber design a...


In [None]:
# 각 용어를 검색하고 결과를 새 컬럼으로 추가하는 코드 테스트
for term in tqdm(termlist):
    df[term] = df['aukey_modified'].str.contains(f".*{termlist[0]}.*", case=False, regex=True)

  0%|          | 0/15 [00:00<?, ?it/s]

In [None]:
# 각 용어(term)이 df(저자키워드 검색결과), df_abs(초록 검색결과)에서 어느 행의 검색에 기여했는지를 찾고
# , 이를 새 컬럼(True/False)으로 추가하고, csv로 저장
for tech in tqdm(techs, desc=f'processing tech {tech}'):
    termlist =  list(technology_search_results[tech]['npubs_contain_searchterms'].index)
    df = technology_search_results[tech]['concatenated_results']
    df_abs = technology_search_results[tech]['concatenated_results_abs']
    for term in tqdm(termlist, desc = 'search terms in aukey'):
        df[term] = df['aukey_modified'].str.contains(f".*{term}.*", case=False, regex=True)
    for term in tqdm(termlist, desc = 'search terms in abstract'):
        df_abs[term] = df_abs['abstract'].str.contains(f".*{term}.*", case=False, regex=True)
    df.to_csv(f'{tech}_search_results_from_aukey_updated.csv')
    df_abs.to_csv(f'{tech}_search_results_from_abstract_updated.csv')
    print(tech)

processing tech {tech}:   0%|          | 0/12 [00:00<?, ?it/s]

search terms in aukey:   0%|          | 0/22 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/22 [00:00<?, ?it/s]

1-1. 금속 3D 프린팅(A-set)


search terms in aukey:   0%|          | 0/22 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/22 [00:00<?, ?it/s]

1-1. 금속 3D 프린팅(A-set)2


search terms in aukey:   0%|          | 0/7 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/7 [00:00<?, ?it/s]

1-2. 금속 3D 프린팅(B-set)


search terms in aukey:   0%|          | 0/3 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/3 [00:00<?, ?it/s]

10. 상변화 소재 활용 열에너지 저장


search terms in aukey:   0%|          | 0/12 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/12 [00:00<?, ?it/s]

2. 신축성 전자소자


search terms in aukey:   0%|          | 0/10 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/10 [00:00<?, ?it/s]

3. 휴먼 마이크로바이옴


search terms in aukey:   0%|          | 0/6 [00:00<?, ?it/s]

search terms in abstract:   0%|          | 0/6 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# df_abs(초록 검색결과) 중, df(저자키워드 검색결과)에 존재하는 행을 제외한 나머지행만 따로 추출하여 csv 저장
for i in range(1,12):
    print(techs[i])
    df = technology_search_results[techs[i]]['concatenated_results']
    df_abs = technology_search_results[techs[i]]['concatenated_results_abs']
    df_abs_div = df_abs.loc[~(df_abs['uid'].isin(df['uid']))]
    df_abs_div.to_csv(f'{techs[i]}_only-abs_not-aukey.csv')

1-1. 금속 3D 프린팅(A-set)2
1-2. 금속 3D 프린팅(B-set)
10. 상변화 소재 활용 열에너지 저장
2. 신축성 전자소자
3. 휴먼 마이크로바이옴
4. 암 진단_예측 바이오마커
5. 유전자 편집
6.무선 전력 전송
7. 초대용량 데이터 대응 광통신 기술
8. 면역세포치료
9. 나노유체 이용 에너지 효율화
