In [1]:
import gzip, os, requests, glob, time, socket, math
import pandas as pd
import numpy as np
from platform import python_version
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from PyComplexHeatmap import *

# Print a short provenance banner (timestamp, working directory, host,
# interpreter and Matplotlib versions) so the saved output records where and
# when the notebook was executed.
host_name = socket.gethostname()
try:
    host_ip = socket.gethostbyname(host_name)
except OSError:
    # socket.gaierror (a subclass of OSError) is raised when the local host
    # name cannot be resolved; report a placeholder instead of aborting the
    # very first cell of the notebook.
    host_ip = "unknown"

print("【日期時間】{}".format(time.strftime("%Y/%m/%d %H:%M:%S")))
print("【工作目錄】{}".format(os.getcwd()))
print("【主機名稱】{} ({})".format(host_name, host_ip))
print("【Python】{}".format(python_version()))
print("【Matplotlib】{}".format(mpl.__version__))

# IPython line magic: set the notebook autosave interval to 120 seconds.
%autosave 120

【日期時間】2023/04/24 08:19:30
【工作目錄】/Users/normi/Documents/myPython
【主機名稱】chenyixuandeMacBook-Air.local (127.0.0.1)
【Python】3.10.10
【Matplotlib】3.7.1


Autosaving every 120 seconds


## 讀入資料
* ntpu 表示專委提供之學校教師的 Scopus 對照表
* scopus 表示從 SciVal 下載的本校教師資料

In [25]:
# Both exports are read with the same parser settings, so keep them in one place.
csv_options = dict(sep=',', encoding='utf-8', engine='python')

# ntpu:   the university's researcher <-> Scopus author ID lookup table.
# scopus: the author list downloaded from SciVal.
ntpu = pd.read_csv('NTPU_Researchers_Scopus_Author_ID__2022-03.csv', **csv_options)
scopus = pd.read_csv('Top_500_authors,_by_Scholarly_Output.csv', **csv_options)

In [26]:
# Report (rows, columns) of both tables right after loading.
shape_report = f'ntpu = {ntpu.shape}, scopus = {scopus.shape}'
print(shape_report)

ntpu = (246, 5), scopus = (85, 9)


## 篩選 2016 年（含）之後的資料

In [27]:
# Keep only authors whose most recent publication year is 2016 or later
# (the comparison is inclusive).
published_since_2016 = scopus['Most recent publication'] >= 2016
scopus = scopus.loc[published_since_2016]

In [28]:
# Preview the first five rows of the lookup table.
ntpu.head(n=5)

Unnamed: 0,Author,Scopus Author ID,Level 1,Level 2,Tags
0,"Li, Ronggeng",57214067305,法律學院,法律學系,李榮耕
1,"Lin, Frederick Chao Chun",57193596799,法律學院,法律學系,林超駿
2,"Chang, Hsinti",57202386906,法律學院,法律學系,張心悌
3,"Kuo, Linghwei",35362283200,法律學院,法律學系,郭玲惠
4,"Chen, Haoyun",57223763322,法律學院,法律學系,陳皓芸


In [29]:
# Preview the first five rows of the filtered SciVal author list.
scopus.head(n=5)

Unnamed: 0,Name,Scholarly Output,Most recent publication,Citations,Citations per Publication,Field-Weighted Citation Impact,h-index,Scopus author ID,Scopus author profile
0,"Tsai, Mingchang",8,2020,36,4.5,0.39,13,35241288100,https://www.scopus.com/authid/detail.url?autho...
1,"Wei, Hsisheng",8,2021,91,11.4,1.53,17,8610315000,https://www.scopus.com/authid/detail.url?autho...
2,"Chen, Wanchi",7,2017,138,19.7,0.76,5,56092825900,https://www.scopus.com/authid/detail.url?autho...
3,"Chang, Wenchun",6,2018,51,8.5,0.58,9,13305049300,https://www.scopus.com/authid/detail.url?autho...
4,"Peng, Chien Wen",6,2021,17,2.8,0.31,5,35368713100,https://www.scopus.com/authid/detail.url?autho...


In [30]:
# Report (rows, columns) again to see how many authors survived the year filter.
shape_report = f'ntpu = {ntpu.shape}, scopus = {scopus.shape}'
print(shape_report)

ntpu = (246, 5), scopus = (64, 9)


## 依照 Scopus ID 合併兩資料表

In [31]:
# Left-join the SciVal authors onto the lookup table by Scopus author ID.
# Every SciVal author is kept; authors without a lookup entry get NaN in the
# columns that come from ntpu.
scopus_columns = ['Name', 'Scopus author ID', 'Most recent publication', 'Scopus author profile']
df = scopus[scopus_columns].merge(
    ntpu,
    how='left',
    left_on='Scopus author ID',
    right_on='Scopus Author ID',
)

In [32]:
# Preview the first five rows of the merged table.
df.head(n=5)

Unnamed: 0,Name,Scopus author ID,Most recent publication,Scopus author profile,Author,Scopus Author ID,Level 1,Level 2,Tags
0,"Tsai, Mingchang",35241288100,2020,https://www.scopus.com/authid/detail.url?autho...,,,,,
1,"Wei, Hsisheng",8610315000,2021,https://www.scopus.com/authid/detail.url?autho...,"Wei, Hsisheng",8610315000.0,社會科學學院,社會工作學系,魏希聖
2,"Chen, Wanchi",56092825900,2017,https://www.scopus.com/authid/detail.url?autho...,"Chen, Wanchi",56092830000.0,社會科學學院,社會學系,陳婉琪
3,"Chang, Wenchun",13305049300,2018,https://www.scopus.com/authid/detail.url?autho...,"Chang, Wenchun",13305050000.0,公共事務學院,財政學系,張文俊
4,"Peng, Chien Wen",35368713100,2021,https://www.scopus.com/authid/detail.url?autho...,"Peng, Chien Wen",35368710000.0,公共事務學院,不動產與城鄉環境學系,彭建文


## 篩選出未找到歸屬單位的發表者

In [33]:
# Show the merged rows that have no 'Tags' value, i.e. no affiliation was
# attached by the join.
no_affiliation = df['Tags'].isna()
df[no_affiliation]

Unnamed: 0,Name,Scopus author ID,Most recent publication,Scopus author profile,Author,Scopus Author ID,Level 1,Level 2,Tags
0,"Tsai, Mingchang",35241288100,2020,https://www.scopus.com/authid/detail.url?autho...,,,,,
6,"Chen, Yin zu",56459681700,2020,https://www.scopus.com/authid/detail.url?autho...,,,,,
22,"Liao, Hsueh Fei",57221957661,2020,https://www.scopus.com/authid/detail.url?autho...,,,,,
28,"Chiu, Chihsin",55925422000,2019,https://www.scopus.com/authid/detail.url?autho...,,,,,
30,"Hung, Koyu",56492282900,2016,https://www.scopus.com/authid/detail.url?autho...,,,,,
34,"Lin, Yu Ruei",57191522495,2016,https://www.scopus.com/authid/detail.url?autho...,,,,,
35,"Lin, Yihsuan",57191821664,2019,https://www.scopus.com/authid/detail.url?autho...,,,,,
36,"Chiu, Hungyu",57192544405,2016,https://www.scopus.com/authid/detail.url?autho...,,,,,
37,"Huang, Jhong You",57195775143,2019,https://www.scopus.com/authid/detail.url?autho...,,,,,
42,"Sung, Sumei",57203309269,2017,https://www.scopus.com/authid/detail.url?autho...,,,,,


In [34]:
# Manually looked-up affiliations, one record per author in the same order as
# the three parallel lists consumed further down: (Level 1, Level 2, Tags).
# An all-empty record marks an author whose affiliation has not been found.
manual_affiliations = [
    ('社會科學學院', '社會學系', '蔡明璋'),
    ('社會科學學院', '社會學系', '陳韻如'),
    ('商學院', '統計學系', '學生'),
    ('公共事務學院', '不動產與城鄉環境學系', '邱啟新'),
    ('', '', ''),
    ('商學院', '資訊管理研究所', '學生'),
    ('', '', ''),
    ('', '', ''),
    ('公共事務學院', '不動產與城鄉環境學系', '學生'),
    ('', '', ''),
    ('商學院', '企業管理學系', '學生'),
    ('商學院', '企業管理學系', '學生'),
    ('', '', ''),
    ('', '', ''),
    ('社會科學學院', '經濟學系', '林國明'),
    ('社會科學學院', '社會學系', '學生'),
    ('', '', ''),
    ('商學院', '企業管理學系', '學生'),
    ('商學院', '企業管理學系', '學生'),
    ('', '', ''),
    ('公共事務學院', '公共行政暨政策學系', '劉嘉薇'),
    ('公共事務學院', '都市計劃研究所', '曾國雄'),
]

# Unzip the records back into the three parallel lists.
l1 = [record[0] for record in manual_affiliations]
l2 = [record[1] for record in manual_affiliations]
t = [record[2] for record in manual_affiliations]

In [35]:
# Sanity check: the three manual lists must all have the same length.
length_report = f'l1 = {len(l1)}, l2 = {len(l2)}, t = {len(t)}'
print(length_report)

l1 = 22, l2 = 22, t = 22


## 將逐一查到的歸屬教學單位指定回資料表中

In [36]:
# Fill the manually looked-up affiliations into the rows whose 'Tags' is
# missing (presumably the authors with no entry in ntpu; assumes ntpu itself
# has no empty 'Tags' values -- TODO confirm).
#
# The mask is evaluated once, before anything is written, so all three
# assignments target exactly the same rows.
unmatched = df['Tags'].isna()
n_unmatched = int(unmatched.sum())

# The lists are positional: the i-th entry goes to the i-th unmatched row in
# df order. Validate every length up front so a mismatch cannot leave df
# half-updated ('Level 1' written, 'Level 2' / 'Tags' not).
if not (len(l1) == len(l2) == len(t) == n_unmatched):
    raise ValueError(
        f'manual affiliation lists (l1 = {len(l1)}, l2 = {len(l2)}, t = {len(t)}) '
        f'must each have one entry per unmatched row ({n_unmatched}); '
        'if this cell was already run, rebuild df from the merge first'
    )

df.loc[unmatched, 'Level 1'] = l1
df.loc[unmatched, 'Level 2'] = l2
df.loc[unmatched, 'Tags'] = t

## 未找到的發表者列為 “待確認”

In [45]:
# Replace the empty-string placeholders in each affiliation column with the
# "pending confirmation" label.
for column in ('Level 1', 'Level 2', 'Tags'):
    df.loc[df[column] == '', column] = '待確認'

In [46]:
# Display the merged table after the manual affiliations and the
# pending-confirmation labels have been filled in.
df

Unnamed: 0,Name,Scopus author ID,Most recent publication,Scopus author profile,Author,Scopus Author ID,Level 1,Level 2,Tags
0,"Tsai, Mingchang",35241288100,2020,https://www.scopus.com/authid/detail.url?autho...,,,社會科學學院,社會學系,蔡明璋
1,"Wei, Hsisheng",8610315000,2021,https://www.scopus.com/authid/detail.url?autho...,"Wei, Hsisheng",8.610315e+09,社會科學學院,社會工作學系,魏希聖
2,"Chen, Wanchi",56092825900,2017,https://www.scopus.com/authid/detail.url?autho...,"Chen, Wanchi",5.609283e+10,社會科學學院,社會學系,陳婉琪
3,"Chang, Wenchun",13305049300,2018,https://www.scopus.com/authid/detail.url?autho...,"Chang, Wenchun",1.330505e+10,公共事務學院,財政學系,張文俊
4,"Peng, Chien Wen",35368713100,2021,https://www.scopus.com/authid/detail.url?autho...,"Peng, Chien Wen",3.536871e+10,公共事務學院,不動產與城鄉環境學系,彭建文
...,...,...,...,...,...,...,...,...,...
59,"Hwang, Yiting",7402311223,2016,https://www.scopus.com/authid/detail.url?autho...,"Hwang, Yiting",7.402311e+09,商學院,統計學系,黃怡婷
60,"Wu, Tai Hsi",7404814990,2020,https://www.scopus.com/authid/detail.url?autho...,"Wu, Tai His",7.404815e+09,商學院,企業管理學系,吳泰熙
61,"Lin, Meichen",7404816718,2020,https://www.scopus.com/authid/detail.url?autho...,"Lin, Meichen",7.404817e+09,商學院,企業管理學系,林美珍
62,"Chih, Hsianglin",8658017200,2020,https://www.scopus.com/authid/detail.url?autho...,"Chih, Hsianglin",8.658017e+09,商學院,金融與合作經營學系,池祥麟


## 將資料寫出

In [52]:
# Export the SciVal author columns together with the resolved affiliation.
# Columns are selected by name rather than by position so that a change in
# the merge's column layout cannot silently export the wrong fields. The
# duplicate join key and author name that come from ntpu
# ('Author', 'Scopus Author ID') are intentionally left out.
export_columns = [
    'Name',
    'Scopus author ID',
    'Most recent publication',
    'Scopus author profile',
    'Level 1',
    'Level 2',
    'Tags',
]
df[export_columns].to_csv('scopus_sociology_author_subject_NTPU.csv', sep=',', index=False, encoding='utf-8')

In [51]:
# Preview the columns that are exported. They are selected by name rather
# than by position, so the preview does not depend on the column order
# produced by the merge.
df[['Name', 'Scopus author ID', 'Most recent publication', 'Scopus author profile', 'Level 1', 'Level 2', 'Tags']]

Unnamed: 0,Name,Scopus author ID,Most recent publication,Scopus author profile,Level 1,Level 2,Tags
0,"Tsai, Mingchang",35241288100,2020,https://www.scopus.com/authid/detail.url?autho...,社會科學學院,社會學系,蔡明璋
1,"Wei, Hsisheng",8610315000,2021,https://www.scopus.com/authid/detail.url?autho...,社會科學學院,社會工作學系,魏希聖
2,"Chen, Wanchi",56092825900,2017,https://www.scopus.com/authid/detail.url?autho...,社會科學學院,社會學系,陳婉琪
3,"Chang, Wenchun",13305049300,2018,https://www.scopus.com/authid/detail.url?autho...,公共事務學院,財政學系,張文俊
4,"Peng, Chien Wen",35368713100,2021,https://www.scopus.com/authid/detail.url?autho...,公共事務學院,不動產與城鄉環境學系,彭建文
...,...,...,...,...,...,...,...
59,"Hwang, Yiting",7402311223,2016,https://www.scopus.com/authid/detail.url?autho...,商學院,統計學系,黃怡婷
60,"Wu, Tai Hsi",7404814990,2020,https://www.scopus.com/authid/detail.url?autho...,商學院,企業管理學系,吳泰熙
61,"Lin, Meichen",7404816718,2020,https://www.scopus.com/authid/detail.url?autho...,商學院,企業管理學系,林美珍
62,"Chih, Hsianglin",8658017200,2020,https://www.scopus.com/authid/detail.url?autho...,商學院,金融與合作經營學系,池祥麟
