# Q1. Crawl all the unique URLs for the detailed publication pages

In [1]:
# Import Required Module
import requests
from bs4 import BeautifulSoup

In [2]:
def getHtmlText(url):
    try:
        r = requests.get(url,timeout=20) # Get URL Content,set request timeout to 20 seconds
        r.raise_for_status()  # If the response status code returned != 200, an exception will be generated
        r.encoding=r.apparent_encoding  #Set the text encoding format to be consistent with the web page
        return r.text
    except:
        return ""

In [3]:
start_url =  'https://community.dur.ac.uk/hubert.shum/comp42315/' # Start Web URL
page = getHtmlText(start_url) # Getting the url
soup = BeautifulSoup(page, 'html.parser') # Parse HTML Code
#print(soup.prettify())

In [4]:
# Get the url of the publication page
publication=soup.find('div',class_='navigator').find_all('a')[-1].get('href')
pub_url = start_url + publication

page = getHtmlText(pub_url) # Getting the publication page url
soup = BeautifulSoup(page, 'html.parser') # Parse the publication page

#print(soup.prettify())

#get the text of topics 
topic_text = soup.find('p',class_='TextOption').text
print(topic_text)

# Obtaining the first topic
first_topic =topic_text.split('/')[0].split('Topic:') 
print(first_topic)

first_topic = first_topic[-1].split('\xa0')[-1] # Removing '\xa0' of the first topic
print(first_topic)

Topic_a=soup.find('p',class_='TextOption').find_all('a') # Get all topic's link by finding class=TextOption
Topic_urls = []

for u in Topic_a: # Store all index urls and topics in a list
    Topic_urls.append({'url':start_url+u.get('href'),'topic':u.text})
print(Topic_urls)

Topic:   Character Animation / Motion Analysis / Interaction Modelling / 3D Reconstruction / Action Recognition / Surface Modelling / Virtual Reality / Biometrics / Face Modelling / Crowd Modelling / Biomedical Engineering / Hand and Gesture / Robotics / Machine Learning / Topology Analysis
['', '\xa0\xa0\xa0Character Animation ']
Character Animation 
[{'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/publicationfull_year_motionanalysis.htm', 'topic': 'Motion Analysis'}, {'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/publicationfull_year_interactionmodelling.htm', 'topic': 'Interaction Modelling'}, {'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/publicationfull_year_3dreconstruction.htm', 'topic': '3D Reconstruction'}, {'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/publicationfull_year_actionrecognition.htm', 'topic': 'Action Recognition'}, {'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/publicationfull_year_surfacemodelling.htm', 

In [7]:
# Getting all links for each topic page by finding all of the tags 'calss = w3-cell-middle'

def get_page_urls(page,topic):
    soup = BeautifulSoup(page, 'html.parser')
    tool_arry=soup.find_all('div',class_='w3-cell-middle')
    result = []
    
    for tls in tool_arry:
        tool_list = list(tls.strings)
        ar_name=tool_list[0].strip()
       
        spans = tls.find_all('span',class_='TextSmallDefault')
        
        all_span = spans[1].find_all('a') #Get the author's URL
        authors = []
        for a in all_span:
            name = a.string
            href = a.get('href')
            if href.find('http') == -1:
                href = start_url + href
            authors.append({'name':name,'url':href})
        
        all_span = spans[2].find_all('a') #Get the web buttons' URL
        re = {'topic':topic,'name':ar_name,'authors':authors}
#         print(all_span[0].text)

        if 'Webpage' in all_span[0].text:
            re['webpage'] = start_url + all_span[0].get('href')
        else:
            re['webpage']=''
        if 'DOI' in all_span[1].text:
            re['DOI'] = all_span[1].get('href')
        else:
            re['DOI'] = ''
        if 'YouTube' in all_span[2].text:
            re['YouTube'] = all_span[2].get('href')
        else:
            re['YouTube']=''
        print(re)
        result.append(re)
    return result

In [8]:
urls=get_page_urls(page,first_topic)

{'topic': 'Character Animation ', 'name': 'Spatio-temporal Manifold Learning for Human Motions via Long-horizon Modeling', 'authors': [{'name': 'He Wang', 'url': 'http://www.drhewang.com/'}, {'name': 'Edmond S. L. Ho', 'url': 'http://www.edho.net/'}, {'name': 'Hubert P. H. Shum', 'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/index.htm'}, {'name': 'Zhanxing Zhu', 'url': 'http://english.math.pku.edu.cn/peoplefaculty/473.html'}], 'webpage': 'https://community.dur.ac.uk/hubert.shum/comp42315/pbl_tvcg2021motionsynthesis.htm', 'DOI': 'http://doi.org/10.1109/TVCG.2019.2936810', 'YouTube': 'https://www.youtube.com/watch?v=1eZxWkLj1lg'}
{'topic': 'Character Animation ', 'name': 'A Quadruple Diffusion Convolutional Recurrent Network for Human Motion Prediction', 'authors': [{'name': 'Qianhui Men', 'url': 'https://orcid.org/0000-0002-0059-5484'}, {'name': 'Edmond S. L. Ho', 'url': 'http://www.edho.net/'}, {'name': 'Hubert P. H. Shum', 'url': 'https://community.dur.ac.uk/hubert.shum/co

In [9]:
# Get all links for each topic page
all_urls = []
all_urls = all_urls + urls
for u in Topic_urls:
#     print(u)
    url = u['url']
    page = getHtmlText(url)
    if page !='':
        urls = get_page_urls(page,u['topic'])
        all_urls = all_urls + urls
    else:
        print('not get page',url)
    

{'topic': 'Motion Analysis', 'name': 'Interpreting Deep Learning based Cerebral Palsy Prediction with Channel Attention', 'authors': [{'name': 'Manli Zhu', 'url': 'https://researchportal.northumbria.ac.uk/en/persons/manli-zhu'}, {'name': 'Qianhui Men', 'url': 'https://orcid.org/0000-0002-0059-5484'}, {'name': 'Edmond S. L. Ho', 'url': 'http://www.edho.net/'}, {'name': 'Howard Leung', 'url': 'http://www.cs.cityu.edu.hk/~howard/'}, {'name': 'Hubert P. H. Shum', 'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/index.htm'}], 'webpage': 'https://community.dur.ac.uk/hubert.shum/comp42315/pbl_bhi2021cerebralpalsy.htm', 'DOI': 'http://doi.org/10.1109/BHI50953.2021.9508619', 'YouTube': 'https://www.youtube.com/watch?v=sO1gg9pHGP4'}
{'topic': 'Motion Analysis', 'name': 'Interaction-based Human Activity Comparison', 'authors': [{'name': 'Yijun Shen', 'url': 'https://scholar.google.com/citations?user=eqtsGlUAAAAJ'}, {'name': 'Longzhi Yang', 'url': 'http://lyang.uk/'}, {'name': 'Edmond S. 

In [10]:
# Get all links for each topic page
all_urls = []
all_urls = all_urls + urls
for u in Topic_urls:
#     print(u)
    url = u['url']
    page = getHtmlText(url)
    if page !='':
        urls = get_page_urls(page,u['topic'])
        all_urls = all_urls + urls
    else:
        print('not get page',url)
    

{'topic': 'Motion Analysis', 'name': 'Interpreting Deep Learning based Cerebral Palsy Prediction with Channel Attention', 'authors': [{'name': 'Manli Zhu', 'url': 'https://researchportal.northumbria.ac.uk/en/persons/manli-zhu'}, {'name': 'Qianhui Men', 'url': 'https://orcid.org/0000-0002-0059-5484'}, {'name': 'Edmond S. L. Ho', 'url': 'http://www.edho.net/'}, {'name': 'Howard Leung', 'url': 'http://www.cs.cityu.edu.hk/~howard/'}, {'name': 'Hubert P. H. Shum', 'url': 'https://community.dur.ac.uk/hubert.shum/comp42315/index.htm'}], 'webpage': 'https://community.dur.ac.uk/hubert.shum/comp42315/pbl_bhi2021cerebralpalsy.htm', 'DOI': 'http://doi.org/10.1109/BHI50953.2021.9508619', 'YouTube': 'https://www.youtube.com/watch?v=sO1gg9pHGP4'}
{'topic': 'Motion Analysis', 'name': 'Interaction-based Human Activity Comparison', 'authors': [{'name': 'Yijun Shen', 'url': 'https://scholar.google.com/citations?user=eqtsGlUAAAAJ'}, {'name': 'Longzhi Yang', 'url': 'http://lyang.uk/'}, {'name': 'Edmond S. 

# Q2. crawl all the text-based information of each publication page

In [11]:
# crawl all the text-based information of each publication page
import time
all_title = []
all_text = []
all_topic = []

for webpage in all_urls:
    page = getHtmlText(webpage['webpage'])
    
    if page != '':
        soup = BeautifulSoup(page, 'html.parser')  
        #print(soup.prettify())
        
        txt_tags = soup.find(id='divBackground') 
        #print(txt_tags)
        
        full_txt =''.join(list(txt_tags.strings)) # Concatenating the returned array into a string
        #print(full_txt)
        
        all_title.append(webpage['name']) 
        all_text.append(full_txt.replace(u'\xa0',' ')) # Solve garbled characters
        all_topic.append(webpage['topic'])

    else:
        print('get none url:',webpage['webpage'])
    time.sleep(2) # Avoid network congestion
#     print(all_text)
    

In [30]:
import pandas as pd

# Store all text-based infomation into a csv file
dataframe = pd.DataFrame({'title':all_title,'text':all_text,'topic':all_topic})
dataframe.to_csv("all_text.csv",index=False,sep=',')

pd.read_csv('all_text.csv')

Unnamed: 0,title,text,topic
0,Coordinated Crowd Simulation with Topological ...,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Topology Analysis
1,Topology Aware Data-Driven Inverse Kinematics,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Topology Analysis
2,Motion Adaptation for Humanoid Robots in Const...,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Topology Analysis
3,Interpreting Deep Learning based Cerebral Pals...,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Motion Analysis
4,Interaction-based Human Activity Comparison,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Motion Analysis
...,...,...,...
125,Towards Sparse Rule Base Generation for Fuzzy ...,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Machine Learning
126,TSK Inference with Sparse Rule Bases,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Machine Learning
127,Coordinated Crowd Simulation with Topological ...,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Topology Analysis
128,Topology Aware Data-Driven Inverse Kinematics,\n\n\nHOMEto discover\nPUBLICATIONSto innovate...,Topology Analysis


# Q3. Find out the 100 most popular words

In [67]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')
print(stopwords.words('english'))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

[nltk_data] Downloading package stopwords to
[nltk_data]     /home2/mdzr98/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home2/mdzr98/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [45]:
# Read the csv file obtained in Question 2
tetxdata = pd.read_csv('all_text.csv') #old data
now_data = textdata.copy(deep=True) #word data

In [71]:
# Stopword filter 
def fiter_stopword(words):
    result = []
    for w in words:
        x = w.lower() # Converts all uppercase characters in a string into lowercase characters 
        if x not in stopWords:
            result.append(x)
    return result

In [72]:
text_colu = now_data.columns[-2]
text_title = now_data.columns[0]

# Replace "[(),.?';:]" as ' ' to get all words
now_data[text_colu].replace(regex=r"[(),.?';:]",value=' ',inplace=True)
now_data[text_title].replace(regex=r"[(),.?';:]",value=' ',inplace=True)
all_word = []

# Create a replica index of the data
for x in now_data.index:
    full_text = now_data.loc[x, text_colu]
#     print(full_text)
    title = now_data.loc[x, text_title]
#     print(title)
    
    start = full_text.find('Abstract') + len('Abstract')
    end = full_text.find('Publication')
    
    abstract = full_text[start:end].strip() # removing spaces or specified characters at the start and end of a string
    abstract = fiter_stopword(abstract.split(' ')) # Splitting abstract into a list
    title = fiter_stopword(title.split(' ')) # Splitting title into a list
    
    all_word = all_word + abstract
    all_word = all_word + title


In [73]:
all_word = [x for x in all_word if x != '']
tool = [0 for x in all_word]
# print(tool)

# Unify word tenses and singular to ensure that words return to their original form
for x in all_word:
    for i in range(0,len(all_word)):
        len_x = len(x)
        if len_x<=2:
            continue
#         print(i)
        len_i = len(all_word[i])
        if x in all_word[i] and (len_i-len_x) >=1 and (len_i-len_x)<=2 and tool[i]==0:
            all_word[i] = x
            tool[i] = 1
word_dic = {}
# print('data-driven' in word_dic)

# Counting the frequency of word occurrences
for w in all_word:
    x = w in word_dic
    if x is False:
        word_dic[str(w)] = 1
#         print(word_dic)
    else:
#         print(str(word_dic[w]))
        word_dic[w] = word_dic[w] + 1

# Sort vocabulary frequency from largest to smallest
top_100 = sorted(word_dic.items(), key = lambda kv:(kv[1], kv[0]),reverse=True)[:100]
print(top_100)


[('propose', 225), ('motion', 177), ('method', 166), ('use', 159), ('system', 149), ('feature', 132), ('using', 125), ('hand', 111), ('learning', 110), ('result', 109), ('interaction', 105), ('data', 99), ('image', 97), ('joint', 93), ('per', 91), ('human', 90), ('base', 90), ('character', 83), ('show', 82), ('environment', 81), ('3d', 79), ('algorithm', 77), ('formation', 75), ('application', 75), ('movement', 73), ('new', 72), ('control', 67), ('approach', 67), ('experiment', 63), ('pose', 62), ('crowd', 62), ('posture', 61), ('network', 60), ('different', 59), ('framework', 58), ('shape', 56), ('graph', 56), ('problem', 55), ('object', 55), ('kinect', 54), ('generate', 54), ('also', 53), ('high', 52), ('database', 52), ('effective', 51), ('deep', 51), ('body', 50), ('work', 49), ('however', 49), ('game', 49), ('car', 49), ('rule', 48), ('set', 47), ('depth', 47), ('facial', 46), ('performance', 45), ('one', 45), ('action', 45), ('accuracy', 45), ('scene', 44), ('occlusion', 44), ('c

# Q4. Data analysis and visualization for author collaboration

In [55]:
# Creating a replica index of the data
author_data = textdata.copy(deep=True)
author_dic = {}

In [56]:
for x in author_data.index:
    
#  Locating the full text from the text file     
    full_text = author_data.loc[x, text_colu]
#     print(full_text)

# Locating titles from the text file
    title = author_data.loc[x, text_title]
#     print(title)

# Extracting the position of text content that contains author information    
    start = full_text.find('Publication') + len('Publication')
#     print(start)
    end = full_text.find('Downloads')
#     print(end)
    Publication = full_text[start:end].strip()
#     print(Publication)

# Munging the extrated contents  
    Publication = Publication.split('\t')[0].replace('and',',').split(',')[:-1]
#     print(Publication) 

# Establishing correspondence between authors and articles
    for p in Publication:
        p = p.strip()
        if p !="":
            x = p in author_dic
            if x is False:
                author_dic[p] = []
            author_dic[p].append(title)


In [57]:
keys = list(author_dic.keys())
# print(keys)
# print(len(keys))

# Building collaborations among authors
targets = []
for i in range(0,len(keys)):
    for k2 in author_dic[keys[i]]:
        for j in range(i,len(keys)):
            if i != j:
                if k2 in author_dic[keys[j]]:
                    targets.append([keys[i],keys[j]])
print(targets)


df = pd.DataFrame(targets)
df['count'] = df.groupby([0,1])[0].transform('count')
# print(counts)
# df['count'] = df[].map(counts)

df = df.drop_duplicates()
df.columns=['source','target','value'] 

print(df)
                    

[['Adam Barnett', 'Hubert P. H. Shum'], ['Adam Barnett', 'Taku Komura'], ['Adam Barnett', 'Hubert P. H. Shum'], ['Adam Barnett', 'Taku Komura'], ['Adam Barnett', 'Hubert P. H. Shum'], ['Adam Barnett', 'Taku Komura'], ['Hubert P. H. Shum', 'Taku Komura'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Yiu-ming Cheung'], ['Hubert P. H. Shum', 'P. C. Yuen'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Manli Zhu'], ['Hubert P. H. Shum', 'Qianhui Men'], ['Hubert P. H. Shum', 'Howard Leung'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Yijun Shen'], ['Hubert P. H. Shum', 'Longzhi Yang'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Kevin D. McCay'], ['Hubert P. H. Shum', 'Gerhard Fehringer'], ['Hubert P. H. Shum', 'Claire Marcroft'], ['Hubert P. H. Shum', 'Nicholas Embleton'], ['Hubert P. H. Shum', 'Edmond S. L. Ho'], ['Hubert P. H. Shum', 'Jake Hall'], ['Hubert P. H. Shu

In [58]:
# Import Required Module for visualisation 
import numpy as np
from bokeh.models import BasicTicker, ColorBar, LinearColorMapper, PrintfTickFormatter
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
output_notebook()

s = list(set(df['target'].values))
s2 = list(set(df['source'].values))
s3 = list(set(s + s2))
names = s3

nodes = []
for n in names:
    nodes.append({'name':n,'group':1})

N = len(nodes)
counts = np.zeros((N, N))

for link in df.values:

    counts[names.index(link[0]), names.index(link[1])] = link[-1]
    counts[names.index(link[1]), names.index(link[0])] = link[-1]

colormap = ["#444444", "#a6cee3", "#1f78b4", "#b2df8a", "#33a02c", "#fb9a99",
            "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"]

xname = []
yname = []
color = []
alpha = []

for i, node1 in enumerate(nodes):
    for j, node2 in enumerate(nodes):
        xname.append(node1['name'])
        yname.append(node2['name'])

        alpha.append(min(counts[i,j]/4.0, 0.9) + 0.1)

        if node1['group'] == node2['group']:
            color.append(colormap[node1['group']])
        else:
            color.append('lightgrey')

data=dict(
    xname=xname,
    yname=yname,
    colors=color,
    alphas=alpha,
    count=counts.flatten(),
)
TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
p = figure(
           x_axis_location="above", tools=TOOLS,
           x_range=list(reversed(names)), y_range=names,
           tooltips = [('names', '@yname, @xname'), ('count', '@count')])

p.width = 800
p.height = 800
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "7px"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = np.pi/3

p.rect('xname', 'yname', 0.9, 0.9, source=data,
       color='colors', alpha='alphas', line_color=None,
       hover_line_color='black', hover_color='colors')

show(p)

# Q5. Analysing how the features of a publication would affect its “citation”

In [59]:
# Creating a replica index of the data
citation = textdata.copy(deep=True)
topics = citation.columns[-1]
print(citation['topic'])

0      Topology Analysis
1      Topology Analysis
2      Topology Analysis
3        Motion Analysis
4        Motion Analysis
             ...        
125     Machine Learning
126     Machine Learning
127    Topology Analysis
128    Topology Analysis
129    Topology Analysis
Name: topic, Length: 130, dtype: object


In [60]:
impact_factor = []

#  Locating the full text 
for x in citation.index:
    full_text = citation.loc[x, text_colu]
    title = citation.loc[x, text_title]
    topic = citation.loc[x, topics]

    #  Defining the scope of impact factor    
    start = full_text.find('Publication') + len('Publication')
    end = full_text.find('Downloads')

    #  Locating the citation    
    Publication = full_text[start:end]
    tool_arry = Publication.split('\n')

    # Extracting the year of publications
    year = int(tool_arry[4][-4:])
#     print(year)

    Impact_Factor = tool_arry[5]    
    Impact_Factor = Impact_Factor.split('#')
#     print(Impact_Factor,len(Impact_Factor))
    
    impact_value = 0
    cita_value = 0

    # Finding Determine whether the impact factor exists
    if Impact_Factor[0] == '  ':
        continue
    if Impact_Factor[0].find('Impact Factor') !=-1:
        impact_pos = Impact_Factor[0].find('Impact Factor') + len('Impact Factor:')
#         print(impact_pos)
        impact_value = Impact_Factor[0][impact_pos :].strip()
#         print(impact_value)
        if len(Impact_Factor)>=2 and Impact_Factor[1].find('Citation') !=-1:
            cita_pos = Impact_Factor[1].find('Citation')+ len('Citation:')
#             print(cita_pos)
            cita_value = Impact_Factor[1][cita_pos :].strip()
#             print(cita_value)
    else:
        if Impact_Factor[0].find('Citation') !=-1:
            cita_pos = Impact_Factor[0].find('Citation')+ len('Citation:')
            cita_value = Impact_Factor[0][cita_pos :].strip()
#     print(len(Impact_Factor))

    start = full_text.find('Publication') + len('Publication')
    end = full_text.find('Downloads')
    Publication = full_text[start:end].strip()    
#     print(Publication)
    Publication = Publication.split('\t')[0].replace('and',',').split(',')[:-1]
    impact_factor.append({'Impact Factor':float(impact_value),'Citation':float(cita_value),
                          'Year':year,'First Author':Publication[0],'topic':topic})

In [61]:
impact_cita = pd.DataFrame(impact_factor)

# impact_cita.sort_values(by='Citation')
print(impact_cita)

     Impact Factor  Citation  Year      First Author              topic
0            2.078      41.0  2016      Adam Barnett  Topology Analysis
1            2.078      26.0  2013   Edmond S. L. Ho  Topology Analysis
2            0.000      31.0  2013  Edmond S. L. Ho   Topology Analysis
3            0.000       1.0  2021         Manli Zhu    Motion Analysis
4            4.579      21.0  2020        Yijun Shen    Motion Analysis
..             ...       ...   ...               ...                ...
110          0.000      14.0  2016           Yao Tan   Machine Learning
111          0.000      18.0  2016            Jie Li   Machine Learning
112          2.078      41.0  2016      Adam Barnett  Topology Analysis
113          2.078      26.0  2013   Edmond S. L. Ho  Topology Analysis
114          0.000      31.0  2013  Edmond S. L. Ho   Topology Analysis

[115 rows x 5 columns]


In [62]:

p = figure(tooltips = [('Impact Factor', '@x'), ('Citation', '@y')])
p.xaxis.axis_label = 'Impact Factor'
p.yaxis.axis_label = 'Citation'

p.scatter(impact_cita["Impact Factor"], impact_cita["Citation"],fill_alpha=0.2, size=10)

show(p)
# fig2 = px.scatter(impact_cita, x="Impact Factor", y="Citation", color="Year")
# fig2.show()

In [63]:
p = figure(tooltips = [('Year', '@x'), ('Citation', '@y'),('Impact Factor','@size')])
p.xaxis.axis_label = 'Year'
p.yaxis.axis_label = 'Citation'

p.scatter(impact_cita["Year"], impact_cita["Citation"],fill_alpha=0.2, size=impact_cita["Impact Factor"])

show(p)
# fig3 = px.scatter(impact_cita, x="First Author", y="Citation", color="Year")
# fig3.show()

In [64]:
# fig3 = px.scatter(impact_cita, x="Year", y="Citation",size="Impact Factor")
# fig3.show()
aou=list(set(impact_cita['First Author'].values))

p = figure(width=800,x_range=aou,tooltips = [('First Author', '@x'), ('Citation', '@y'),('Impact Factor','@size')])
p.xaxis.axis_label = 'First Author'
p.yaxis.axis_label = 'Citation'
p.xaxis.major_label_orientation = 3.14/4
p.scatter(impact_cita["First Author"], impact_cita["Citation"],fill_alpha=0.2, size=impact_cita["Impact Factor"])

show(p)

In [65]:
aou=list(set(impact_cita['topic'].values))

p = figure(width=900,x_range=aou,tooltips = [('topic', '@x'), ('Citation', '@y'),('Impact Factor','@size')])
p.xaxis.axis_label = 'topic'
p.yaxis.axis_label = 'Citation'
p.xaxis.major_label_orientation = 3.14/4
p.scatter(impact_cita["topic"], impact_cita["Citation"],fill_alpha=0.2, size=impact_cita["Impact Factor"])

show(p)
# fig4 = px.scatter(impact_cita, x="Year", y="Citation",size="Impact Factor",color='topic')
# fig4.show()