In [63]:
import itertools
import requests
import pandas as pd
import networkx as nx
import operator
from functools import reduce
from collections import Counter

In [64]:
# Inclusive lower bound on a paper's publication year; the helper functions
# below keep only papers whose year is >= YEAR.
YEAR = 2021

In [103]:
def get_topics_for_coauthor(author_id):
  """Count the topics attached to an author's recent papers.

  Fetches the author's profile from the Semantic Scholar v1 API, keeps the
  papers whose ``year`` is at least ``YEAR`` (a missing year is treated as 0),
  fetches each remaining paper individually, and tallies the ``topic`` field
  of every entry in those papers' ``topics`` lists.

  Args:
    author_id: Author id interpolated into the author endpoint URL.

  Returns:
    A list of ``(topic, count)`` tuples ordered from most to least common.
    The list is empty when the author has no recent papers or when none of
    the recent papers carry topics.
  """
  # Progress trace: each call issues one request per recent paper, so the
  # printed id shows which author is currently being processed.
  print(author_id)
  author = requests.get(f"https://api.semanticscholar.org/v1/author/{author_id}").json()
  recent_papers = [paper for paper in author['papers'] if (paper['year'] or 0) >= YEAR]
  paper_details = [
    requests.get(f"https://api.semanticscholar.org/v1/paper/URL:{paper['url']}").json()
    for paper in recent_papers
  ]
  # chain.from_iterable copes with zero recent papers; reduce() without an
  # initial value raises TypeError on an empty sequence.
  all_topics = itertools.chain.from_iterable(detail['topics'] for detail in paper_details)
  return Counter(entry['topic'] for entry in all_topics).most_common()

In [104]:
def get_coauthors_as_edge_list(author_id):
  """Build coauthorship edge lists from an author's recent papers.

  Fetches the author's profile from the Semantic Scholar v1 API, keeps the
  papers whose ``year`` is at least ``YEAR`` (a missing year is treated as 0),
  fetches each remaining paper individually, and pairs the original author
  with every entry of those papers' ``authors`` lists.

  No de-duplication or filtering is applied to the author entries: a person
  listed on several papers yields several edges, and the original author is
  paired with themselves whenever they appear in a paper's author list.

  Args:
    author_id: Author id interpolated into the author endpoint URL.

  Returns:
    A pair ``(edge_list, edge_list_with_ids)`` of equal-length lists.
    ``edge_list`` holds ``(original_name, coauthor_name)`` tuples and
    ``edge_list_with_ids`` holds ``(original_authorId, coauthor_authorId)``
    tuples in the same order. Both are empty when the author has no recent
    papers.
  """
  # Get the original author's profile
  profile = requests.get(f"https://api.semanticscholar.org/v1/author/{author_id}").json()
  author_name = profile['name']
  author_key = profile['authorId']
  # Filter in plain Python rather than through a DataFrame: an empty paper
  # list produces a frame without a 'year' column (KeyError), and a column of
  # only None years cannot be compared with an int.
  recent_urls = [
    paper['url'] for paper in profile['papers']
    if (paper.get('year') or 0) >= YEAR
  ]
  # Query for the actual papers where year >= YEAR
  paper_details = [
    requests.get(f"https://api.semanticscholar.org/v1/paper/URL:{url}").json()
    for url in recent_urls
  ]
  # Flatten the per-paper author lists into one list of author records
  coauthors = list(itertools.chain.from_iterable(detail['authors'] for detail in paper_details))
  edge_list = [(author_name, coauthor['name']) for coauthor in coauthors]
  edge_list_with_ids = [(author_key, coauthor['authorId']) for coauthor in coauthors]
  return edge_list, edge_list_with_ids

In [105]:
# Seed the analysis with a single author id; the name-based edge list is
# discarded here and only the id-based one is kept.
_, edge_list_with_ids = get_coauthors_as_edge_list(1697232)

In [106]:
# Distinct coauthor ids taken from the second element of each edge,
# skipping edges whose coauthor id is None.
known_ids = [target for _source, target in edge_list_with_ids if target is not None]
coauthors = list(pd.Series(known_ids).unique())

In [107]:
# Map every coauthor id to its list of (topic, count) pairs. Each lookup
# performs HTTP requests, so this cell can take a while to finish.
coauthor_topics = {
  coauthor_id: get_topics_for_coauthor(coauthor_id)
  for coauthor_id in coauthors
}

9725200
1697232
13700002
102814355
46867502
46584367
40031488
80266867
49605911
1724815
143784081
1714602
66807781
2072740551
11218514
2080136131
65855568
2045178324
2528276
145157784
2914608


In [108]:
# Show the per-coauthor topic counts collected above.
coauthor_topics

{'102814355': [],
 '11218514': [],
 '13700002': [],
 '143784081': [],
 '145157784': [('Experience', 1),
  ('Screenshot', 1),
  ('Digital media', 1),
  ('End-to-end principle', 1),
  ('End system', 1),
  ('Theory', 1),
  ('Categories', 1)],
 '1697232': [],
 '1714602': [('Deep learning', 2),
  ('Artificial neural network', 2),
  ('Image segmentation', 1),
  ('Semiconductor industry', 1),
  ('Semi-supervised learning', 1),
  ('Tracer', 1),
  ('biologic segmentation', 1),
  ('Medical image computing', 1),
  ('Pixel', 1),
  ('Retina', 1),
  ('Convolutional neural network', 1),
  ('Liver diseases', 1),
  ('Optic Disk', 1),
  ('CT scan', 1),
  ('Medical imaging', 1),
  ('Scanning', 1),
  ('Neural Network Simulation', 1),
  ('Dermoscopy', 1),
  ('Glaucoma', 1),
  ('Matrix regularization', 1),
  ('Neoplasms', 1),
  ('Eye', 1),
  ('Skin Physiological Phenomena', 1),
  ('Silo (dataset)', 1),
  ('Computation (action)', 1),
  ('hearing impairment', 1),
  ('Object detection', 1),
  ('Sensor', 1),
  