In [1]:
import requests
import json
import pandas as pd
import config

In [2]:
# Load the per-publication topic table from the project's tables directory.
publications_csv = f'{config.project_path}/tables/topics_publications.csv'
pubs = pd.read_csv(publications_csv)

In [3]:
# Reduce the publication table to one row per 'Topic number' and keep only the
# topic and topic-cluster identifier/name columns.
topic_columns = ['Topic number', 'Topic name',
                 'Topic Cluster number', 'Topic Cluster name']
topics = pubs.drop_duplicates(subset='Topic number').loc[:, topic_columns]

In [4]:
# Discard the rows that carry no topic number.
has_topic_number = topics['Topic number'].notna()
topics = topics[has_topic_number]

In [5]:
# Store topic numbers as integers.
# Rebinding through DataFrame.astype is used instead of assigning via
# `.loc[:, col] = ...`: on recent pandas a full-column `.loc` assignment writes
# the values into the existing column in place, which keeps its previous dtype,
# and writing into the filtered frame can raise SettingWithCopyWarning.
topics = topics.astype({'Topic number': int})

In [6]:
# Accumulator for the decoded API responses, keyed by topic id.
relations = dict()

In [8]:
# Query the SciVal topic-metrics endpoint for the 'relatedTopics' metric of
# every topic and store each decoded JSON response in `relations`, keyed by the
# integer topic id. Topics whose request fails are collected in `failed_topics`
# and reported after the loop instead of being dropped silently.
base_url = 'https://api.elsevier.com/analytics/scival/topic/metrics'
headers = {'Accept': 'application/json'}  # identical for every request
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever
failed_topics = {}  # topic id -> HTTP status code or exception class name

for topic_id in topics['Topic number']:
    topic_id = int(topic_id)  # same plain int for the query value and the dict key
    params = {'topicIds': topic_id,
              'metricTypes': 'relatedTopics',
              'insttoken': config.elsevier_instkey,
              'apiKey': config.elsevier_apikey}
    try:
        result = requests.get(base_url, params=params, headers=headers,
                              timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        # Only the exception type is recorded: the message may embed the
        # request URL, whose query string contains the API credentials.
        failed_topics[topic_id] = type(error).__name__
        continue
    if result.status_code == 200:
        relations[topic_id] = result.json()
    else:
        failed_topics[topic_id] = result.status_code

if failed_topics:
    print(f'No relations retrieved for {len(failed_topics)} topic(s): {failed_topics}')

In [11]:
# temp storage
# Snapshot the raw API responses as JSON. JSON object keys are always strings,
# so the integer topic ids used as keys are written out as strings.
with open('data/relations.json', 'w') as snapshot:
    json.dump(relations, snapshot)

In [14]:
# Flatten the API responses into {topic id: {related topic id: relation score}}.
# The related topics are read from the first metric of the first result of each
# response. Every topic present in `relations` gets an entry, which is an empty
# dict when the response lists no related topics, so later lookups by topic id
# do not fail for such topics.
relation_scores = {}
for topic_id, relation in relations.items():
    related_values = relation['results'][0]['metrics'][0]['values']
    relation_scores[topic_id] = {
        related_topic['topic']['id']: related_topic['relationScore']
        for related_topic in related_values
    }

In [18]:
# are the topics in our set also related?
# Build one record per ordered (topic, related topic) pair taken from our own
# topic set for which a relation score is known.

# Resolve topic names once instead of filtering the DataFrame for every pair.
# setdefault keeps the first name seen per topic number.
topic_names = {}
for number, name in zip(topics['Topic number'], topics['Topic name']):
    topic_names.setdefault(number, name)

topic_ids = list(topics['Topic number'])
relatedness = []
for topic_id in topic_ids:
    scores = relation_scores.get(topic_id)
    if not scores:
        # The topic has no entry (or no related topics) in relation_scores;
        # skip it rather than raising KeyError.
        continue
    for related_topic_id in topic_ids:
        if related_topic_id in scores:
            relatedness.append({'topic_id': topic_id,
                                'related_topic_id': related_topic_id,
                                'topic_name': topic_names[topic_id],
                                'related_topic_name': topic_names[related_topic_id],
                                'score': scores[related_topic_id]})

In [19]:
# Tabulate the pairwise relations. The columns are listed explicitly (in the
# same order as the record keys) so that the frame, and the CSV written from
# it, still has its header when no related pairs were found.
df = pd.DataFrame(relatedness,
                  columns=['topic_id', 'related_topic_id',
                           'topic_name', 'related_topic_name', 'score'])

In [21]:
# Write the relation table to the project directory, without the row index.
relations_csv = f'{config.project_path}/relations.csv'
df.to_csv(relations_csv, index=False)