In [1]:
concepts = {
    "Time Series": "https://api.openalex.org/works?filter=concepts.id:C151406439",
    "Probabilistic forecasting": "https://api.openalex.org/works?filter=concepts.id:C122282355",
    "Demand forecasting": "https://api.openalex.org/works?filter=concepts.id:C193809577"
}
unparsed_works_ids = {
    "Time Series": "C151406439",
    "Probabilistic forecasting": "C122282355",
    "Demand forecasting": "C193809577"
}
unparsed_works_dict = {
    "Time Series": [],
    "Probabilistic forecasting": [],
    "Demand forecasting": []
}

In [2]:
import requests

import seaborn as sns
import pandas as pd
pd.set_option('display.max_colwidth', 300)

In [3]:
CONCEPT_ID = "C122282355"
YEAR = 2021
cited_by_count = 10

In [4]:
TEMPLATE = "https://api.openalex.org/works?filter=concepts.id:{concept_id},publication_year:{publication_year},cited_by_count:>{cited_by_count}&per-page=50&page={page_id}"

In [5]:
responce = requests.get(
    TEMPLATE.format(
        concept_id=CONCEPT_ID,
        publication_year=YEAR,
        cited_by_count=cited_by_count
    )
)

KeyError: 'page_id'

In [6]:
from typing import Any, List, Optional

from pydantic import BaseModel

class Author(BaseModel):
    id: str
    display_name: str
    orcid: Optional[str]

class Institution(BaseModel):
    id: Optional[str]
    display_name: Optional[str]
    country_code: Optional[str]

class Authorship(BaseModel):
    author: Author
    institutions: List[Institution]
    author_position: str

class Concept(BaseModel):
    id: str
    display_name: str
    score: Optional[float]
    level: int
    wikidata: str

class Work(BaseModel):
    id: str
    display_name: str
    publication_date: str
    relevance_score: Optional[float]
    authorships: List[Authorship]
    concepts: List[Concept]
    cited_by_count: int
    publication_year: int
    cited_by_api_url: List[str]
    doi: Optional[str]
    referenced_works: List[str]
    related_works: List[str]
    abstract_inverted_index: Optional[dict]

In [8]:
for concept_id in unparsed_works_ids:
    page_id = 1
    while True:
        template = TEMPLATE.format(concept_id=unparsed_works_ids[concept_id], publication_year=YEAR, cited_by_count=cited_by_count, page_id=page_id)
        print(template)
        responce = requests.get(template).json()
        unparsed_works_dict[concept_id] += responce["results"]
        print(len(responce["results"]))
        if len(responce["results"]) == 0:
            break
        else:
            page_id += 1

https://api.openalex.org/works?filter=concepts.id:C151406439,publication_year:2021,cited_by_count:>10&per-page=50&page=1
50
https://api.openalex.org/works?filter=concepts.id:C151406439,publication_year:2021,cited_by_count:>10&per-page=50&page=2
50
https://api.openalex.org/works?filter=concepts.id:C151406439,publication_year:2021,cited_by_count:>10&per-page=50&page=3
12
https://api.openalex.org/works?filter=concepts.id:C151406439,publication_year:2021,cited_by_count:>10&per-page=50&page=4
0
https://api.openalex.org/works?filter=concepts.id:C122282355,publication_year:2021,cited_by_count:>10&per-page=50&page=1
23
https://api.openalex.org/works?filter=concepts.id:C122282355,publication_year:2021,cited_by_count:>10&per-page=50&page=2
0
https://api.openalex.org/works?filter=concepts.id:C193809577,publication_year:2021,cited_by_count:>10&per-page=50&page=1
22
https://api.openalex.org/works?filter=concepts.id:C193809577,publication_year:2021,cited_by_count:>10&per-page=50&page=2
0


In [9]:
parsed_works = dict()
for concept_id in unparsed_works_dict:
    parsed_works[concept_id] = [Work(**i) for i in unparsed_works_dict[concept_id]]

In [10]:
for i in parsed_works:
    print(f"{i}: {len(unparsed_works_dict[i])}")

Time Series: 112
Probabilistic forecasting: 23
Demand forecasting: 22


In [11]:
parsed_works_with_index = dict()

In [12]:
import tqdm

In [13]:
for concept_id in parsed_works:
    for work in tqdm.tqdm(parsed_works[concept_id]):
        if work.id not in parsed_works_with_index:
            url_api = work.id[:8] + "api." + work.id[8:]
            parsed_works_with_index[work.id] = Work(**requests.get(url_api).json())

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 112/112 [16:11<00:00,  8.67s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [02:05<00:00,  5.46s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [02:13<00:00,  6.05s/it]


In [14]:
df = pd.DataFrame([val.dict() for i, val in parsed_works_with_index.items()])

In [15]:
df.head()

Unnamed: 0,id,display_name,publication_date,relevance_score,authorships,concepts,cited_by_count,publication_year,cited_by_api_url,doi,referenced_works,related_works,abstract_inverted_index
0,https://openalex.org/W3167954679,A guide to state-space modeling of ecological time series,2021-11-01,,"[{'author': {'id': 'https://openalex.org/A2398706194', 'display_name': 'Marie Auger-Méthé', 'orcid': 'https://orcid.org/0000-0003-3550-4930'}, 'institutions': [{'id': 'https://openalex.org/I141945490', 'display_name': 'University of British Columbia', 'country_code': 'CA'}], 'author_position': '...","[{'id': 'https://openalex.org/C18903297', 'display_name': 'Ecology', 'score': 0.50035, 'level': 1, 'wikidata': 'https://www.wikidata.org/wiki/Q7150'}, {'id': 'https://openalex.org/C143724316', 'display_name': 'Series (stratigraphy)', 'score': 0.475543, 'level': 2, 'wikidata': 'https://www.wikida...",12,2021,[https://api.openalex.org/works?filter=cites:W3167954679],https://doi.org/10.1002/ecm.1470,"[https://openalex.org/W1965175390, https://openalex.org/W2035636354, https://openalex.org/W2098767549, https://openalex.org/W2124078578, https://openalex.org/W1976253647, https://openalex.org/W2049228615, https://openalex.org/W2139606141, https://openalex.org/W2155702894, https://openalex.org/W2...","[https://openalex.org/W3004555295, https://openalex.org/W2884241775, https://openalex.org/W3138627724, https://openalex.org/W3109498698, https://openalex.org/W2499390929, https://openalex.org/W2914721251, https://openalex.org/W2416768583, https://openalex.org/W3097013233, https://openalex.org/W2...","{'State–space': [0], 'models': [1, 15, 193], '(SSMs)': [2], 'are': [3, 16, 29, 40, 44, 169], 'an': [4, 239, 259], 'important': [5], 'modeling': [6], 'framework': [7], 'for': [8, 163, 228], 'analyzing': [9], 'ecological': [10, 37, 53, 110, 232], 'time': [11], 'series.': [12], 'These': [13], 'hier..."
1,https://openalex.org/W3015378124,Adversarial Attacks on Time Series,2021-10-01,,"[{'author': {'id': 'https://openalex.org/A2229077872', 'display_name': 'Fazle Karim', 'orcid': 'https://orcid.org/0000-0002-7998-8546'}, 'institutions': [{'id': 'https://openalex.org/I39422238', 'display_name': 'University of Illinois at Chicago', 'country_code': 'US'}], 'author_position': 'firs...","[{'id': 'https://openalex.org/C41008148', 'display_name': 'Computer science', 'score': 0.724428, 'level': 0, 'wikidata': 'https://www.wikidata.org/wiki/Q21198'}, {'id': 'https://openalex.org/C37736160', 'display_name': 'Adversarial system', 'score': 0.641261, 'level': 2, 'wikidata': 'https://www...",24,2021,[https://api.openalex.org/works?filter=cites:W3015378124],https://doi.org/10.1109/tpami.2020.2986319,"[https://openalex.org/W2551393996, https://openalex.org/W2754051771, https://openalex.org/W2039260438, https://openalex.org/W2402972623, https://openalex.org/W2962718684, https://openalex.org/W2787615497, https://openalex.org/W2892035503, https://openalex.org/W2964253222, https://openalex.org/W2...","[https://openalex.org/W2460937040, https://openalex.org/W3103075267, https://openalex.org/W2963857521, https://openalex.org/W2556782416, https://openalex.org/W2243397390, https://openalex.org/W3131164743, https://openalex.org/W3100485466, https://openalex.org/W2975486888, https://openalex.org/W2...","{'Time': [0], 'series': [1, 54, 80, 172], 'classification': [2, 55, 62, 81, 173], 'models': [3, 121, 174], 'have': [4], 'been': [5, 18], 'garnering': [6], 'significant': [7], 'importance': [8], 'in': [9], 'the': [10, 61, 73, 76, 137, 158], 'research': [11, 16], 'community.': [12], 'However,': [1..."
2,https://openalex.org/W2888954762,"Hidden Markov Models for Time Series: An Introduction Using R, Second Edition",2021-09-30,,"[{'author': {'id': 'https://openalex.org/A2213794972', 'display_name': 'Walter Zucchini', 'orcid': None}, 'institutions': [], 'author_position': 'first'}, {'author': {'id': 'https://openalex.org/A2085390103', 'display_name': 'Iain L. MacDonald', 'orcid': 'https://orcid.org/0000-0001-6433-2707'},...","[{'id': 'https://openalex.org/C143724316', 'display_name': 'Series (stratigraphy)', 'score': 0.617789, 'level': 2, 'wikidata': 'https://www.wikidata.org/wiki/Q312468'}, {'id': 'https://openalex.org/C41008148', 'display_name': 'Computer science', 'score': 0.555022, 'level': 0, 'wikidata': 'https:...",28,2021,[https://api.openalex.org/works?filter=cites:W2888954762],,[],"[https://openalex.org/W2999872301, https://openalex.org/W2341959691, https://openalex.org/W3119426301, https://openalex.org/W2485874359, https://openalex.org/W2617619904, https://openalex.org/W2547039301, https://openalex.org/W138349194, https://openalex.org/W1882651929, https://openalex.org/W42...",
3,https://openalex.org/W1580469992,Forecasting Financial Markets Using Neural Networks An Analysis of Methods and Accuracy,2021-08-16,,"[{'author': {'id': 'https://openalex.org/A287426837', 'display_name': 'Jason E. Kutsurelis', 'orcid': None}, 'institutions': [], 'author_position': 'first'}]","[{'id': 'https://openalex.org/C50644808', 'display_name': 'Artificial neural network', 'score': 0.575266, 'level': 2, 'wikidata': 'https://www.wikidata.org/wiki/Q192776'}, {'id': 'https://openalex.org/C41008148', 'display_name': 'Computer science', 'score': 0.447679, 'level': 0, 'wikidata': 'htt...",39,2021,[https://api.openalex.org/works?filter=cites:W1580469992],,"[https://openalex.org/W1669375728, https://openalex.org/W2124776405, https://openalex.org/W1526844615, https://openalex.org/W1589922412, https://openalex.org/W1521061969, https://openalex.org/W1492620359, https://openalex.org/W1582318580, https://openalex.org/W1587239851, https://openalex.org/W1...","[https://openalex.org/W2573444250, https://openalex.org/W2275717981, https://openalex.org/W2104846125, https://openalex.org/W2139997280, https://openalex.org/W2183616920, https://openalex.org/W3113190877, https://openalex.org/W3194426173, https://openalex.org/W1995456904, https://openalex.org/W2...","{'Abstract': [0], ':': [1], 'This': [2, 83, 101], 'research': [3, 65, 102], 'examines': [4], 'and': [5, 69, 108, 127], 'analyzes': [6], 'the': [7, 44, 47, 67, 80, 87, 104, 110, 138, 148, 158, 164], 'use': [8, 165], 'of': [9, 25, 46, 71, 106, 112, 122, 132, 166], 'neural': [10, 18, 61, 73, 114, 1..."
4,https://openalex.org/W3191026187,Time Series Data Augmentation for Deep Learning: A Survey.,2021-08-09,,"[{'author': {'id': 'https://openalex.org/A2141035074', 'display_name': 'Qingsong Wen', 'orcid': None}, 'institutions': [{'id': 'https://openalex.org/I45928872', 'display_name': 'Alibaba Group', 'country_code': 'CN'}], 'author_position': 'first'}, {'author': {'id': 'https://openalex.org/A32107449...","[{'id': 'https://openalex.org/C41008148', 'display_name': 'Computer science', 'score': 0.745651, 'level': 0, 'wikidata': 'https://www.wikidata.org/wiki/Q21198'}, {'id': 'https://openalex.org/C108583219', 'display_name': 'Deep learning', 'score': 0.558097, 'level': 2, 'wikidata': 'https://www.wik...",14,2021,[https://api.openalex.org/works?filter=cites:W3191026187],https://doi.org/10.24963/ijcai.2021/631,[],"[https://openalex.org/W2496879562, https://openalex.org/W3033757695, https://openalex.org/W2975792400, https://openalex.org/W2965323200, https://openalex.org/W3044203329, https://openalex.org/W2922386288, https://openalex.org/W3176197839, https://openalex.org/W2515550864, https://openalex.org/W3...","{'Deep': [0], 'learning': [1, 80], 'performs': [2], 'remarkably': [3], 'well': [4], 'on': [5, 21, 82], 'many': [6, 36], 'time': [7, 38, 49, 83, 97, 134], 'series': [8, 39, 50, 84, 135], 'analysis': [9], 'tasks': [10, 132], 'recently.': [11], 'The': [12], 'superior': [13], 'performance': [14], 'o..."


In [16]:
def index_to_abstract(index):
    if index is None:
        return
    max_len = -1
    for i, val in index.items():
        max_len = max(max_len, max(val))
    
    abstract_list = ["" for i in range(max_len+1)]
    for i, val in index.items():
        for j in val:
            abstract_list[j] = i
    
    return " ".join(abstract_list).lower()

In [19]:
df["summary"] = df.abstract_inverted_index.apply(index_to_abstract)
df["title"] = df["display_name"]

## Top 20 papers in Time Series in 2021 year

In [20]:
df.sort_values(by="cited_by_count", ascending=False)[["title", "cited_by_count"]].head(20)

Unnamed: 0,title,cited_by_count
100,Recurrent Neural Networks for Time Series Forecasting: Current status and future directions,147
33,Time-series forecasting with deep learning: a survey.,57
95,A novel displacement prediction method using gated recurrent unit model with time series analysis in the Erdaohe landslide,54
127,Computationally efficient forecasting procedures for Kuhn-Tucker consumer demand model systems: Application to residential energy consumption analysis,53
71,Deep Learning for Time Series Forecasting: A Survey.,43
77,LPG consumption and environmental Kuznets curve hypothesis in South Asia: a time-series ARDL analysis with multiple structural breaks,40
3,Forecasting Financial Markets Using Neural Networks An Analysis of Methods and Accuracy,39
65,"A combined forecasting system based on statistical method, artificial neural networks, and deep learning methods for short-term wind speed forecasting",38
16,Spatial prediction of COVID-19 epidemic using ARIMA techniques in India.,37
9,An empirical survey of data augmentation for time series classification with neural networks.,37


In [22]:
from utils import pipeline_normalize
from nltk import ngrams

In [23]:
df = (
    df
    .pipe(lambda x: x.assign(parsed_title = x.title.apply(pipeline_normalize)))
    .pipe(lambda x: x.assign(parsed_summary = x.summary.apply(pipeline_normalize)))
    .pipe(lambda x: x.assign(parsed_title_2gram = x.parsed_title.apply(lambda x: list(ngrams(x, 2)))))
    .pipe(lambda x: x.assign(parsed_summary_2gram = x.parsed_summary.apply(lambda x: list(ngrams(x, 2)))))
)

In [24]:
df.parsed_summary_2gram.explode().value_counts().head(20)

(time, seri)         169
(neural, network)     72
(wind, speed)         63
(short, term)         62
(deep, learn)         40
(long, short)         35
(term, memori)        34
(machin, learn)       26
(speed, forecast)     26
(forecast, model)     26
(propos, method)      24
(predict, model)      23
(seri, data)          23
(propos, model)       22
(result, show)        20
(data, set)           20
(real, world)         19
(land, cover)         19
(seri, forecast)      18
(wind, power)         17
Name: parsed_summary_2gram, dtype: int64

In [25]:
df.parsed_title_2gram.explode().value_counts().head(20)

(time, seri)            62
(short, term)           23
(neural, network)       23
(deep, learn)           17
(seri, forecast)        12
(wind, speed)           12
(speed, forecast)       11
(forecast, model)        9
(term, wind)             8
(machin, learn)          7
(recurr, neural)         7
(seri, analysi)          6
(term, memori)           6
(long, short)            6
(seri, data)             5
(model, short)           5
(learn, approach)        4
(autoregress, model)     4
(model, use)             4
(artifici, neural)       4
Name: parsed_title_2gram, dtype: int64

## Top 20 papers in Time Series in 2021 year with `neural-network` bigram

In [26]:
(
    df
    .pipe(lambda x: x[lambda y: y.parsed_summary_2gram.apply(lambda z: ("neural", "network") in z)])
    .sort_values(by="cited_by_count", ascending=False)[["title", "cited_by_count"]]
)

Unnamed: 0,title,cited_by_count
100,Recurrent Neural Networks for Time Series Forecasting: Current status and future directions,147
71,Deep Learning for Time Series Forecasting: A Survey.,43
3,Forecasting Financial Markets Using Neural Networks An Analysis of Methods and Accuracy,39
60,Deep-Convolution-Based LSTM Network for Remaining Useful Life Prediction,37
9,An empirical survey of data augmentation for time series classification with neural networks.,37
52,A time series-based statistical approach for outbreak spread forecasting: Application of COVID-19 in Greece,29
68,Short-term wind speed forecasting using recurrent neural networks with error correction,29
22,"Multi-hour and multi-site air quality index forecasting in Beijing using CNN, LSTM, CNN-LSTM, and spatiotemporal clustering",27
108,Evaluating the performances of several artificial intelligence methods in forecasting daily streamflow time series for sustainable water resources management,26
64,An Experimental Review on Deep Learning Architectures for Time Series Forecasting.,25
