In [1]:
import ast
from itertools import chain

import pandas as pd
import requests

In [None]:
# API keys (all blank in this copy of the notebook)
api_springer = "" # full text for open-access articles in XML format (api.springernature.com/openaccess/jats/doi/[DOI]?api_key=[API])
api_elsevier = "" # full text for any article I have access to, in XML format (https://api.elsevier.com/content/article/doi/[DOI]?APIKey=[API]&view=FULL)
api_wiley = "" # full text, but PDF download only
api_gpt = ""
# BioC API: offers the PMC OA and Author Manuscript collections in XML format via PubMed ID or PMC ID (https://www.ncbi.nlm.nih.gov/research/bionlp/RESTful/pmcoa.cgi/BioC_[format]/[ID]/[encoding])
api_semanticscholar = ""

In [3]:
# Several author IDs can be combined in one filter with the '|' (OR) operator, up to 50 values per request.

def build_author_works_url(id_list):
    """Return the OpenAlex ``works`` URL listing articles by any of the given authors.

    Parameters
    ----------
    id_list : iterable of str
        Author identifiers; they are joined with ``|`` inside a single
        ``author.id`` filter.

    Returns
    -------
    str
        Request URL restricted to works of type ``article`` that are not
        paratext, with the contact ``mailto`` parameter appended.
    """
    author_clause = 'author.id:' + '|'.join(id_list)
    filter_clauses = [
        author_clause,
        'type:article',       # keep articles only; every other work type is filtered out
        'is_paratext:false',  # drop paratext
    ]
    filter_param = ','.join(filter_clauses)
    return 'https://api.openalex.org/works?filter=' + filter_param + '&mailto=kl4898@stern.nyu.edu'


In [4]:
def get_publications(id_longlist, chunk_size=50):
    """Download every OpenAlex article record for a list of author IDs.

    The IDs are queried in batches of ``chunk_size`` and each batch is walked
    with cursor pagination until the API reports no further page.

    Parameters
    ----------
    id_longlist : sequence of str
        Author IDs to look up.
    chunk_size : int, default 50
        Number of IDs combined into one request filter.

    Returns
    -------
    pandas.DataFrame
        One row per returned work. Row labels restart at 0 for every page
        (pages are concatenated without re-indexing). An empty frame is
        returned when nothing was fetched.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    id_chunks = [id_longlist[i:i + chunk_size] for i in range(0, len(id_longlist), chunk_size)]

    # Collect the pages and concatenate once at the end; growing a DataFrame
    # with concat inside the loop re-copies all previous rows on every page.
    pages = []

    for id_list in id_chunks:
        base_url = build_author_works_url(id_list)

        # '*' requests the first page; the API returns a null cursor after the last one
        cursor = '*'
        while cursor:
            reply = requests.get(f'{base_url}&cursor={cursor}', timeout=60)
            # Fail loudly on HTTP errors instead of a confusing KeyError on 'results'
            reply.raise_for_status()
            payload = reply.json()
            pages.append(pd.DataFrame(payload['results']))
            cursor = payload['meta']['next_cursor']

    if not pages:
        return pd.DataFrame()
    return pd.concat(pages)

In [5]:
# Look up OpenAlex sources whose display name matches "nature" or "science",
# so the IDs of the two journals can be read from the printed list.
url = "https://api.openalex.org/sources?filter=display_name.search:nature|science"
response = requests.get(url).json()
# Single request, no cursor paging: only the results in this one response are listed.
for result in response['results']:
    print(result['id'], result['display_name'])

https://openalex.org/S137773608 Nature
https://openalex.org/S3880285 Science
https://openalex.org/S64187185 Nature Communications
https://openalex.org/S137905309 Nature Genetics
https://openalex.org/S203256638 Nature Medicine
https://openalex.org/S106963461 Nature Biotechnology
https://openalex.org/S103895331 Nature Materials
https://openalex.org/S2298632 Nature Neuroscience
https://openalex.org/S127827428 Nature Methods
https://openalex.org/S41063453 Nature Immunology
https://openalex.org/S7822423 Nature Nanotechnology
https://openalex.org/S106296714 Lecture Notes in Computer Science
https://openalex.org/S160464432 Nature Reviews Cancer
https://openalex.org/S151741590 Nature Cell Biology
https://openalex.org/S13479253 Environmental Science & Technology
https://openalex.org/S26843219 Nature Reviews Neuroscience
https://openalex.org/S93373720 Nature Reviews Immunology
https://openalex.org/S109387254 Nature Protocols
https://openalex.org/S8553189 Nature Reviews Genetics
https://openalex.

In [5]:
# Source IDs for Nature and Science, pinned as literals.
# They are the first two rows printed by the sources lookup
# (response['results'][0]['id'] and response['results'][1]['id']);
# with literals this cell does not need `response` to be defined.
Nature_ID = "https://openalex.org/S137773608"
Science_ID = "https://openalex.org/S3880285"

In [6]:
# Get works published in Nature and Science
def NS_works(year):
    """Fetch all Nature and Science article records for one publication year.

    Queries the OpenAlex ``works`` endpoint, filtered to the two journals
    (module-level ``Nature_ID`` / ``Science_ID``), and follows cursor
    pagination until the API reports no further page.

    Parameters
    ----------
    year : int or str
        Value used for the ``publication_year`` filter.

    Returns
    -------
    pandas.DataFrame
        One row per returned work. Row labels restart at 0 for every page
        (pages are concatenated without re-indexing). An empty frame is
        returned when nothing was fetched.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    filters = (
        'type:article',       # keep articles only; every other work type is filtered out
        'is_paratext:false',  # drop paratext
        f'primary_location.source.id:{Nature_ID}|{Science_ID}',
        f'publication_year:{year}',
    )
    base_url = f'https://api.openalex.org/works?filter={",".join(filters)}'

    # Collect the pages and concatenate once at the end; growing a DataFrame
    # with concat inside the loop re-copies all previous rows on every page.
    pages = []

    # '*' requests the first page; the API returns a null cursor after the last one
    cursor = '*'
    while cursor:
        reply = requests.get(f'{base_url}&cursor={cursor}&mailto=kl4898@stern.nyu.edu', timeout=60)
        # Fail loudly on HTTP errors instead of a confusing KeyError on 'results'
        reply.raise_for_status()
        payload = reply.json()
        pages.append(pd.DataFrame(payload['results']))
        cursor = payload['meta']['next_cursor']

    if not pages:
        return pd.DataFrame()
    return pd.concat(pages)

In [54]:
# Fetch the 2018 Nature/Science works via NS_works (performs live API requests).
ns_2018 = NS_works(2018)

In [None]:
# Fetch the 2020 Nature/Science works via NS_works (performs live API requests).
ns_2020 = NS_works(2020)

In [39]:
# Write the 2020 frame to ns_2020.parquet (path is relative to the working directory).
ns_2020.to_parquet('ns_2020.parquet')

In [103]:
# Write the 2018 frame to ns_2018.parquet (path is relative to the working directory).
ns_2018.to_parquet('ns_2018.parquet')

In [7]:
# Reload the 2018 works from the Parquet file into a separate frame.
ns_2018_parquet = pd.read_parquet('ns_2018.parquet')

In [25]:
# Display the reloaded 2018 frame.
ns_2018_parquet

Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,type,...,referenced_works_count,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated
0,https://openalex.org/W2793369479,https://doi.org/10.1038/nature26160,Unconventional superconductivity in magic-angl...,Unconventional superconductivity in magic-angl...,2018,2018-03-05,"{'doi': 'https://doi.org/10.1038/nature26160',...",en,"{'is_accepted': False, 'is_oa': False, 'is_pub...",article,...,63,"[https://openalex.org/W1488440066, https://ope...","[https://openalex.org/W2609137446, https://ope...",https://api.openalex.org/works/W2793369479/ngrams,"{' My': None, ' 2.': None, ' 3.': None, '...",https://api.openalex.org/works?filter=cites:W2...,"[{'cited_by_count': 110, 'year': 2024}, {'cite...",2024-02-08T10:38:18.485726,2018-03-29,
1,https://openalex.org/W2895486342,https://doi.org/10.1038/s41586-018-0579-z,The UK Biobank resource with deep phenotyping ...,The UK Biobank resource with deep phenotyping ...,2018,2018-10-01,{'doi': 'https://doi.org/10.1038/s41586-018-05...,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,40,"[https://openalex.org/W1966585637, https://ope...","[https://openalex.org/W4286002085, https://ope...",https://api.openalex.org/works/W2895486342/ngrams,"{' My': None, ' 2.': None, ' 3.': None, '...",https://api.openalex.org/works?filter=cites:W2...,"[{'cited_by_count': 86, 'year': 2024}, {'cited...",2024-02-07T01:57:53.112661,2018-10-12,
2,https://openalex.org/W2790166049,https://doi.org/10.1126/science.aap9559,The spread of true and false news online,The spread of true and false news online,2018,2018-03-09,{'doi': 'https://doi.org/10.1126/science.aap95...,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,48,"[https://openalex.org/W1105550512, https://ope...","[https://openalex.org/W3210159890, https://ope...",https://api.openalex.org/works/W2790166049/ngrams,"{' My': None, ' 2.': None, ' 3.': None, '...",https://api.openalex.org/works?filter=cites:W2...,"[{'cited_by_count': 68, 'year': 2024}, {'cited...",2024-02-03T22:39:56.622618,2018-03-29,
3,https://openalex.org/W2791483720,https://doi.org/10.1126/science.aar4060,Cancer immunotherapy using checkpoint blockade,Cancer immunotherapy using checkpoint blockade,2018,2018-03-23,{'doi': 'https://doi.org/10.1126/science.aar40...,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,63,"[https://openalex.org/W1423342398, https://ope...","[https://openalex.org/W4304783156, https://ope...",https://api.openalex.org/works/W2791483720/ngrams,"{' My': None, ' 2.': None, ' 3.': None, '...",https://api.openalex.org/works?filter=cites:W2...,"[{'cited_by_count': 89, 'year': 2024}, {'cited...",2024-02-02T00:10:31.184263,2018-03-29,
4,https://openalex.org/W2765314233,https://doi.org/10.1126/science.aan3706,Gut microbiome influences efficacy of PD-1–bas...,Gut microbiome influences efficacy of PD-1–bas...,2018,2018-01-05,{'doi': 'https://doi.org/10.1126/science.aan37...,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,34,"[https://openalex.org/W1498931318, https://ope...","[https://openalex.org/W2768863811, https://ope...",https://api.openalex.org/works/W2765314233/ngrams,"{' My': None, ' 2.': None, ' 3.': None, '...",https://api.openalex.org/works?filter=cites:W2...,"[{'cited_by_count': 74, 'year': 2024}, {'cited...",2024-02-06T15:41:57.208498,2017-11-10,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,https://openalex.org/W4294091206,https://doi.org/10.1126/science.2018.361.6398....,Editors' Choice,Editors' Choice,2018,2018-07-13,{'doi': 'https://doi.org/10.1126/science.2018....,,"{'is_accepted': False, 'is_oa': False, 'is_pub...",article,...,0,[],"[https://openalex.org/W2899084033, https://ope...",https://api.openalex.org/works/W4294091206/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-01T07:57:48.243689,2022-09-01,
24,https://openalex.org/W4298340596,https://doi.org/10.1126/science.359.6383.1479-b,AAAS annual election: Preliminary announcement,AAAS annual election: Preliminary announcement,2018,2018-03-30,{'doi': 'https://doi.org/10.1126/science.359.6...,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,0,[],"[https://openalex.org/W2899084033, https://ope...",https://api.openalex.org/works/W4298340596/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-03T14:51:40.528624,2022-10-02,
0,https://openalex.org/W4390767771,https://doi.org/10.1126/science.2018.361.6398....,This Week in Science,This Week in Science,2018,2018-07-13,{'doi': 'https://doi.org/10.1126/science.2018....,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,0,[],"[https://openalex.org/W2748952813, https://ope...",https://api.openalex.org/works/W4390767771/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-01-31T17:16:54.359121,2024-01-13,
1,https://openalex.org/W4390767772,https://doi.org/10.1126/science.2018.362.6418....,This Week in Science,This Week in Science,2018,2018-11-30,{'doi': 'https://doi.org/10.1126/science.2018....,en,"{'is_accepted': True, 'is_oa': True, 'is_publi...",article,...,0,[],"[https://openalex.org/W3007404728, https://ope...",https://api.openalex.org/works/W4390767772/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-02T00:35:09.281753,2024-01-13,


In [21]:
# Inspect the authorships column: each value is a list of per-author dicts (possibly empty).
ns_2018['authorships']

0     [{'author_position': 'first', 'author': {'id':...
1     [{'author_position': 'first', 'author': {'id':...
2     [{'author_position': 'first', 'author': {'id':...
3     [{'author_position': 'first', 'author': {'id':...
4     [{'author_position': 'first', 'author': {'id':...
                            ...                        
23    [{'author_position': 'first', 'author': {'id':...
24                                                   []
0     [{'author_position': 'first', 'author': {'id':...
1     [{'author_position': 'first', 'author': {'id':...
2                                                    []
Name: authorships, Length: 8853, dtype: object

In [55]:
# Gather the author ID of every authorship entry on every 2018 work, then
# de-duplicate via a set (the resulting list order is therefore arbitrary).
per_work_ids = (
    [entry['author']['id'] for entry in work_authorships]
    for work_authorships in ns_2018['authorships']
)
author_ids_2 = list(set(chain.from_iterable(per_work_ids)))

In [8]:
# Distinct author IDs across all works in the Parquet-reloaded 2018 frame.
# A set removes repeats, so the final list order is arbitrary.
unique_author_ids = set()
for work_authorships in ns_2018_parquet['authorships']:
    unique_author_ids.update(entry['author']['id'] for entry in work_authorships)

author_ids = list(unique_author_ids)

In [57]:
# Peek at the first ten IDs extracted from the freshly downloaded frame.
author_ids_2[:10]

['https://openalex.org/A5017801365',
 'https://openalex.org/A5039454027',
 'https://openalex.org/A5035067308',
 'https://openalex.org/A5085410573',
 'https://openalex.org/A5078727884',
 'https://openalex.org/A5026803226',
 'https://openalex.org/A5027582796',
 'https://openalex.org/A5005589678',
 'https://openalex.org/A5059895815',
 'https://openalex.org/A5091454644']

In [59]:
# Peek at the first ten IDs extracted from the Parquet reload, for comparison with author_ids_2.
author_ids[:10]

['https://openalex.org/A5017801365',
 'https://openalex.org/A5039454027',
 'https://openalex.org/A5035067308',
 'https://openalex.org/A5085410573',
 'https://openalex.org/A5078727884',
 'https://openalex.org/A5026803226',
 'https://openalex.org/A5027582796',
 'https://openalex.org/A5005589678',
 'https://openalex.org/A5059895815',
 'https://openalex.org/A5091454644']

In [9]:
# Number of distinct author IDs.
len(author_ids)

25599

In [16]:
# Persist the author IDs as plain text, one ID per line
with open('author_ids.txt', 'w') as id_file:
    id_file.writelines("%s\n" % author_id for author_id in author_ids)

In [63]:
# Reload the saved IDs; splitlines() returns the lines without their line endings
with open('author_ids.txt', 'r') as id_file:
    saved_text = id_file.read()
author_ids = saved_text.splitlines()

In [64]:
# Check the first ten IDs after the text-file round trip.
author_ids[:10]

['https://openalex.org/A5017801365',
 'https://openalex.org/A5039454027',
 'https://openalex.org/A5035067308',
 'https://openalex.org/A5085410573',
 'https://openalex.org/A5078727884',
 'https://openalex.org/A5026803226',
 'https://openalex.org/A5027582796',
 'https://openalex.org/A5005589678',
 'https://openalex.org/A5059895815',
 'https://openalex.org/A5091454644']

In [None]:
# IDs of authors who are flagged as corresponding, or listed in last position,
# on any work in the downloaded 2018 frame. De-duplicated via a set.
selected_ids = (
    entry['author']['id']
    for work_authorships in ns_2018['authorships']
    for entry in work_authorships
    if entry['is_corresponding'] or entry['author_position'] == 'last'
)
corr_author_ids = list(set(selected_ids))

In [11]:
# Corresponding-or-last author IDs, taken from the Parquet-reloaded 2018 frame.
corr_id_set = set()
for work_authorships in ns_2018_parquet['authorships']:
    for entry in work_authorships:
        if entry['is_corresponding'] or entry['author_position'] == 'last':
            corr_id_set.add(entry['author']['id'])

corr_author_ids = list(corr_id_set)

In [12]:
# Number of distinct corresponding-or-last author IDs.
len(corr_author_ids)

5571

In [13]:
def chunk_list(input_list, N):
    """Split ``input_list`` into consecutive slices of at most ``N`` items.

    The last slice holds the remainder and may be shorter than ``N``; an
    empty input gives an empty list.
    """
    chunks = []
    for start in range(0, len(input_list), N):
        chunks.append(input_list[start:start + N])
    return chunks

In [14]:
# Split the author IDs into batches of 50 (the per-filter value limit noted earlier).
author_chunks = chunk_list(author_ids,50)

In [91]:
# Save author_chunks: each line holds the string form of one chunk (a list of IDs)
with open('author_chunks.txt', 'w') as chunk_file:
    chunk_file.writelines("%s\n" % chunk for chunk in author_chunks)

In [5]:
# Reload the saved chunks as raw text lines (still strings at this point);
# splitlines() returns the lines without their line endings
with open('author_chunks.txt', 'r') as chunk_file:
    saved_text = chunk_file.read()
author_chunks = saved_text.splitlines()

In [8]:
# Turn each saved line back into a list of ID strings. literal_eval only
# accepts Python literals, so nothing in the file can be executed (unlike eval).
# Entries that are already parsed are kept, so the cell can be re-run safely.
author_chunks = [ast.literal_eval(x) if isinstance(x, str) else x for x in author_chunks]

In [10]:
# Inspect the chunk at index 511.
author_chunks[511]

['https://openalex.org/A5040773420',
 'https://openalex.org/A5032776183',
 'https://openalex.org/A5086448413',
 'https://openalex.org/A5034222438',
 'https://openalex.org/A5005890720',
 'https://openalex.org/A5081751581',
 'https://openalex.org/A5073262712',
 'https://openalex.org/A5046220109',
 'https://openalex.org/A5038900860',
 'https://openalex.org/A5014791286',
 'https://openalex.org/A5071887725',
 'https://openalex.org/A5089338975',
 'https://openalex.org/A5031810877',
 'https://openalex.org/A5044625530',
 'https://openalex.org/A5063601349',
 'https://openalex.org/A5011747705',
 'https://openalex.org/A5087961058',
 'https://openalex.org/A5019904930',
 'https://openalex.org/A5018009111',
 'https://openalex.org/A5090289358',
 'https://openalex.org/A5041100379',
 'https://openalex.org/A5082914171',
 'https://openalex.org/A5001219422',
 'https://openalex.org/A5078615235',
 'https://openalex.org/A5046281941',
 'https://openalex.org/A5027205545',
 'https://openalex.org/A5020926476',
 

In [78]:
def extract_author_ids(authorships):
    """Return the ``['author']['id']`` value of every entry in ``authorships``, in order."""
    return [entry['author']['id'] for entry in authorships]

In [79]:
def add_author_id_list(chunk):
    """Add an ``author_ids`` column computed from the ``authorships`` column.

    The frame passed in is modified in place and is also returned.
    """
    id_lists = chunk['authorships'].apply(extract_author_ids)
    chunk['author_ids'] = id_lists
    return chunk


In [24]:
# Download the works of every author chunk and save each result as Parquet.
# Outcomes are appended to two text logs so a long run can be audited afterwards.
for i in range(len(author_chunks)):
    try:
        df = get_publications(author_chunks[i])
        df = add_author_id_list(df)
        df.to_parquet(f'author_publications_{i}.parquet')
        # write progress in txt
        with open('ns2018_progress.txt', 'a') as f:
            f.write(f"chunk {i} done\n")
    except Exception as exc:
        # Best effort: keep going, but record why the chunk failed.
        # Catching Exception rather than using a bare except lets a
        # keyboard/kernel interrupt stop the loop.
        with open('ns2018_errorlist.txt', 'a') as f:
            f.write(f"chunk {i} failed: {exc!r}\n")

chunk 0 done
chunk 1 done
chunk 2 done
chunk 3 done


In [14]:
# Download the works of every author chunk, last chunk first, and save each
# result as CSV on the external drive. Indices of failed chunks are collected
# in errorlist.
errorlist = []
for i in reversed(range(len(author_chunks))):
    try:
        df = get_publications(author_chunks[i])
        df.to_csv(f"/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_{i}.csv")
        print(f"chunk {i} done")
    except Exception as exc:
        # Best effort: keep going, but show why the chunk failed.
        # Catching Exception rather than using a bare except lets a
        # keyboard/kernel interrupt stop the loop.
        errorlist.append(i)
        print(f"chunk {i} failed: {exc!r}")

chunk 511 done
chunk 510 done
chunk 509 done
chunk 508 done
chunk 507 done
chunk 506 done
chunk 505 done
chunk 504 done
chunk 503 done
chunk 502 done
chunk 501 done
chunk 500 done
chunk 499 done
chunk 498 done
chunk 497 done
chunk 496 done
chunk 495 done
chunk 494 done
chunk 493 done
chunk 492 done
chunk 491 done
chunk 490 done
chunk 489 failed
chunk 488 done
chunk 487 done
chunk 486 done
chunk 485 done
chunk 484 done
chunk 483 done
chunk 482 done
chunk 481 done
chunk 480 done
chunk 479 done
chunk 478 done
chunk 477 done
chunk 476 done
chunk 475 done
chunk 474 done
chunk 473 done
chunk 472 done
chunk 471 done
chunk 470 done
chunk 469 done
chunk 468 failed
chunk 467 done
chunk 466 done
chunk 465 done
chunk 464 done
chunk 463 done
chunk 462 done
chunk 461 done
chunk 460 done
chunk 459 done
chunk 458 done
chunk 457 done
chunk 456 done
chunk 455 done
chunk 454 done
chunk 453 done
chunk 452 done
chunk 451 done
chunk 450 done
chunk 449 done
chunk 448 done
chunk 447 done
chunk 446 done
chunk 

In [17]:
# Load the saved works of chunk 0 from the external drive.
chunk_0 = pd.read_csv("/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_0.csv")

In [27]:
# Inspect the authorships column of the CSV-loaded chunk.
chunk_0['authorships']

0       [{'author_position': 'first', 'author': {'id':...
1       [{'author_position': 'first', 'author': {'id':...
2       [{'author_position': 'first', 'author': {'id':...
3       [{'author_position': 'first', 'author': {'id':...
4       [{'author_position': 'first', 'author': {'id':...
                              ...                        
8329    [{'author_position': 'first', 'author': {'id':...
8330    [{'author_position': 'first', 'author': {'id':...
8331    [{'author_position': 'first', 'author': {'id':...
8332    [{'author_position': 'first', 'author': {'id':...
8333    [{'author_position': 'first', 'author': {'id':...
Name: authorships, Length: 8334, dtype: object

In [30]:
# After the CSV round trip the entry is a string; parse it back into a list of dicts.
# literal_eval only accepts Python literals, so the file content cannot execute code (unlike eval).
ast.literal_eval(chunk_0['authorships'].loc[0])

[{'author_position': 'first',
  'author': {'id': 'https://openalex.org/A5067792979',
   'display_name': 'Michael J. Caterina',
   'orcid': 'https://orcid.org/0000-0002-7845-5297'},
  'institutions': [{'id': 'https://openalex.org/I4210150520',
    'display_name': 'Cellular Research (United States)',
    'ror': 'https://ror.org/04jvb9y72',
    'country_code': 'US',
    'type': 'company',
    'lineage': ['https://openalex.org/I4210150520']}],
  'countries': ['US'],
  'is_corresponding': False,
  'raw_author_name': 'Michael J. Caterina',
  'raw_affiliation_string': 'Departments of Cellular and Molecular Pharmacology, San Francisco, USA',
  'raw_affiliation_strings': ['Departments of Cellular and Molecular Pharmacology, San Francisco, USA']},
 {'author_position': 'middle',
  'author': {'id': 'https://openalex.org/A5039559342',
   'display_name': 'Mark Schumacher',
   'orcid': None},
  'institutions': [],
  'countries': ['US'],
  'is_corresponding': False,
  'raw_author_name': 'Mark A. Schum

In [80]:
# Re-download the works of chunk 0 directly from the API.
chunk_0 = get_publications(author_chunks[0])

In [81]:
# Add the author_ids column (add_author_id_list modifies the frame in place and returns it).
chunk_0 = add_author_id_list(chunk_0)

In [84]:
# Write chunk 0 to CSV in the working directory.
chunk_0.to_csv("works_chunk_0.csv")

In [85]:
# Read it back; the author_ids lists come back as plain strings and must be parsed before use.
chunk_0 = pd.read_csv("works_chunk_0.csv")

In [88]:
# Inspect the author_ids column as loaded from CSV.
chunk_0['author_ids']

0        ['https://openalex.org/A5021736013', 'https://...
1        ['https://openalex.org/A5004515522', 'https://...
2        ['https://openalex.org/A5004515522', 'https://...
3        ['https://openalex.org/A5083991449', 'https://...
4        ['https://openalex.org/A5042384828', 'https://...
                               ...                        
14750    ['https://openalex.org/A5025839822', 'https://...
14751    ['https://openalex.org/A5046416298', 'https://...
14752                 ['https://openalex.org/A5013110276']
14753    ['https://openalex.org/A5004515522', 'https://...
14754    ['https://openalex.org/A5020849392', 'https://...
Name: author_ids, Length: 14755, dtype: object

In [90]:
# Keep the rows that list at least one author from the first author chunk.
has_chunk0_author = chunk_0['author_ids'].apply(
    lambda ids: any(author_id in author_chunks[0] for author_id in ids)
)
chunk_0[has_chunk0_author]

Unnamed: 0.1,Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,author_ids
0,0,https://openalex.org/W2256016639,https://doi.org/10.1038/nature19057,Analysis of protein-coding genetic variation i...,Analysis of protein-coding genetic variation i...,2016,2016-08-01,{'openalex': 'https://openalex.org/W2256016639...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"['https://openalex.org/W1533942137', 'https://...","['https://openalex.org/W2484239316', 'https://...",https://api.openalex.org/works/W2256016639/ngrams,"{'Large-scale': [0], 'reference': [1], 'data':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 42}, {'year'...",2024-02-09T16:01:28.162872,2016-06-24,,"[https://openalex.org/A5021736013, https://ope..."
1,1,https://openalex.org/W2097950056,https://doi.org/10.1016/s0140-6736(12)61689-4,Disability-adjusted life years (DALYs) for 291...,Disability-adjusted life years (DALYs) for 291...,2012,2012-12-01,{'openalex': 'https://openalex.org/W2097950056...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1503206236', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2097950056/ngrams,"{'Background': [0], 'Measuring': [1], 'disease...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 34}, {'year'...",2024-02-09T15:44:30.115740,2016-06-24,True,"[https://openalex.org/A5004515522, https://ope..."
2,2,https://openalex.org/W2112334970,https://doi.org/10.1016/s0140-6736(96)07492-2,Alternative projections of mortality and disab...,Alternative projections of mortality and disab...,1997,1997-05-01,{'openalex': 'https://openalex.org/W2112334970...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1565391189', 'https://...","['https://openalex.org/W2056522719', 'https://...",https://api.openalex.org/works/W2112334970/ngrams,"{'Plausible': [0], 'projections': [1, 150, 204...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 5}, {'year':...",2024-02-03T01:09:06.322989,2016-06-24,,"[https://openalex.org/A5004515522, https://ope..."
3,3,https://openalex.org/W2909678677,https://doi.org/10.1016/s0140-6736(18)31788-4,Food in the Anthropocene: the EAT–Lancet Commi...,Food in the Anthropocene: the EAT–Lancet Commi...,2019,2019-02-01,{'openalex': 'https://openalex.org/W2909678677...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W595212506', 'https://o...","['https://openalex.org/W4200315805', 'https://...",https://api.openalex.org/works/W2909678677/ngrams,"{'Food': [0, 207, 241, 310, 337, 353, 386, 483...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 135}, {'year...",2024-02-06T14:32:49.072483,2019-01-25,,"[https://openalex.org/A5083991449, https://ope..."
4,4,https://openalex.org/W2115830594,https://doi.org/10.1016/s0140-6736(06)68770-9,Global and regional burden of disease and risk...,Global and regional burden of disease and risk...,2006,2006-05-01,{'openalex': 'https://openalex.org/W2115830594...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1981986207', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2115830594/ngrams,"{'Our': [0, 322], 'aim': [1], 'was': [2, 157, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 13}, {'year'...",2024-02-04T11:52:57.006034,2016-06-24,,"[https://openalex.org/A5042384828, https://ope..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14750,0,https://openalex.org/W3147227403,,Astrophysics with MILAGRO,Astrophysics with MILAGRO,1993,1993-01-01,{'openalex': 'https://openalex.org/W3147227403...,en,"{'is_oa': False, 'landing_page_url': 'http://u...",...,[],"['https://openalex.org/W346237057', 'https://o...",https://api.openalex.org/works/W3147227403/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-07T14:06:39.790932,2021-04-13,,"[https://openalex.org/A5025839822, https://ope..."
14751,1,https://openalex.org/W3168899959,,Search for Emission of Ultra High Energy Radia...,Search for Emission of Ultra High Energy Radia...,1993,1993-05-01,{'openalex': 'https://openalex.org/W3168899959...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W126798772', 'https://o...",https://api.openalex.org/works/W3168899959/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-09T02:45:52.138393,2021-06-22,,"[https://openalex.org/A5046416298, https://ope..."
14752,2,https://openalex.org/W325855580,,Simulations of multiparticle production at hig...,Simulations of multiparticle production at hig...,1993,1993-06-15,{'openalex': 'https://openalex.org/W325855580'...,en,"{'is_oa': False, 'landing_page_url': 'http://w...",...,[],"['https://openalex.org/W72940282', 'https://op...",https://api.openalex.org/works/W325855580/ngrams,"{'A': [0], 'Monte': [1], 'Carlo': [2], 'genera...",https://api.openalex.org/works?filter=cites:W3...,[],2024-02-06T08:24:28.741848,2016-06-24,,[https://openalex.org/A5013110276]
14753,3,https://openalex.org/W4247458054,https://doi.org/10.2307/25512956,The Herne's Egg,The Herne's Egg,1993,1993-01-01,{'openalex': 'https://openalex.org/W4247458054...,,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W2082860237', 'https://...",https://api.openalex.org/works/W4247458054/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-02T14:47:22.326577,2022-05-12,,"[https://openalex.org/A5004515522, https://ope..."


In [72]:
def chunk_check(i,j):
    """Count works in saved chunk ``i`` that list at least one author from ``author_chunks[j]``.

    Parameters
    ----------
    i : int
        Index of the ``works_chunk_{i}.csv`` file to read from the external drive.
    j : int
        Index into the module-level ``author_chunks`` list.

    Returns
    -------
    int
        Number of matching rows.
    """
    chunk_i = pd.read_csv(f"/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_{i}.csv")
    # read_csv returns the authorships column as text; parse it back into
    # lists of dicts, otherwise extract_author_ids would iterate over characters
    # and fail. literal_eval only accepts Python literals (unlike eval).
    chunk_i['authorships'] = chunk_i['authorships'].apply(
        lambda value: ast.literal_eval(value) if isinstance(value, str) else value
    )
    chunk_i = add_author_id_list(chunk_i)
    # Set lookup instead of scanning the chunk list once per author ID
    wanted_ids = set(author_chunks[j])
    is_match = chunk_i['author_ids'].apply(lambda ids: not wanted_ids.isdisjoint(ids))
    return int(is_match.sum())
    

In [89]:
# Parse the CSV-serialised author_ids strings back into lists.
# literal_eval only accepts Python literals (unlike eval), and values that are
# already parsed are left alone so the cell can be re-run safely.
chunk_0['author_ids'] = chunk_0['author_ids'].apply(
    lambda value: ast.literal_eval(value) if isinstance(value, str) else value
)

In [53]:
#count number of entries for each author
# One pass over the works instead of one full-frame scan per author.
# set(ids) counts a row once per author even if an ID repeats inside the row,
# which matches a per-row membership test.
# Assumes author_ids values are already parsed into lists of ID strings.
works_per_author = {}
for ids in chunk_0['author_ids']:
    for author in set(ids):
        works_per_author[author] = works_per_author.get(author, 0) + 1

# Authors that never appear in this chunk get an explicit count of 0
author_counts = {author: works_per_author.get(author, 0) for author in author_ids}

#sort in descending order
author_counts = dict(sorted(author_counts.items(), key=lambda item: item[1], reverse=True))
print(author_counts)

{'https://openalex.org/A5059319698': 771, 'https://openalex.org/A5054256781': 545, 'https://openalex.org/A5045504233': 452, 'https://openalex.org/A5005730211': 436, 'https://openalex.org/A5032410036': 428, 'https://openalex.org/A5005354758': 409, 'https://openalex.org/A5078504185': 329, 'https://openalex.org/A5047434698': 324, 'https://openalex.org/A5055277410': 261, 'https://openalex.org/A5061115598': 251, 'https://openalex.org/A5058301915': 233, 'https://openalex.org/A5005897340': 223, 'https://openalex.org/A5082324446': 223, 'https://openalex.org/A5064241770': 206, 'https://openalex.org/A5091588944': 199, 'https://openalex.org/A5082072514': 194, 'https://openalex.org/A5061172218': 178, 'https://openalex.org/A5040055379': 175, 'https://openalex.org/A5014886042': 174, 'https://openalex.org/A5079634281': 173, 'https://openalex.org/A5040423859': 166, 'https://openalex.org/A5006444990': 165, 'https://openalex.org/A5080512785': 160, 'https://openalex.org/A5055710883': 156, 'https://openal

In [67]:
import csv
import sys

In [68]:
def count_rows_in_csv(filename):
    """Count the rows of a CSV file, header row included.

    Rows are counted by the ``csv`` parser, so a quoted field that spans
    several physical lines still counts as one row.

    Parameters
    ----------
    filename : str
        Path of a UTF-8 encoded CSV file.

    Returns
    -------
    int
        Number of parsed rows (0 for an empty file).
    """
    # Raise the parser's per-field size cap as far as the platform allows.
    # sys.maxsize overflows the C long on some platforms, so back off until
    # the value is accepted. Note this setting is global to the csv module.
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            break
        except OverflowError:
            limit //= 10

    # newline='' leaves newline handling to the csv module, as its docs require
    with open(filename, 'r', encoding='utf-8', newline='') as file:
        return sum(1 for _ in csv.reader(file))

In [69]:
# Row count of the chunk 0 file; the header row is included in the number.
count_rows_in_csv("/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_0.csv")

8335

In [70]:
# Total number of data rows across the chunk CSV files.
# NOTE(review): 512 is presumably len(author_chunks) (chunk indices 0-511) — confirm.
count = 0
for i in range(512):
    # subtract 1 so that each file's header row is not counted
    count += count_rows_in_csv(f"/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_{i}.csv") - 1
    print(f"chunk {i} done")

print(count)

chunk 0 done
chunk 1 done
chunk 2 done
chunk 3 done
chunk 4 done
chunk 5 done
chunk 6 done
chunk 7 done
chunk 8 done
chunk 9 done
chunk 10 done
chunk 11 done
chunk 12 done
chunk 13 done
chunk 14 done
chunk 15 done
chunk 16 done
chunk 17 done
chunk 18 done
chunk 19 done
chunk 20 done
chunk 21 done
chunk 22 done
chunk 23 done
chunk 24 done
chunk 25 done
chunk 26 done
chunk 27 done
chunk 28 done
chunk 29 done
chunk 30 done
chunk 31 done
chunk 32 done
chunk 33 done
chunk 34 done
chunk 35 done
chunk 36 done
chunk 37 done
chunk 38 done
chunk 39 done
chunk 40 done
chunk 41 done
chunk 42 done
chunk 43 done
chunk 44 done
chunk 45 done
chunk 46 done
chunk 47 done
chunk 48 done
chunk 49 done
chunk 50 done
chunk 51 done
chunk 52 done
chunk 53 done
chunk 54 done
chunk 55 done
chunk 56 done
chunk 57 done
chunk 58 done
chunk 59 done
chunk 60 done
chunk 61 done
chunk 62 done
chunk 63 done
chunk 64 done
chunk 65 done
chunk 66 done
chunk 67 done
chunk 68 done
chunk 69 done
chunk 70 done
chunk 71 done
ch

In [117]:
import csv
import sqlite3

def process_csv_files(filepaths, dbname="openalex_db.sqlite"):
    """Load CSV files into the ``data`` table of a SQLite database.

    The header of the first file defines the schema: column 0 is skipped
    (assumed to be a leftover index column -- TODO confirm), column 1 becomes
    the PRIMARY KEY and every remaining column is declared TEXT. The table is
    only created if it does not exist yet. Rows are written with
    ``INSERT OR IGNORE``, so a row whose key is already present is skipped
    and re-running over the same files does not duplicate data.

    Columns named in the first header but absent from a later file are stored
    as NULL; columns that only appear in later files are ignored.

    Args:
        filepaths: Iterable of CSV file paths (list, tuple, generator, ...).
            An empty iterable is a no-op.
        dbname: Path of the SQLite database file; created if it does not exist.

    Returns:
        None. Progress is printed once per file, and the transaction is
        committed after each file so finished files survive a later failure.
    """
    import sys  # local import: do not rely on another cell having imported it

    # Materialise the input so generators work and the first path can be inspected.
    filepaths = list(filepaths)
    if not filepaths:
        return

    # Raise the csv field size limit as far as the platform allows instead of
    # relying on an earlier cell having done it. csv.field_size_limit() raises
    # OverflowError when the value does not fit in a C long, so back off until accepted.
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            break
        except OverflowError:
            limit //= 2

    def quote_identifier(name):
        """Return ``name`` as a double-quoted SQL identifier."""
        return '"' + name.replace('"', '""') + '"'

    # newline='' leaves line-ending handling to the csv module, as its docs require.
    with open(filepaths[0], 'r', encoding='utf-8', newline='') as file:
        header = next(csv.reader(file))
    key_column = header[1]
    columns = header[1:]

    # Build both statements once. Identifiers are quoted so header names that
    # contain spaces or punctuation cannot break (or inject into) the SQL.
    column_defs = [f'{quote_identifier(key_column)} PRIMARY KEY']
    column_defs.extend(f'{quote_identifier(name)} TEXT' for name in header[2:])
    create_sql = 'CREATE TABLE IF NOT EXISTS data (' + ', '.join(column_defs) + ')'

    column_list = ', '.join(quote_identifier(name) for name in columns)
    placeholders = ', '.join('?' * len(columns))
    insert_sql = f'INSERT OR IGNORE INTO data ({column_list}) VALUES ({placeholders})'

    # Connect to SQLite database (this creates the database if it doesn't exist)
    conn = sqlite3.connect(dbname)
    try:
        cursor = conn.cursor()
        cursor.execute(create_sql)

        for filepath in filepaths:
            print(f'Processing {filepath}...')
            with open(filepath, 'r', encoding='utf-8', newline='') as file:
                rows = csv.DictReader(file)
                # row.get() yields None (stored as NULL) for any column this file lacks.
                cursor.executemany(
                    insert_sql,
                    ([row.get(name) for name in columns] for row in rows),
                )
            conn.commit()
    finally:
        # Always release the database handle, even if a file fails to load.
        conn.close()

    # Optional: Export data from SQLite to new CSV files
    # This step is omitted for brevity but would involve querying the table
    # and writing the results to one or more CSV files.


In [118]:
# Import all 512 chunk files into SQLite; dbname is left at its default.
# NOTE(review): the TypeError recorded as this cell's output does not match this call
# (a list is subscriptable) -- presumably stale output from an earlier version; re-run to confirm.
process_csv_files([f"/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_{i}.csv" for i in range(512)])

TypeError: 'generator' object is not subscriptable

In [108]:
# Open the OpenAlex SQLite database on the external drive and run a row count
# over the `data` table. The result stays on `cur` until it is fetched.
db_path = "/Volumes/Samsung_T5/OpenAlex_Works/openalex_db.sqlite"
conn = sqlite3.connect(db_path)
cur = conn.cursor()
cur.execute("SELECT COUNT(*) FROM data")

<sqlite3.Cursor at 0x12d570bc0>

In [109]:
# Retrieve the rows of the last statement executed on `cur` (presumably the COUNT(*) query).
cur.fetchall()

[(37520,)]

In [90]:
# Build SQLite column definitions from the first chunk's header: column 0 is
# skipped, column 1 becomes the PRIMARY KEY and the remaining columns are TEXT.
# newline='' leaves line-ending handling to the csv module, as its docs require.
with open('/Volumes/Samsung_T5/OpenAlex_Works/works_chunk_0.csv', 'r', encoding='utf-8', newline='') as file:
    csv_reader = csv.reader(file)
    header = next(csv_reader)

# Named key_column rather than `id` so the builtin id() is not shadowed for the
# rest of the notebook session.
key_column = header[1]
# Joining a list avoids a dangling ", " when the header has no columns after the key.
columns = ', '.join([f'{key_column} PRIMARY KEY'] + [f'{name} TEXT' for name in header[2:]])

print(columns)

id PRIMARY KEY, doi TEXT, title TEXT, display_name TEXT, publication_year TEXT, publication_date TEXT, ids TEXT, language TEXT, primary_location TEXT, type TEXT, type_crossref TEXT, indexed_in TEXT, open_access TEXT, authorships TEXT, countries_distinct_count TEXT, institutions_distinct_count TEXT, corresponding_author_ids TEXT, corresponding_institution_ids TEXT, apc_list TEXT, apc_paid TEXT, has_fulltext TEXT, fulltext_origin TEXT, cited_by_count TEXT, cited_by_percentile_year TEXT, biblio TEXT, is_retracted TEXT, is_paratext TEXT, primary_topic TEXT, topics TEXT, keywords TEXT, concepts TEXT, mesh TEXT, locations_count TEXT, locations TEXT, best_oa_location TEXT, sustainable_development_goals TEXT, grants TEXT, referenced_works_count TEXT, referenced_works TEXT, related_works TEXT, ngrams_url TEXT, abstract_inverted_index TEXT, cited_by_api_url TEXT, counts_by_year TEXT, updated_date TEXT, created_date TEXT, is_authors_truncated TEXT


In [94]:
# Write chunk_0 (defined earlier in the notebook; presumably a DataFrame) as JSON Lines: one record per line.
chunk_0.to_json('chunk_0.json', orient='records', lines=True)

In [95]:
# Read the JSON Lines file back into a DataFrame.
chunk_0_json = pd.read_json('chunk_0.json', lines=True)

In [97]:
# Write chunk_0 to a Parquet file in the working directory.
chunk_0.to_parquet('chunk_0.parquet')

In [98]:
# Read the Parquet file back into a DataFrame.
chunk_0_parquet = pd.read_parquet('chunk_0.parquet')

In [99]:
# Display the frame read back from Parquet for a visual check.
chunk_0_parquet

Unnamed: 0.1,Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,author_ids
0,0,https://openalex.org/W2256016639,https://doi.org/10.1038/nature19057,Analysis of protein-coding genetic variation i...,Analysis of protein-coding genetic variation i...,2016,2016-08-01,{'openalex': 'https://openalex.org/W2256016639...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"['https://openalex.org/W1533942137', 'https://...","['https://openalex.org/W2484239316', 'https://...",https://api.openalex.org/works/W2256016639/ngrams,"{'Large-scale': [0], 'reference': [1], 'data':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 42}, {'year'...",2024-02-09T16:01:28.162872,2016-06-24,,"[https://openalex.org/A5021736013, https://ope..."
1,1,https://openalex.org/W2097950056,https://doi.org/10.1016/s0140-6736(12)61689-4,Disability-adjusted life years (DALYs) for 291...,Disability-adjusted life years (DALYs) for 291...,2012,2012-12-01,{'openalex': 'https://openalex.org/W2097950056...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1503206236', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2097950056/ngrams,"{'Background': [0], 'Measuring': [1], 'disease...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 34}, {'year'...",2024-02-09T15:44:30.115740,2016-06-24,True,"[https://openalex.org/A5004515522, https://ope..."
2,2,https://openalex.org/W2112334970,https://doi.org/10.1016/s0140-6736(96)07492-2,Alternative projections of mortality and disab...,Alternative projections of mortality and disab...,1997,1997-05-01,{'openalex': 'https://openalex.org/W2112334970...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1565391189', 'https://...","['https://openalex.org/W2056522719', 'https://...",https://api.openalex.org/works/W2112334970/ngrams,"{'Plausible': [0], 'projections': [1, 150, 204...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 5}, {'year':...",2024-02-03T01:09:06.322989,2016-06-24,,"[https://openalex.org/A5004515522, https://ope..."
3,3,https://openalex.org/W2909678677,https://doi.org/10.1016/s0140-6736(18)31788-4,Food in the Anthropocene: the EAT–Lancet Commi...,Food in the Anthropocene: the EAT–Lancet Commi...,2019,2019-02-01,{'openalex': 'https://openalex.org/W2909678677...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W595212506', 'https://o...","['https://openalex.org/W4200315805', 'https://...",https://api.openalex.org/works/W2909678677/ngrams,"{'Food': [0, 207, 241, 310, 337, 353, 386, 483...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 135}, {'year...",2024-02-06T14:32:49.072483,2019-01-25,,"[https://openalex.org/A5083991449, https://ope..."
4,4,https://openalex.org/W2115830594,https://doi.org/10.1016/s0140-6736(06)68770-9,Global and regional burden of disease and risk...,Global and regional burden of disease and risk...,2006,2006-05-01,{'openalex': 'https://openalex.org/W2115830594...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1981986207', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2115830594/ngrams,"{'Our': [0, 322], 'aim': [1], 'was': [2, 157, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 13}, {'year'...",2024-02-04T11:52:57.006034,2016-06-24,,"[https://openalex.org/A5042384828, https://ope..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14750,0,https://openalex.org/W3147227403,,Astrophysics with MILAGRO,Astrophysics with MILAGRO,1993,1993-01-01,{'openalex': 'https://openalex.org/W3147227403...,en,"{'is_oa': False, 'landing_page_url': 'http://u...",...,[],"['https://openalex.org/W346237057', 'https://o...",https://api.openalex.org/works/W3147227403/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-07T14:06:39.790932,2021-04-13,,"[https://openalex.org/A5025839822, https://ope..."
14751,1,https://openalex.org/W3168899959,,Search for Emission of Ultra High Energy Radia...,Search for Emission of Ultra High Energy Radia...,1993,1993-05-01,{'openalex': 'https://openalex.org/W3168899959...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W126798772', 'https://o...",https://api.openalex.org/works/W3168899959/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-09T02:45:52.138393,2021-06-22,,"[https://openalex.org/A5046416298, https://ope..."
14752,2,https://openalex.org/W325855580,,Simulations of multiparticle production at hig...,Simulations of multiparticle production at hig...,1993,1993-06-15,{'openalex': 'https://openalex.org/W325855580'...,en,"{'is_oa': False, 'landing_page_url': 'http://w...",...,[],"['https://openalex.org/W72940282', 'https://op...",https://api.openalex.org/works/W325855580/ngrams,"{'A': [0], 'Monte': [1], 'Carlo': [2], 'genera...",https://api.openalex.org/works?filter=cites:W3...,[],2024-02-06T08:24:28.741848,2016-06-24,,[https://openalex.org/A5013110276]
14753,3,https://openalex.org/W4247458054,https://doi.org/10.2307/25512956,The Herne's Egg,The Herne's Egg,1993,1993-01-01,{'openalex': 'https://openalex.org/W4247458054...,,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W2082860237', 'https://...",https://api.openalex.org/works/W4247458054/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-02T14:47:22.326577,2022-05-12,,"[https://openalex.org/A5004515522, https://ope..."


In [100]:
# Write chunk_0 to a pickle file in the working directory.
chunk_0.to_pickle('chunk_0.pkl')

In [101]:
# Read the pickle file back. Unpickling can execute arbitrary code, so only load files you wrote yourself.
chunk_0_pickle = pd.read_pickle('chunk_0.pkl')

In [102]:
# Display the frame read back from the pickle file for a visual check.
chunk_0_pickle

Unnamed: 0.1,Unnamed: 0,id,doi,title,display_name,publication_year,publication_date,ids,language,primary_location,...,referenced_works,related_works,ngrams_url,abstract_inverted_index,cited_by_api_url,counts_by_year,updated_date,created_date,is_authors_truncated,author_ids
0,0,https://openalex.org/W2256016639,https://doi.org/10.1038/nature19057,Analysis of protein-coding genetic variation i...,Analysis of protein-coding genetic variation i...,2016,2016-08-01,{'openalex': 'https://openalex.org/W2256016639...,en,"{'is_oa': True, 'landing_page_url': 'https://d...",...,"['https://openalex.org/W1533942137', 'https://...","['https://openalex.org/W2484239316', 'https://...",https://api.openalex.org/works/W2256016639/ngrams,"{'Large-scale': [0], 'reference': [1], 'data':...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 42}, {'year'...",2024-02-09T16:01:28.162872,2016-06-24,,"[https://openalex.org/A5021736013, https://ope..."
1,1,https://openalex.org/W2097950056,https://doi.org/10.1016/s0140-6736(12)61689-4,Disability-adjusted life years (DALYs) for 291...,Disability-adjusted life years (DALYs) for 291...,2012,2012-12-01,{'openalex': 'https://openalex.org/W2097950056...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1503206236', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2097950056/ngrams,"{'Background': [0], 'Measuring': [1], 'disease...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 34}, {'year'...",2024-02-09T15:44:30.115740,2016-06-24,True,"[https://openalex.org/A5004515522, https://ope..."
2,2,https://openalex.org/W2112334970,https://doi.org/10.1016/s0140-6736(96)07492-2,Alternative projections of mortality and disab...,Alternative projections of mortality and disab...,1997,1997-05-01,{'openalex': 'https://openalex.org/W2112334970...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1565391189', 'https://...","['https://openalex.org/W2056522719', 'https://...",https://api.openalex.org/works/W2112334970/ngrams,"{'Plausible': [0], 'projections': [1, 150, 204...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 5}, {'year':...",2024-02-03T01:09:06.322989,2016-06-24,,"[https://openalex.org/A5004515522, https://ope..."
3,3,https://openalex.org/W2909678677,https://doi.org/10.1016/s0140-6736(18)31788-4,Food in the Anthropocene: the EAT–Lancet Commi...,Food in the Anthropocene: the EAT–Lancet Commi...,2019,2019-02-01,{'openalex': 'https://openalex.org/W2909678677...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W595212506', 'https://o...","['https://openalex.org/W4200315805', 'https://...",https://api.openalex.org/works/W2909678677/ngrams,"{'Food': [0, 207, 241, 310, 337, 353, 386, 483...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 135}, {'year...",2024-02-06T14:32:49.072483,2019-01-25,,"[https://openalex.org/A5083991449, https://ope..."
4,4,https://openalex.org/W2115830594,https://doi.org/10.1016/s0140-6736(06)68770-9,Global and regional burden of disease and risk...,Global and regional burden of disease and risk...,2006,2006-05-01,{'openalex': 'https://openalex.org/W2115830594...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,"['https://openalex.org/W1981986207', 'https://...","['https://openalex.org/W2131839676', 'https://...",https://api.openalex.org/works/W2115830594/ngrams,"{'Our': [0, 322], 'aim': [1], 'was': [2, 157, ...",https://api.openalex.org/works?filter=cites:W2...,"[{'year': 2024, 'cited_by_count': 13}, {'year'...",2024-02-04T11:52:57.006034,2016-06-24,,"[https://openalex.org/A5042384828, https://ope..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14750,0,https://openalex.org/W3147227403,,Astrophysics with MILAGRO,Astrophysics with MILAGRO,1993,1993-01-01,{'openalex': 'https://openalex.org/W3147227403...,en,"{'is_oa': False, 'landing_page_url': 'http://u...",...,[],"['https://openalex.org/W346237057', 'https://o...",https://api.openalex.org/works/W3147227403/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-07T14:06:39.790932,2021-04-13,,"[https://openalex.org/A5025839822, https://ope..."
14751,1,https://openalex.org/W3168899959,,Search for Emission of Ultra High Energy Radia...,Search for Emission of Ultra High Energy Radia...,1993,1993-05-01,{'openalex': 'https://openalex.org/W3168899959...,en,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W126798772', 'https://o...",https://api.openalex.org/works/W3168899959/ngrams,,https://api.openalex.org/works?filter=cites:W3...,[],2024-02-09T02:45:52.138393,2021-06-22,,"[https://openalex.org/A5046416298, https://ope..."
14752,2,https://openalex.org/W325855580,,Simulations of multiparticle production at hig...,Simulations of multiparticle production at hig...,1993,1993-06-15,{'openalex': 'https://openalex.org/W325855580'...,en,"{'is_oa': False, 'landing_page_url': 'http://w...",...,[],"['https://openalex.org/W72940282', 'https://op...",https://api.openalex.org/works/W325855580/ngrams,"{'A': [0], 'Monte': [1], 'Carlo': [2], 'genera...",https://api.openalex.org/works?filter=cites:W3...,[],2024-02-06T08:24:28.741848,2016-06-24,,[https://openalex.org/A5013110276]
14753,3,https://openalex.org/W4247458054,https://doi.org/10.2307/25512956,The Herne's Egg,The Herne's Egg,1993,1993-01-01,{'openalex': 'https://openalex.org/W4247458054...,,"{'is_oa': False, 'landing_page_url': 'https://...",...,[],"['https://openalex.org/W2082860237', 'https://...",https://api.openalex.org/works/W4247458054/ngrams,,https://api.openalex.org/works?filter=cites:W4...,[],2024-02-02T14:47:22.326577,2022-05-12,,"[https://openalex.org/A5004515522, https://ope..."
