In [2]:
import requests as req
import json
import io
import time
import os
import random
# from pymongo import MongoClient
# from mongoengine import *

with open('config.json', 'r') as read_file:
    client = json.load(read_file)

# db_client = MongoClient('localhost', 27017)

In [3]:
# functions
def ceildiv(a, b):
    return -(-a // b)

def id_generator(size: int = 6):
    chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    return ''.join(random.choice(chars) for _ in range(size))

def scp_query(query: str = '', inst: str = '', city: str = '', country: str = '', open_access: str = '',
    start_year: int = 0, end_year: int = 0):
    
    inst_q, city_q, country_q = '', '', ''
    if inst:
        inst_q = ' AND '.join(inst.lower().split(' '))
    if city:
        city_q = ' AND '.join(city.lower().split(' '))
    if country:
        country_q = ' AND '.join(country.lower().split(' '))
    query = [
        (f'{query}' if query else ''),
        (f'AFFIL({inst_q})' if inst_q else ''),
        (f'AFFILCITY({city_q})' if city_q else ''),
        (f'AFFILCOUNTRY({country_q})' if country_q else ''),
        (f'ACCESSTYPE({open_access})' if open_access else ''),
        (f'(PUBYEAR > {start_year - 1})' if start_year else ''),
        (f'(PUBYEAR < {end_year + 1})' if end_year != 0 else ''),
    ]
    query = filter(lambda element: element, query)
    return ' AND '.join(query)

def scp_lookup(client:dict, query: str, cnt, start_cursor: int = 0, count_per_page: int = 25):
    base = 'https://api.elsevier.com/content/search/scopus'
    params = {
        'query': query,
        'apiKey': client['apikey'],
        'insttoken': client['insttoken'],
        'view': 'COMPLETE',
        'start': f'{start_cursor + (cnt * count_per_page)}',
        'count': f'{count_per_page}',
    }
    headers = {'Accept': 'application/json'}
    print(query)
    return req.get(url=base, params=params)

def export_file(data: str, inst: str, year: int,
    open_access: str = '', file_id: str = '', folder: str = '', cnt: int = 0, extension: str = '.txt'):
    
    if not os.path.exists(folder):
        os.makedirs(folder)
    
    cnt = str(cnt).zfill(3)
    isnt = inst.replace(' ', '-')
    if extension[0] != '.':
        extension += '.'
    file_name = f'{inst}_y{year}_{cnt}'
    if open_access:
        file_name += f'_access-{open_access.upper()}'
    if file_id:
        file_name += f'_{file_id}'
    file_name += f'_{int(time.time())}{extension}'
    file_name = os.path.join(folder, file_name)
    with io.open(file_name, 'w', encoding='utf8') as f:
        f.write(data) 

def scp_papers(client: dict, query: str, years: list, inst: str, folder: str = ''):
    file_id = id_generator()
    for year in range(years[0], years[1]):
        q = scp_query(query=query, start_year=year, end_year=year)
        res = scp_lookup(client=client, query=q, cnt=0)
        total_results = int(res.json()['search-results']['opensearch:totalResults'])
        
        export_file(data=res.text, file_id=file_id, inst=inst, year=year, folder=folder, cnt=0, extension='.txt')
        if total_results > 5000:
            print(f'WARNING: paper count: {total_results}. Retrieving only the first 5000')
        page_count = min(ceildiv(total_results, 25), 200) # 25 is the max number of papers in each request
        for cnt in range(1, page_count):
            res = scp_lookup(client=client, query=q, cnt=cnt)
            export_file(data=res.text, file_id=file_id, inst=inst, year=year, folder=folder, cnt=cnt, extension='.txt')

In [4]:
start_year = 2017
end_year = 2017
access_type = 'ACCESSTYPE(OA)' #open access
access_type = 'ACCESSTYPE(OTHER)'
affil_IDs = [
    ['AF-ID("Sharif University of Technology" 60027666)', 'Sharif University of Technology'],
    ['AF-ID("Amirkabir University of Technology" 60007751)', 'Amirkabir University of Technology'],
    ['AF-ID("University of Tehran" 60022927)', 'University of Tehran'],
    ['AF-ID("Tarbiat Modares University" 60032053)', 'Tarbiat Modares University'],
    ['AF-ID("Shahid Beheshti University" 60032873)', 'Shahid Beheshti University'],
    # ['AF-ID("Ferdowsi University of Mashhad" 60001800)', 'Ferdowsi University of Mashhad'],
    # ['AF-ID("University of Kashan" 60023478)', 'University of Kashan'],
]
query = scp_query(query=affil_IDs[3], start_year=start_year, end_year=end_year)

res = scp_lookup(client=client, query=query, cnt=0)

# res.json()
# titles = [doc['dc:title'] for doc in res.json()['search-results']['entry']]
# for t in titles:
#     print(t)
#     print()

['AF-ID("Tarbiat Modares University" 60032053)', 'Tarbiat Modares University'] AND (PUBYEAR > 2016) AND (PUBYEAR < 2018)


In [83]:
with io.open('salam.json', 'w', encoding='utf8') as f:
    f.write(res.text)

In [84]:
with io.open('salam.json', 'r', encoding='utf8') as ff:
    data = json.load(ff)

In [8]:
affil_IDs = [
    ['AF-ID("Sharif University of Technology" 60027666)', 'Sharif University of Technology'],
    ['AF-ID("Amirkabir University of Technology" 60007751)', 'Amirkabir University of Technology'],
    ['AF-ID("University of Tehran" 60022927)', 'University of Tehran'],
    ['AF-ID("Tarbiat Modares University" 60032053)', 'Tarbiat Modares University'],
    ['AF-ID("Shahid Beheshti University" 60032873)', 'Shahid Beheshti University'],
    # ['AF-ID("Ferdowsi University of Mashhad" 60001800)', 'Ferdowsi University of Mashhad'],
    # ['AF-ID("University of Kashan" 60023478)', 'University of Kashan'],
]
for uni in affil_IDs[0:1]:
    [q, inst, folder] = [uni[0], uni[1], uni[1]]
    scp_papers(client=client, query=q, years=[2018, 2019], inst=inst, folder=folder)

AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBYEAR < 2019)
AF-ID("Sharif University of Technology" 60027666) AND (PUBYEAR > 2017) AND (PUBY

AF-ID("Sharif University of Technology" 60027666)
Sharif University of Technology
Sharif University of Technology
