In [7]:
import os
import time

import requests

In [14]:
def collect_papers(target, query, limit, offset=0, max_retries=3):
    """Collect up to ``target`` search results and split them by open-access status.

    Pages through ``search_papers`` and looks up each hit with
    ``fetch_paper_details``. Every paper is tagged with a running ``serial``
    number and placed in one of three buckets: open access, not open access,
    or the error list (detail lookup returned ``None``). Papers in the error
    list are then retried through ``execute_error_list``.

    Args:
        target: Maximum number of papers to process across all three buckets.
        query: Search string forwarded to ``search_papers``.
        limit: Page size forwarded to ``search_papers``; also the offset step.
        offset: Offset of the first search page.
        max_retries: Maximum number of retry passes over the error list, so a
            paper whose lookup always fails cannot keep the loop running forever.

    Returns:
        The open-access papers sorted by ``citationCount``, highest first.
        The same list is also printed, as are the three buckets.
    """
    open_access_papers = []
    non_open_access_papers = []
    papers_in_error_list = []
    counter = 1

    def collected():
        # Total number of papers processed so far, regardless of bucket.
        return len(open_access_papers) + len(non_open_access_papers) + len(papers_in_error_list)

    while collected() < target:

        # fetch paper list
        paper_list = search_papers(query=query, limit=limit, offset=offset)

        # A failed search (None) or an exhausted result set (empty list) ends
        # paging. This is checked before iterating so None is never looped over.
        if not paper_list:
            break

        # fetch paper details
        for basic_paper_info in paper_list:
            # Stop mid-page once the target is reached instead of overshooting.
            if collected() >= target:
                break

            paper_id = basic_paper_info['paperId']
            additional_paper_info = fetch_paper_details(paper_id)

            # 'serial' goes first so it leads the record; search fields follow.
            numbered_info = {'serial': counter, **basic_paper_info}

            # categorize papers in open access, non open access and error list
            if additional_paper_info is None:
                papers_in_error_list.append(numbered_info)
                print(f'Paper {counter} added in error list!')
            else:
                paper_profile = {**numbered_info, **additional_paper_info}
                if paper_profile.get('isOpenAccess'):
                    open_access_papers.append(paper_profile)
                    print(f'Paper {counter} info retrieved successfully.')
                else:
                    non_open_access_papers.append(paper_profile)
                    print(f'Paper {counter} is not open access.')

            # pause between detail requests (presumably for API rate limiting)
            time.sleep(2)
            counter += 1

        offset += limit
        time.sleep(2)

    print(f'{len(open_access_papers)} Open access papers: {open_access_papers}')
    print(f'{len(non_open_access_papers)} Non open access papers: {non_open_access_papers}')
    print(f'{len(papers_in_error_list)} Papers in error list: {papers_in_error_list}')

    # Retry failed lookups a bounded number of times. The buckets are lists,
    # so the retried results are merged with extend().
    retries = 0
    while papers_in_error_list and retries < max_retries:
        open_, non_open, error = execute_error_list(papers_in_error_list)
        open_access_papers.extend(open_)
        non_open_access_papers.extend(non_open)
        papers_in_error_list = list(error)
        retries += 1

    if papers_in_error_list:
        print(f'{len(papers_in_error_list)} papers still in error list after {retries} retries.')
    else:
        print('Error list empty!!!!!!!!!!!!!!!!!!!!!!')

    # Sort the open access papers by 'citationCount', highest first.
    # A missing or null count is treated as 0 so the sort cannot fail.
    data_sorted = sorted(
        open_access_papers,
        key=lambda paper: paper.get('citationCount') or 0,
        reverse=True,
    )

    # Print the sorted list
    print()
    print(data_sorted)

    return data_sorted

In [18]:
def execute_error_list(paper_list):
    """Retry the detail lookup for papers whose earlier lookup failed.

    Each entry of ``paper_list`` is a dict that must carry ``'paperId'`` and
    ``'serial'``. The entry is looked up again with ``fetch_paper_details``,
    with a two-second pause after every paper.

    Returns:
        A tuple ``(open_access, non_open_access, still_failing)`` of lists.
        The first two hold the entry merged with its fetched details; the
        last holds the untouched entries whose lookup returned ``None`` again.
    """
    recovered_open = []
    recovered_closed = []
    still_failing = []

    for entry in paper_list:
        details = fetch_paper_details(entry['paperId'])

        if details is None:
            # lookup failed again: keep the original entry for another pass
            still_failing.append(entry)
            print(f'Paper {entry["serial"]} added in error list!')
        else:
            # fetched fields win over the stored ones on key clashes
            merged = {**entry, **details}
            if not merged['isOpenAccess']:
                recovered_closed.append(merged)
                print(f'Paper {entry["serial"]} is not open access.')
            else:
                recovered_open.append(merged)
                print(f'Paper {entry["serial"]} info retrieved successfully.')

        # pause before the next request
        time.sleep(2)

    return recovered_open, recovered_closed, still_failing

In [9]:
def search_papers(query, limit, offset):
    """Fetch one page of results from the Semantic Scholar paper-search endpoint.

    Args:
        query: Search string sent as the ``query`` parameter.
        limit: Page size sent as the ``limit`` parameter.
        offset: Index of the first result, sent as the ``offset`` parameter.

    Returns:
        The list stored under the response's ``data`` key. An empty list is
        returned when the request raises, the status code is not 200, or the
        response has no ``data``; the failure is printed in the first two cases.
    """
    search_url = 'https://api.semanticscholar.org/graph/v1/paper/search'
    search_parameters = {
        'query': query,
        'limit': limit,
        'offset': offset
    }

    # The key is read from the environment so it is never stored in the
    # notebook. With no key the header is omitted entirely.
    # NOTE(review): assumes the API accepts keyless requests - confirm.
    api_key = os.environ.get('SEMANTIC_SCHOLAR_API_KEY')
    headers = {'x-api-key': api_key} if api_key else {}

    try:
        # requests applies no timeout by default; set one so a stalled
        # connection cannot hang the notebook.
        response = requests.get(search_url, params=search_parameters, headers=headers, timeout=30)
    except requests.RequestException as error:
        print(f"Request failed: {error}")
        return []

    if response.status_code != 200:
        print(f"Request failed with status code {response.status_code}: {response.text}")
        return []

    # Normalise a missing or null 'data' field to an empty list so callers
    # can always iterate over the result.
    return response.json().get('data') or []

In [10]:
def fetch_paper_details(paper_id):
    """Fetch citation and open-access fields for a single paper.

    Requests ``citationCount``, ``influentialCitationCount``, ``isOpenAccess``
    and ``openAccessPdf`` from the Semantic Scholar paper endpoint.

    Args:
        paper_id: Paper identifier appended to the endpoint URL.

    Returns:
        The decoded JSON response on HTTP 200. ``None`` is returned on any
        other status code or when the request itself fails, so callers can
        treat every failure the same way.
    """
    base_url = 'https://api.semanticscholar.org/graph/v1/paper/' + paper_id
    paper_data = {
        'fields': 'citationCount,influentialCitationCount,isOpenAccess,openAccessPdf'
    }

    # The key is read from the environment so it is never stored in the
    # notebook. With no key the header is omitted entirely.
    # NOTE(review): assumes the API accepts keyless requests - confirm.
    api_key = os.environ.get('SEMANTIC_SCHOLAR_API_KEY')
    headers = {'x-api-key': api_key} if api_key else {}

    try:
        # requests applies no timeout by default; set one so a stalled
        # connection cannot hang the notebook.
        response = requests.get(base_url, params=paper_data, headers=headers, timeout=30)
    except requests.RequestException:
        # Network errors use the same None signal as a non-200 status.
        return None

    if response.status_code == 200:
        return response.json()
    return None
    

In [19]:
# Run the collection: process up to 20 papers matching the query,
# requesting 10 search results per page starting at offset 0.
# The commented-out call below fetches a single raw search page instead.
# search_papers(query='cellulose materials', limit=10, offset=0)
collect_papers(target=20, query='cellulose materials', limit=10, offset=0)


Paper 1 info retrieved successfully.
Paper 2 info retrieved successfully.
Paper 3 is not open access.
Paper 4 is not open access.
Paper 5 info retrieved successfully.
Paper 6 is not open access.
Paper 7 info retrieved successfully.
Paper 8 info retrieved successfully.
Paper 9 is not open access.
Paper 10 is not open access.
Paper 11 info retrieved successfully.
Paper 12 is not open access.
Paper 13 is not open access.
Paper 14 is not open access.
Paper 15 is not open access.
Paper 16 info retrieved successfully.
Paper 17 is not open access.
Paper 18 info retrieved successfully.
Paper 19 is not open access.
Paper 20 info retrieved successfully.
9 Open access papers: [{'serial': 1, 'paperId': '1f90ab1392d01dfe6019c477198b7da2ceb6801c', 'title': 'Cellulose materials with high light transmittance and high haze: a review', 'citationCount': 7, 'influentialCitationCount': 0, 'isOpenAccess': True, 'openAccessPdf': {'url': 'https://www.researchsquare.com/article/rs-1310113/latest.pdf', 'status'

In [4]:


# URL of the PDF file
pdf_url = "https://downloads.hindawi.com/journals/ijps/2018/7923068.pdf"

# Send a GET request to the URL. requests applies no timeout by default,
# so one is set here to keep the cell from hanging on a stalled server.
try:
    response = requests.get(pdf_url, timeout=30)
except requests.RequestException as error:
    # Connection errors and timeouts are reported instead of raising a traceback
    print(f"Failed to download the PDF. ({error})")
else:
    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        # Get the content of the response (PDF content)
        pdf_content = response.content

        # Save the PDF content to a file
        with open("downloaded_pdf.pdf", "wb") as pdf_file:
            pdf_file.write(pdf_content)
        print("PDF downloaded successfully.")
    else:
        print("Failed to download the PDF.")

PDF downloaded successfully.
