In [1]:
import os
import pandas as pd
from unpywall import Unpywall
from unpywall.utils import UnpywallCredentials
import requests

In [10]:
combined_df = pd.read_csv("./COMBINED.csv")
# Ensure there's a "pdf url" column in combined_df
combined_df["pdf url"] = None
# Filter to only rows where DOI is not null
filtered_df = combined_df[combined_df["DOI"].notna()]

In [4]:
# Create a folder for downloads
os.makedirs("./downloads", exist_ok=True)
UnpywallCredentials("sv22900@uga.edu")
# export UNPAYWALL_EMAIL=your_email@example.com

# Initialize Unpywall
unpywall = Unpywall()

In [11]:
# Loop through each filtered row
for index, row in filtered_df.iterrows():
    doi = row["DOI"]

    try:
        # Get Unpaywall metadata for the DOI
        result = unpywall.doi(dois=[doi])
        if not result.empty:
            # Check if paper is open access
            is_oa = result.iloc[0].get("is_oa", False)
            if is_oa:
                # Get the PDF URL if available
                pdf_url = result.iloc[0].get("best_oa_location.url_for_pdf")
                if pdf_url:
                    combined_df.at[index, "pdf url"] = pdf_url
                    print(f"Added PDF URL for DOI: {doi}")
                else:
                    print(f"No PDF URL available for DOI: {doi}")
            else:
                print(f"Paper is not open access: {doi}")
    except Exception as e:
        print(f"Error processing DOI {doi}: {e}")


Added PDF URL for DOI: 10.3390/app9235066
Added PDF URL for DOI: 10.1080/21693277.2022.2090458
Paper is not open access: 10.1109/IC_ASET61847.2024.10596228
Paper is not open access: 10.1109/THMS.2019.2903402
Paper is not open access: 10.1007/s00170-024-14282-4
Paper is not open access: 10.1016/j.rcim.2019.04.012
Added PDF URL for DOI: 10.24425/mper.2024.149987
Added PDF URL for DOI: 10.1051/matecconf/201821804018
Paper is not open access: 10.1109/ines49302.2020.9147169
No PDF URL available for DOI: 10.1177/0018720820929928
Added PDF URL for DOI: 10.1109/RO-MAN53752.2022.9900763
Paper is not open access: 10.1007/978-3-030-85914-5_1
Paper is not open access: 10.1016/j.cie.2018.02.035
Paper is not open access: 10.1007/978-3-030-31154-4_63
Paper is not open access: 10.1109/ichms49158.2020.9209435
Paper is not open access: 10.1145/3056540.3076189
Added PDF URL for DOI: 10.1016/j.ergon.2019.03.003
Paper is not open access: 10.1016/j.rcim.2023.102536
Paper is not open access: 10.1016/j.engapp

In [12]:

# Save the updated DataFrame to a new CSV if needed
combined_df.to_csv("./combined_df_with_pdf_urls.csv", index=False)

In [19]:
# Find duplicate DOIs (returns a Series of True/False for each row)
duplicates = combined_df["DOI"].duplicated()

# Show rows with duplicate DOIs
duplicate_rows = combined_df[duplicates]

In [20]:
duplicate_rows

Unnamed: 0.1,Unnamed: 0,Title,Abstract,Authors,Keywords,DOI,ISSN,Publication Year,pdf url
21,21,SIMULATING THE EFFECT OF WORKERS' MOOD ON THE ...,"Production lines have various components, from...","Pakdamanian, E; Shiyamsunthar, N; Claudio, D","{'state', 'workplace', 'model', 'workload', 'e...",,0891-7736,2016,
30,30,EVALUATION ON ABSENTEEISM EFFECT IN PRODUCTION...,This paper studies the effect of absenteeism i...,"Desa, WLHM; Kamaruddin, S; Nawawi, MKM; Zulkep...","{'hybrid simulation', 'simulation', 'system dy...",,0127-9696,2015,
70,70,Postural Analysis Among Machinists Experiencin...,Work-related musculoskeletal disorders (WMSDs)...,"Necio, AJF; Batac, NEC; Odias, TMP; Ricafort, ...","{'ergonomics', 'machinists', 'steel manufactur...",,2157-3611,2019,
84,84,Construction Worker Workload Assessment for Hu...,Recent advances in robotics and artificial int...,"Okonkwo, C; Liang, XY; Rasheed, U; Awolusi, I;...",set(),,,2024,
88,88,Assessing Worker Health and Well-Being in Cons...,Increase in chronic diseases amongst the U.S. ...,"Trivedi, N; Yellapragada, M; Lin, KY",set(),,,2020,
...,...,...,...,...,...,...,...,...,...
723,723,Identification of interventions to improve emp...,Employee morale is a determinant of productivi...,Nur F.; Harrison D.; Deb S.; Burch V R.F.; Str...,"{'employee morale', 'employee-supervisor relat...",10.1080/23311916.2021.1914287,23311916,2021,https://www.tandfonline.com/doi/pdf/10.1080/23...
729,729,Human-Centered Design for Productivity and Saf...,"Nowadays, the current market trend is oriented...",Boschetti G.; Faccio M.; Granata I.,"{'industry 5.0', 'industry 4.0', 'safety', 'hu...",10.3390/electronics12010167,20799292,2023,https://www.mdpi.com/2079-9292/12/1/167/pdf?ve...
731,731,GBH-YOLOv5: Ghost Convolution with BottleneckC...,Photovoltaic (PV) panel surface-defect detecti...,Li L.; Wang Z.; Zhang T.,"{'bottleneckcsp', 'ghostconv', 'tiny target pr...",10.3390/electronics12030561,20799292,2023,https://www.mdpi.com/2079-9292/12/3/561/pdf?ve...
738,738,Critical Challenges of Quality Assurance of Cr...,Quality Assurance (QA) is a critical tool for ...,Ghansah F.A.; Lu W.,{'cross-border construction logistics and supp...,,26401177,2023,
