In [2]:
# for each journal in each publisher, find links to issues for a specific period of time (here it is set from 2017 to 2021)
# if there is no issue, the dataframe is filled with None value
# save the links in Journals[Publisher].csv

# imports
import pandas as pd
from bs4 import BeautifulSoup
import time

pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'center')
pd.set_option('display.precision', 3)

In [16]:
publishers = ["Wiley", "Springer", "Taylor", "Elsevier"]
publishers = ["Wiley"]

start_year = 2017
end_year = 2021

for publisher in publishers:
    print("Parsing publisher", publisher, "...")
    df = pd.read_csv("Journals" + publisher + ".csv")
    df = df.drop(columns=['links_2017', 'links_2018', 'links_2019', 'links_2020', 'links_2021'])
    display(df)
    journal_ids = df["Journal_Id"].to_numpy()
    print(journal_ids)
    
    links_journals =[] # list of dictionaries
    
    for journal_id in journal_ids:
        print(journal_id)
        # print(publisher.lower() + "/journal" + str(journal_id) + "/main_page" + ".txt")
        content = open(publisher.lower() + "/journal" + str(journal_id) + "/main_page" + ".txt", "r")
        soup = BeautifulSoup(content, 'html.parser')
        content.close()
        
        # get link of 1 issue of every year from start_year to end_year
        data = soup.find("table", {"class": "journal_issues"})
        # print(data.prettify())
        links = data.find_all('a')
        final_links = {}
        for year in range(start_year, end_year + 1):
            final_links[year] = None
        for i in range(len(links)):
            links[i] = links[i].get('href')
            for year in range(start_year, end_year + 1): # check in the good range of years
                if "year=" + str(year) + "&volume" in links[i]: # check it's not the link for the whole year
                    if "issue" in links[i].split('&')[-1]: # check it has an issue
                        if final_links[year] == None:
                            final_links[year] = links[i]
        # add to list
        links_journals.append(final_links)
    
    # add to dataframe
    for year in range(start_year, end_year + 1): 
        list_links = []
        for links in links_journals: # for each dictionary (1 dictionary for each journal)
            list_links.append(links[year]) # append link corresponding to the good year
        df["links_" + str(year)] = pd.Series(list_links)
    
    display(df)
    df.to_csv("Journals" + publisher + ".csv", index=False)


Parsing publisher Wiley ...


Unnamed: 0,Journal,Journal_Id,Publisher,Link,Link_Articles,Number_Pages
0,Abacus,23,John Wiley and Sons,/scimag/journals/23,/scimag/?journal=23&page=1,42
1,About Campus,30,John Wiley and Sons,/scimag/journals/30,/scimag/?journal=30&page=1,43
2,Academic Emergency Medicine,37,John Wiley and Sons,/scimag/journals/37,/scimag/?journal=37&page=1,285
3,Accounting & Finance,56,John Wiley and Sons,/scimag/journals/56,/scimag/?journal=56&page=1,60
4,Acta Anaesthesiologica Scandinavica,193,John Wiley and Sons,/scimag/journals/193,/scimag/?journal=193&page=1,441
...,...,...,...,...,...,...
1325,ZAMM - Journal of Applied Mathematics and Mech...,25815,John Wiley and Sons,/scimag/journals/25815,/scimag/?journal=25815&page=1,757
1326,Zoo Biology,25978,John Wiley and Sons,/scimag/journals/25978,/scimag/?journal=25978&page=1,94
1327,Zoologica Scripta,25982,John Wiley and Sons,/scimag/journals/25982,/scimag/?journal=25982&page=1,72
1328,Zoological Journal of the Linnean Society,25985,John Wiley and Sons,/scimag/journals/25985,/scimag/?journal=25985&page=1,205


[   23    30    37 ... 25982 25985 26003]
23
30
37
56
193
202
207
213
238
264
288
304
306
317
328
356
358
363
365
390
423
424
462
465
466
469
479
626
681
685
686
697
723
794
796
804
813
913
915
933
935
940
941
977
990
1054
1071
1073
1077
1083
1084
1085
1086
1112
1119
1122
1128
1144
1188
1215
1217
1245
1248
1261
1274
1290
1294
1310
1324
1404
1447
1462
1465
1482
1497
1524
1616
1626
1645
1662
1665
1710
1727
1758
1764
1777
1795
1818
1819
1824
1841
1865
1871
1876
1885
1961
1976
2031
2045
2071
2083
2110
2128
2138
2151
2254
2260
2163
2165
2186
2191
2216
2246
2277
2289
2290
2380
2382
2390
2397
2400
2401
2405
2406
2417
2418
2419
2421
2425
2428
2436
2441
2454
2456
2457
2460
2471
2475
2481
2488
2506
2595
2605
2611
2613
2616
2638
2644
2681
2692
2787
2791
2792
2810
2836
2838
2857
2892
2918
2919
2933
2973
2985
2991
3001
3006
3007
3008
3009
3014
3109
3149
3161
3187
3263
3264
3266
3267
3270
3272
3276
3277
3278
3283
3284
3285
3287
3300
3307
3310
3313
3343
3409
3425
3537
3542
3550
3551
3655
3670
3695
37

Unnamed: 0,Journal,Journal_Id,Publisher,Link,Link_Articles,Number_Pages,links_2017,links_2018,links_2019,links_2020,links_2021
0,Abacus,23,John Wiley and Sons,/scimag/journals/23,/scimag/?journal=23&page=1,42,/scimag/?journal=23&year=2017&volume=53&issue=1,/scimag/?journal=23&year=2018&volume=54&issue=1,/scimag/?journal=23&year=2019&volume=55&issue=1,/scimag/?journal=23&year=2020&volume=56&issue=1,
1,About Campus,30,John Wiley and Sons,/scimag/journals/30,/scimag/?journal=30&page=1,43,/scimag/?journal=30&year=2017&volume=21&issue=6,/scimag/?journal=30&year=2018&volume=22&issue=6,/scimag/?journal=30&year=2019&volume=23&issue=6,/scimag/?journal=30&year=2020&volume=24&issue=6,
2,Academic Emergency Medicine,37,John Wiley and Sons,/scimag/journals/37,/scimag/?journal=37&page=1,285,/scimag/?journal=37&year=2017&volume=24&issue=,/scimag/?journal=37&year=2018&volume=25&issue=S1,/scimag/?journal=37&year=2019&volume=26&issue=S1,/scimag/?journal=37&year=2020&volume=27&issue=S1,
3,Accounting & Finance,56,John Wiley and Sons,/scimag/journals/56,/scimag/?journal=56&page=1,60,/scimag/?journal=56&year=2017&volume=57&issue=,/scimag/?journal=56&year=2018&volume=58&issue=,/scimag/?journal=56&year=2019&volume=58&issue=5,/scimag/?journal=56&year=2020&volume=60&issue=S1,
4,Acta Anaesthesiologica Scandinavica,193,John Wiley and Sons,/scimag/journals/193,/scimag/?journal=193&page=1,441,/scimag/?journal=193&year=2017&volume=61&issue=1,/scimag/?journal=193&year=2018&volume=62&issue=1,/scimag/?journal=193&year=2019&volume=63&issue=1,/scimag/?journal=193&year=2020&volume=64&issue=1,/scimag/?journal=193&year=2021&volume=65&issue=1
...,...,...,...,...,...,...,...,...,...,...,...
1325,ZAMM - Journal of Applied Mathematics and Mech...,25815,John Wiley and Sons,/scimag/journals/25815,/scimag/?journal=25815&page=1,757,/scimag/?journal=25815&year=2017&volume=97&iss...,/scimag/?journal=25815&year=2018&volume=98&iss...,/scimag/?journal=25815&year=2019&volume=99&iss...,/scimag/?journal=25815&year=2020&volume=100&is...,
1326,Zoo Biology,25978,John Wiley and Sons,/scimag/journals/25978,/scimag/?journal=25978&page=1,94,/scimag/?journal=25978&year=2017&volume=36&iss...,/scimag/?journal=25978&year=2018&volume=37&iss...,/scimag/?journal=25978&year=2019&volume=38&iss...,/scimag/?journal=25978&year=2020&volume=39&iss...,
1327,Zoologica Scripta,25982,John Wiley and Sons,/scimag/journals/25982,/scimag/?journal=25982&page=1,72,/scimag/?journal=25982&year=2017&volume=46&iss...,/scimag/?journal=25982&year=2018&volume=47&iss...,/scimag/?journal=25982&year=2019&volume=48&iss...,/scimag/?journal=25982&year=2020&volume=49&iss...,
1328,Zoological Journal of the Linnean Society,25985,John Wiley and Sons,/scimag/journals/25985,/scimag/?journal=25985&page=1,205,/scimag/?journal=25985&year=2017&volume=179&is...,/scimag/?journal=25985&year=2018&volume=182&is...,/scimag/?journal=25985&year=2019&volume=185&is...,/scimag/?journal=25985&year=2020&volume=188&is...,
