# Downloading Notre Dame Scholastic PDFs

From the [University Archives](http://archives.nd.edu/digital/):
- "The Scholastic, a student weekly, began in 1867 as The Scholastic Year. For most of its history it provided news about Notre Dame as well as feature articles, literary works, essays, and alumni notes."
- [Notre Dame Scholastic (student magazine), 1867 - 2011](http://archives.nd.edu/Scholastic/)

This Jupyter Notebook includes code and comments that download all issue PDFs and match issue titles to file names.

# Import Libraries, Load URL, and Create Beautiful Soup Object

In [None]:
# import libraries
import os
import requests
from urllib.parse import urljoin
from bs4 import BeautifulSoup
import csv

In [None]:
# load url, create beautifulsoup object
# (the timeout keeps the cell from hanging indefinitely on a stalled
# connection; raise_for_status stops an HTTP error page from being parsed
# as if it were the index page)
page = requests.get('http://archives.nd.edu/Scholastic/Scholastic.htm', timeout=30)
page.raise_for_status()

soup = BeautifulSoup(page.text, 'html.parser')

# isolate HTML with 'ol' tag
url_names = soup.find('ol')

# soup.find returns None when the page has no <ol>; fail here with a clear
# message instead of an AttributeError on the find_all call below
if url_names is None:
    raise ValueError("no <ol> element found on the Scholastic index page")

# find all instances of 'a' tag
items = url_names.find_all('a')

items

# Get List of Volume Links and Titles

In [None]:
# volume urls: the archive host followed by each anchor's href
vol_url_list = ["http://archives.nd.edu" + link.get('href') for link in items]

# volume titles: the first child node of each anchor
vol_title_list = [link.contents[0] for link in items]

In [None]:
# show list of volume urls (as the last expression in the cell, the
# notebook renders its value as the cell output)
vol_url_list

In [None]:
# show list of volume titles (one entry per entry in vol_url_list, in the
# same order, because both lists are appended to in the same loop)
vol_title_list

# Get List of Issue Links and Titles

In [None]:
# create empty list for issue html elements
# (one entry per volume page: the list of 'a' tags found inside its 'ol')
issue_items = []

# create empty list for the 'ol' element of each volume page
a_tags = []

# for loop that loads each volume page as beautifulsoup object, extracts the
# 'a' tag elements inside its 'ol', and appends them to issue_items.
# Each page contributes exactly one entry: only the 'ol' just fetched is
# searched, so anchors from earlier volumes are never appended a second time.
for url in vol_url_list:
    try:
        single_page = requests.get(url, timeout=30)
        single_page.raise_for_status()
    except requests.RequestException as err:
        # best-effort scrape: report the failed volume page and move on
        print("skipping volume page " + url + ": " + str(err))
        continue

    soup = BeautifulSoup(single_page.text, 'html.parser')
    ol_tag = soup.find('ol')

    # soup.find returns None when the page has no 'ol'; skip it rather than
    # storing None and failing on find_all
    if ol_tag is None:
        print("skipping volume page " + url + ": no <ol> element found")
        continue

    a_tags.append(ol_tag)
    issue_items.append(ol_tag.find_all('a'))

In [None]:
# show sample issue_items value: the first group of 'a' tags that was
# appended to issue_items
issue_items[0]

In [None]:
# flatten issue_items (a list of per-page anchor groups) into two parallel
# lists, visiting the anchors in the same order for both

# issue urls: the raw href of every anchor
issue_url_list = [
    anchor.get('href')
    for anchors in issue_items
    for anchor in anchors
]

# issue titles: the stripped string of each anchor's second child node
issue_title_list = [
    anchor.contents[1].string.strip()
    for anchors in issue_items
    for anchor in anchors
]

In [None]:
# show issue url sample: the first href collected (still relative; the
# full URL is built in a later cell)
issue_url_list[0]

In [None]:
# show issue title sample: the title collected alongside issue_url_list[0]
issue_title_list[0]

In [None]:
# concatenate full issue url from issue_url_list
#
# An href that starts with a volume prefix ("VOL_" followed by four digits,
# e.g. "VOL_0001") is mapped to
#     http://archives.nd.edu/Scholastic/<volume prefix>/<href>
# The prefix is read from the href itself rather than listed volume by
# volume, so volume numbers outside a fixed 0001-0154 list are handled too.
scholastic_base_url = "http://archives.nd.edu/Scholastic/"
volume_prefix_length = len("VOL_0001")

full_issue_url_list = []

# hrefs that do not start with a volume prefix; kept so they can be
# inspected instead of disappearing silently
skipped_issue_urls = []

for url in issue_url_list:
    volume_prefix = url[:volume_prefix_length]
    volume_digits = volume_prefix[len("VOL_"):]
    has_volume_prefix = (
        volume_prefix.startswith("VOL_")
        and len(volume_digits) == 4
        and all(char in "0123456789" for char in volume_digits)
    )
    if has_volume_prefix:
        full_issue_url_list.append(scholastic_base_url + volume_prefix + "/" + url)
    else:
        skipped_issue_urls.append(url)

if skipped_issue_urls:
    print("skipped " + str(len(skipped_issue_urls)) + " href(s) without a VOL_#### prefix")

In [None]:
# sample full issue url: the first element of full_issue_url_list
full_issue_url_list[0]

# Download PDFs from List of Full URLs

In [None]:
# import libraries
import urllib3
import os

# configure urllib
http = urllib3.PoolManager()
print("downloading with urllib")

# for loop that downloads the file for each url in full_issue_url_list and
# saves it in the current working directory under the url's base name
for url in full_issue_url_list:
    r = http.request('GET', url)

    # only save successful responses; otherwise the body of an error
    # response would be written to disk under the issue's file name
    if r.status != 200:
        print("skipping " + url + " (HTTP status " + str(r.status) + ")")
        continue

    filename = os.path.basename(url)
    with open(filename, 'wb') as fcont:
        fcont.write(r.data)

# Matching File Names and Volume/Issue Info

In [None]:
# full issue titles: every entry of issue_title_list prefixed with the
# publication name
full_issue_title = ["Notre Dame Scholastic, " + title for title in issue_title_list]

# show sample issue title
full_issue_title[0]

In [None]:
# import pandas
import pandas as pd

# build the two-column dataframe: start from an empty frame with the column
# names fixed, then fill file_name from issue_url_list and title from
# full_issue_title
df = pd.DataFrame(columns=['file_name', 'title']).assign(
    file_name=issue_url_list,
    title=full_issue_title,
)

# show updated dataframe
df

In [None]:
# write dataframe to csv file in the current working directory;
# index=False leaves the dataframe's row index out of the file
df.to_csv("scholastic_file_name_master.csv", index=False)