# Web Scraping using Python

## Target Journal: JMRI

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#To easily display the plots, make sure to include the line %matplotlib inline as shown below.
%matplotlib inline

import urllib
import requests
import lxml

#To perform web scraping, you should also import the libraries shown below. 
#The urllib.request module is used to open URLs. 
#The Beautiful Soup package is used to extract data from html files. 
#The Beautiful Soup library's name is bs4 which stands for Beautiful Soup, version 4.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

## First, save as a local html file
 - See: https://zetcode.com/python/beautifulsoup/

In [1]:
# Mount Google Drive at /content/drive; the saved journal pages and the
# output files used below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Path of the locally saved table-of-contents page to parse.
# Exactly one URL assignment should be active; the commented ones are kept
# so the notebook can be re-run for the earlier issues.
# JMRI Vol 54 No 1
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_Vol 54, No 1.html'
# JMRI Vol 54 No 2
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 2.html'
# JMRI Vol 54 No 3
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 3.html'
# Current: Vol 54 No 4
URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 4.html'

# Read with an explicit encoding: the article titles contain non-ASCII
# characters (en dashes, curly quotes), so the platform default encoding
# must not be relied on.
with open(URL, 'r', encoding='utf-8') as f:
    contents = f.read()

# Parse after the file has been closed; only the in-memory string is needed.
soup = BeautifulSoup(contents, 'lxml')

Find the corresponding index, and identify the title info

In [4]:
def safeOpenParsePage(targetUrl):
    """Read a locally saved HTML file and parse it with BeautifulSoup.

    Parameters
    ----------
    targetUrl : str
        Path of the HTML file on disk. Despite the name, the file is read
        with ``open`` rather than fetched over the network.

    Returns
    -------
    BeautifulSoup or None
        The document parsed with the ``lxml`` parser, or ``None`` when the
        file cannot be opened or read (the error is printed in that case).
    """
    try:
        # The context manager closes the handle even if reading fails.
        with open(targetUrl, 'r', encoding='utf-8') as tmpFile:
            tmpR = tmpFile.read()
    except OSError as e:
        # open()/read() report missing or unreadable files as OSError.
        # urllib's HTTPError is an OSError subclass, so anything the previous
        # handler caught is still caught here.
        print(e)
        return None
    return BeautifulSoup(tmpR, 'lxml')

# Parse the page selected by URL; soupJMRI is the document every later cell queries.
soupJMRI = safeOpenParsePage(URL)
# safeOpenParsePage returns None on failure, so only dump the markup when parsing worked.
if soupJMRI is not None:
      print(soupJMRI.prettify())

<!DOCTYPE html>
<html class="pb-page" data-request-id="59a6411c-3f54-4364-810d-569f570a63c4" lang="en">
 <head data-pb-dropzone="head">
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content=";journal:journal:15222586;ctype:string:Journal Content;website:website:pericles;page:string:Table of Contents;requestedJournal:journal:15222586;wgroup:string:Publication Websites;issue:issue:doi\:10.1002/jmri.v54.4;pageGroup:string:Publication Pages" name="pbContext"/>
  <script type="text/javascript">
   var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";
  </script>
  <script id="analyticDigitalData">
   digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2021-09-26"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"InstitutionUser","uuid":"58cbd8e6-3c5d-4fc0-ac26-57c4792d2c4b","customerRecords":[{"customerD

In [5]:
# Get the title
# Note: `title` is the <title> element itself (a BeautifulSoup Tag), not a
# plain string, which is why printing it shows the surrounding tags.
title = soupJMRI.title
print(title)

<title> Journal of Magnetic Resonance Imaging: Vol 54, No 4</title>


In [6]:
# Print out the text
# Extract all text nodes of the parsed page and print that same string
# (previously `text` was computed but a different variable was printed).
text = soupJMRI.get_text()
print(text)

# Another way to extract text
# str(all_links[103]).split("<h2>")[1].replace("</h2></a>", "")



var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2021-09-26"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"InstitutionUser","uuid":"58cbd8e6-3c5d-4fc0-ac26-57c4792d2c4b","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"EALWB000204"}]},{"type":"SmartGroupUser","uuid":"cf9c85b9-7f3b-4309-aeaa-c41669cabe3b"},{"type":"SmartGroupUser","uuid":"db8b8d10-1aed-4809-b3ee-70938c569014"},{"type":"SmartGroupUser","uuid":"cdce41c2-a8de-4a64-a4d9-a85caaaa2314"},{"type":"InstitutionUser","uuid":"bfa4ccf8-8126-488a-b090-da278aa7b4d9","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"ALOG00017"}]},{"type":"BasicGroup","uuid":"6a17ff38-0daf-4f0a-b4eb-b42c69d5c12f","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"CORE00000007783"}]},

In [7]:
# Collect every <a> element of the page; the DOI links are filtered out of this list further below.
all_links = soupJMRI.find_all('a')

### Explore Category

In [8]:
# <h3> headings hold the section labels of the issue (e.g. "Editorial",
# "Research Articles") together with some site navigation headings.

all_h3 = soupJMRI.find_all("h3")

# Print each heading's text, then how many headings were found.
i = 0
for i, h3 in enumerate(all_h3, start=1):
  print(h3.get_text())
print(i)

Menu
Format
Type of import
Cover Image
Issue Information
Review Article
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Commentary
Letter to the Editor
About Wiley Online Library
Help & Support
Opportunities
Connect with Wiley
40


### Explore Original Research Subcategory

In [9]:
# <h4> headings hold the subcategory labels (e.g. "Neuro", "Cardiac").

all_h4 = soupJMRI.find_all("h4")

# List every subcategory heading followed by the total count.
i = 0
for i, h4 in enumerate(all_h4, start=1):
  print(h4.get_text())
print(i)

Neuro
Neuro
Vascular
Pediatrics
Abdomen
Abdomen
Abdomen
Technical
Pelvis
Cardiac
Cardiac
Cardiac
Breast
Musculoskeletal
Musculoskeletal
Head and Neck
Physics
17


In [10]:
all_h2 = soupJMRI.find_all("h2")

# Show only the <h2> headings with more than three whitespace-separated
# words, and count how many were shown.
i = 0
for h2 in all_h2:
  h2_len = len(h2.get_text().split())
  if h2_len <= 3:
    continue
  print(h2.get_text())
  i += 1
print(i)

# Earlier experiments with stringifying the tag lists, kept for reference:
# str_h2 = str(all_h2)

# links = soupJMRI.find_all("a")
# str_links = str(links)
# print(str_h2)

# cleantext = BeautifulSoup(str_h2, "lxml").get_text()
# cleantext = BeautifulSoup(str_links, "lxml").get_text()
# print(cleantext)

T2 Relaxometry Evidence of Microstructural Changes in Diffusely Abnormal White Matter in Relapsing–Remitting Multiple Sclerosis and Clinically Isolated Syndrome: Impact on Visuomotor Performance
MRI of Temporomandibular Joint Disorders: Recent Advances and Future Directions
Multi-Parametric Evaluation of Cerebral Hemodynamics in Neonatal Piglets Using Non-Contrast-Enhanced Magnetic Resonance Imaging Methods
Tissue Probability Based Registration of Diffusion-Weighted Magnetic Resonance Imaging
T2 Relaxometry Evidence of Microstructural Changes in Diffusely Abnormal White Matter in Relapsing–Remitting Multiple Sclerosis and Clinically Isolated Syndrome: Impact on Visuomotor Performance
One-Minute Multi-contrast Echo Planar Brain MRI in Ischemic Stroke: A Retrospective Observational Study of Diagnostic Performance
Editorial for “One-Minute Multi-Contrast Echo Planar Brain MRI in Ischemic Stroke - A Retrospective Observational Study of Diagnostic Performance”
Serum Ceruloplasmin Depletion 

## Next, clean up the title list

In [11]:
# Keep the text of every <h2> heading that has more than four
# whitespace-separated words.
list_h2 = [
  heading.get_text()
  for heading in all_h2
  if len(heading.get_text().split()) > 4
]

# Delete the last element: "Log in to Wiley Online Library"
list_h2.pop()

# Insert "Issue Information" as the 2nd element
list_h2.insert(1, "Issue Information")

# JMRI Vol 54 No 2
# list_h2.append("Erratum")

# JMRI Vol 54 No 3
# list_h2.insert(2, "Commentary")
# list_h2.append("Condensation Artifact")
# list_h2.append("Reviewer Acknowledgements")

# Print the cleaned titles followed by how many there are.
for article_title in list_h2:
  print(article_title)
print(len(list_h2))

T2 Relaxometry Evidence of Microstructural Changes in Diffusely Abnormal White Matter in Relapsing–Remitting Multiple Sclerosis and Clinically Isolated Syndrome: Impact on Visuomotor Performance
Issue Information
MRI of Temporomandibular Joint Disorders: Recent Advances and Future Directions
Multi-Parametric Evaluation of Cerebral Hemodynamics in Neonatal Piglets Using Non-Contrast-Enhanced Magnetic Resonance Imaging Methods
Tissue Probability Based Registration of Diffusion-Weighted Magnetic Resonance Imaging
T2 Relaxometry Evidence of Microstructural Changes in Diffusely Abnormal White Matter in Relapsing–Remitting Multiple Sclerosis and Clinically Isolated Syndrome: Impact on Visuomotor Performance
One-Minute Multi-contrast Echo Planar Brain MRI in Ischemic Stroke: A Retrospective Observational Study of Diagnostic Performance
Editorial for “One-Minute Multi-Contrast Echo Planar Brain MRI in Ischemic Stroke - A Retrospective Observational Study of Diagnostic Performance”
Serum Cerulo

### Add dates first published

In [12]:
# <li> elements whose class list is exactly ['ePubDate'] hold the
# "First Published" line of each entry.

all_li = soupJMRI.find_all("li")

# Keep what follows the first ': ' separator in each matching item's text,
# i.e. the date without its label.
list_date = [
  item.get_text().split(': ')[1]
  for item in all_li
  if item.get_attribute_list("class") == ['ePubDate']
]

# Print the dates followed by how many were found.
for published in list_date:
  print(published)
print(len(list_date))

14 September 2021
14 September 2021
31 August 2020
06 May 2021
24 April 2021
06 May 2021
04 May 2021
11 May 2021
05 May 2021
11 May 2021
15 May 2021
22 May 2021
04 May 2021
13 April 2021
05 May 2021
26 March 2021
06 May 2021
29 March 2021
20 April 2021
28 March 2021
26 March 2021
06 May 2021
15 May 2021
17 May 2021
10 May 2021
28 May 2021
21 March 2021
24 March 2021
13 May 2021
19 March 2021
13 June 2021
06 April 2021
05 May 2021
13 June 2021
17 July 2021
24 April 2021
12 May 2021
22 May 2021
05 May 2021
06 May 2021
17 May 2021
18 May 2021
05 May 2021
13 May 2021
24 May 2021
13 June 2021
46


In [13]:
# Build DOI URLs from the links whose class attribute mentions 'visitable'.
# (An additional "'/doi/' in href" filter was tried earlier and left disabled.)
list_url = []
for link in all_links:
  if 'visitable' not in str(link.get("class")):
    continue
  # Turn the '/doi' part of the href into the doi.org resolver address.
  doi_url = link.get("href").replace('/doi', 'https://doi.org')
  list_url.append(doi_url)

# Print the URLs followed by how many were collected.
for doi_url in list_url:
  print(doi_url)
print(len(list_url))

https://doi.org/10.1002/jmri.27233
https://doi.org/10.1002/jmri.27234
https://doi.org/10.1002/jmri.27338
https://doi.org/10.1002/jmri.27638
https://doi.org/10.1002/jmri.27654
https://doi.org/10.1002/jmri.27661
https://doi.org/10.1002/jmri.27641
https://doi.org/10.1002/jmri.27685
https://doi.org/10.1002/jmri.27680
https://doi.org/10.1002/jmri.27694
https://doi.org/10.1002/jmri.27657
https://doi.org/10.1002/jmri.27736
https://doi.org/10.1002/jmri.27656
https://doi.org/10.1002/jmri.27606
https://doi.org/10.1002/jmri.27673
https://doi.org/10.1002/jmri.27609
https://doi.org/10.1002/jmri.27686
https://doi.org/10.1002/jmri.27611
https://doi.org/10.1002/jmri.27643
https://doi.org/10.1002/jmri.27612
https://doi.org/10.1002/jmri.27616
https://doi.org/10.1002/jmri.27679
https://doi.org/10.1002/jmri.27684
https://doi.org/10.1002/jmri.27676
https://doi.org/10.1002/jmri.27678
https://doi.org/10.1002/jmri.27747
https://doi.org/10.1002/jmri.27602
https://doi.org/10.1002/jmri.27603
https://doi.org/10.1

In [14]:
# df = pd.DataFrame({'title':list_h2, 'url': list_url})

# The three scraped lists are paired row by row, so they must have the same
# length; report all lengths at once if the page structure broke that.
column_lengths = {'Title': len(list_h2),
                  'First Published': len(list_date),
                  'DOI': len(list_url)}
if len(set(column_lengths.values())) != 1:
    raise ValueError(f"Scraped columns differ in length: {column_lengths}")

# `title` is the <title> Tag, so store its text (surrounding whitespace
# removed) instead of handing the Tag object itself to pandas.
df = pd.DataFrame({'Journal': title.get_text(strip=True),
                   #'Category': list_category,
                   'Title': list_h2,
                   'First Published': list_date,
                   'DOI': list_url})
df

Unnamed: 0,Journal,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 54...,T2 Relaxometry Evidence of Microstructural Cha...,14 September 2021,https://doi.org/10.1002/jmri.27233
1,Journal of Magnetic Resonance Imaging: Vol 54...,Issue Information,14 September 2021,https://doi.org/10.1002/jmri.27234
2,Journal of Magnetic Resonance Imaging: Vol 54...,MRI of Temporomandibular Joint Disorders: Rece...,31 August 2020,https://doi.org/10.1002/jmri.27338
3,Journal of Magnetic Resonance Imaging: Vol 54...,Multi-Parametric Evaluation of Cerebral Hemody...,06 May 2021,https://doi.org/10.1002/jmri.27638
4,Journal of Magnetic Resonance Imaging: Vol 54...,Tissue Probability Based Registration of Diffu...,24 April 2021,https://doi.org/10.1002/jmri.27654
5,Journal of Magnetic Resonance Imaging: Vol 54...,T2 Relaxometry Evidence of Microstructural Cha...,06 May 2021,https://doi.org/10.1002/jmri.27661
6,Journal of Magnetic Resonance Imaging: Vol 54...,One-Minute Multi-contrast Echo Planar Brain MR...,04 May 2021,https://doi.org/10.1002/jmri.27641
7,Journal of Magnetic Resonance Imaging: Vol 54...,Editorial for “One-Minute Multi-Contrast Echo ...,11 May 2021,https://doi.org/10.1002/jmri.27685
8,Journal of Magnetic Resonance Imaging: Vol 54...,Serum Ceruloplasmin Depletion is Associated Wi...,05 May 2021,https://doi.org/10.1002/jmri.27680
9,Journal of Magnetic Resonance Imaging: Vol 54...,Editorial for “Serum Ceruloplasmin Depletion i...,11 May 2021,https://doi.org/10.1002/jmri.27694


## Third, manually create the list of categories based on the Issue Information
 - See [here](https://stackoverflow.com/questions/4654414/python-append-item-to-list-n-times) for how to repeat elements in a list N times

In [20]:
# list_category = ['Cover Image', 'Issue Information', 'Commentary']

### Vol 54 No 1 ###
# list_category.extend(['Review Articles'] * 3)
# list_category.extend(['Original Research: Head and Neck', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pelvis'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 1)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 2)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Vascular'] * 1)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro'] * 3)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 3)
# list_category.extend(['Original Research: Breast', 'Editorial'] * 1)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 2)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Safety', 'Editorial'] * 1)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 54 No 2 ###
# list_category.extend(['CME Article'] * 1)
# list_category.extend(['Review Articles'] * 3)
# list_category.extend(['Original Research: Whole Body', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 4)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 2)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 2)
# list_category.extend(['Original Research: Abdomen'] * 3)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 4)
# list_category.extend(['Original Research: Thoracic', 'Editorial'] * 1)
# list_category.extend(['Original Research: Breast'] * 1)
# list_category.extend(['Original Research: Vascular'] * 1)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Case Report'] * 1)
# list_category.extend(['Erratum'] * 1)

### Vol 54 No 3 ###
# list_category.extend(['Review Articles'] * 1)
# list_category.extend(['Original Research: Breast'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 3)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 2)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 1)
# list_category.extend(['Original Research: Head and Neck'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 6)
# list_category.extend(['Original Research: Thoracic'] * 1)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 2)
# list_category.extend(['Original Research: Technical'] * 1)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 1)
# list_category.extend(['Case Report: Technical'] * 1)
# list_category.extend(['Reviewer Appreciation'] * 1)

### Vol 54 No 4 ###
# Each entry is (labels, repeat): the labels are appended `repeat` times in
# a row, in the order the entries appear in the issue.
category_runs = [
  (['Cover Image', 'Issue Information'], 1),
  (['Review Articles'], 1),
  (['Original Research: Neuro'], 3),
  (['Original Research: Neuro', 'Editorial'], 2),
  (['Original Research: Vascular', 'Editorial'], 1),
  (['Original Research: Pediatrics'], 1),
  (['Original Research: Abdomen'], 2),
  (['Original Research: Abdomen', 'Editorial'], 3),
  (['Original Research: Technical'], 1),
  (['Original Research: Pelvis'], 2),
  (['Original Research: Pelvis', 'Editorial'], 1),
  (['Original Research: Cardiac'], 1),
  (['Original Research: Cardiac', 'Editorial'], 3),
  (['Original Research: Breast', 'Editorial'], 1),
  (['Original Research: Musculoskeletal'], 1),
  (['Original Research: Musculoskeletal', 'Editorial'], 2),
  (['Original Research: Head and Neck'], 2),
  (['Original Research: Physics', 'Editorial'], 1),
  (['Commentary'], 1),
  (['Letter to the Editor'], 1),
]

list_category = []
for labels, repeat in category_runs:
  list_category.extend(labels * repeat)

# Print every category followed by the total number of rows.
i = 0
for i, l in enumerate(list_category, start=1):
  print(l)
print(i)

Cover Image
Issue Information
Review Articles
Original Research: Neuro
Original Research: Neuro
Original Research: Neuro
Original Research: Neuro
Editorial
Original Research: Neuro
Editorial
Original Research: Vascular
Editorial
Original Research: Pediatrics
Original Research: Abdomen
Original Research: Abdomen
Original Research: Abdomen
Editorial
Original Research: Abdomen
Editorial
Original Research: Abdomen
Editorial
Original Research: Technical
Original Research: Pelvis
Original Research: Pelvis
Original Research: Pelvis
Editorial
Original Research: Cardiac
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Breast
Editorial
Original Research: Musculoskeletal
Original Research: Musculoskeletal
Editorial
Original Research: Musculoskeletal
Editorial
Original Research: Head and Neck
Original Research: Head and Neck
Original Research: Physics
Editorial
Commentary
Letter to the Editor
46


In [21]:
# DataFrame.insert raises ValueError when the column already exists, which
# made this cell fail on a second run. Dropping any earlier 'Category'
# column first keeps the cell re-runnable with the same result.
df = df.drop(columns=['Category'], errors='ignore')
df.insert(1, 'Category', list_category)
df

Unnamed: 0,Journal,Category,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 54...,Cover Image,T2 Relaxometry Evidence of Microstructural Cha...,14 September 2021,https://doi.org/10.1002/jmri.27233
1,Journal of Magnetic Resonance Imaging: Vol 54...,Issue Information,Issue Information,14 September 2021,https://doi.org/10.1002/jmri.27234
2,Journal of Magnetic Resonance Imaging: Vol 54...,Review Articles,MRI of Temporomandibular Joint Disorders: Rece...,31 August 2020,https://doi.org/10.1002/jmri.27338
3,Journal of Magnetic Resonance Imaging: Vol 54...,Original Research: Neuro,Multi-Parametric Evaluation of Cerebral Hemody...,06 May 2021,https://doi.org/10.1002/jmri.27638
4,Journal of Magnetic Resonance Imaging: Vol 54...,Original Research: Neuro,Tissue Probability Based Registration of Diffu...,24 April 2021,https://doi.org/10.1002/jmri.27654
5,Journal of Magnetic Resonance Imaging: Vol 54...,Original Research: Neuro,T2 Relaxometry Evidence of Microstructural Cha...,06 May 2021,https://doi.org/10.1002/jmri.27661
6,Journal of Magnetic Resonance Imaging: Vol 54...,Original Research: Neuro,One-Minute Multi-contrast Echo Planar Brain MR...,04 May 2021,https://doi.org/10.1002/jmri.27641
7,Journal of Magnetic Resonance Imaging: Vol 54...,Editorial,Editorial for “One-Minute Multi-Contrast Echo ...,11 May 2021,https://doi.org/10.1002/jmri.27685
8,Journal of Magnetic Resonance Imaging: Vol 54...,Original Research: Neuro,Serum Ceruloplasmin Depletion is Associated Wi...,05 May 2021,https://doi.org/10.1002/jmri.27680
9,Journal of Magnetic Resonance Imaging: Vol 54...,Editorial,Editorial for “Serum Ceruloplasmin Depletion i...,11 May 2021,https://doi.org/10.1002/jmri.27694


## Save as csv

In [23]:
# Destination of the CSV for the issue currently loaded; switch to one of
# the commented paths when re-running the notebook for an earlier issue.
csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-4.csv'
# csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-3.csv'
# csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-2.csv'
# csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-1.csv'

# Write the table without the DataFrame index column.
df.to_csv(csv_path, index=False)

## Save as xlsx file
 - See [here](https://xlsxwriter.readthedocs.io/working_with_pandas.html) for instructions

In [None]:
# Install step, needed only when xlsxwriter is missing from the runtime:
# !pip install xlsxwriter
import xlsxwriter

# (Comment out after saving first sheet) Create an ExcelWriter object
# NOTE: the two cells below use `writer`, so the line below must have been
# run once in the current kernel; with it commented out on a fresh kernel
# those cells raise NameError.
# NOTE(review): presumably creating the writer again starts a new workbook at
# this path rather than appending to it - confirm before re-enabling, to
# avoid overwriting sheets saved earlier.
# writer = pd.ExcelWriter('/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-all-summary.xlsx', engine='xlsxwriter')

In [None]:
# Add the current issue's table as its own sheet of the shared workbook.
# The sheet name has to match the issue loaded above (Vol 54 No 4, the same
# issue the CSV was saved for); the earlier names are kept for re-runs.
df.to_excel(excel_writer=writer,
            # sheet_name='jmri-vol-54-no-1',
            # sheet_name='jmri-vol-54-no-2',
            # sheet_name='jmri-vol-54-no-3',
            sheet_name='jmri-vol-54-no-4',
            index=False)

In [None]:
# (Uncomment at the end) Close the Pandas Excel writer and output the Excel file.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0;
# close() writes the workbook to disk and releases the file handle.
writer.close()