# Web Scraping using Python

## Target Journal: JMRI
- https://onlinelibrary.wiley.com/toc/15222586/2022/55/1
- https://onlinelibrary.wiley.com/toc/15222586/2021/54/6

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#To easily display the plots, make sure to include the line %matplotlib inline as shown below.
%matplotlib inline

import urllib
import requests
import lxml

#To perform web scraping, you should also import the libraries shown below. 
#The urllib.request module is used to open URLs. 
#The Beautiful Soup package is used to extract data from html files. 
#The Beautiful Soup library's name is bs4 which stands for Beautiful Soup, version 4.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup

## First, save as a local html file
 - See: https://zetcode.com/python/beautifulsoup/

In [3]:
from google.colab import drive

# Mount Google Drive so the saved journal pages under /content/drive can be read
# like ordinary local files.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [33]:
# Path of the locally saved table-of-contents page to process.
# Exactly one URL assignment should be active; the commented ones are kept so the
# notebook can be re-run on the earlier issues.
# JMRI Vol 54 No 1
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_Vol 54, No 1.html'
# JMRI Vol 54 No 2
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 2.html'
# JMRI Vol 54 No 3
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 3.html'
# Vol 54 No 4
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 4.html'
# Vol 54 No 5
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 5.html'
# Vol 54 No 6
# URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 54, No 6.html'
# Vol 55 No 1
URL = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/rawdata/Journal of Magnetic Resonance Imaging_ Vol 55, No 1.html'

# Read with an explicit encoding so the result does not depend on the platform
# default (titles on the page contain non-ASCII characters such as curly quotes).
# NOTE(review): assumes the saved pages are UTF-8 -- confirm if a file fails to decode.
with open(URL, 'r', encoding='utf-8') as f:
    contents = f.read()

# Parsing only needs the text, so it happens after the file has been closed.
soup = BeautifulSoup(contents, 'lxml')

Find the corresponding index, and identify the title info

In [34]:
def safeOpenParsePage(targetUrl):
    """Read a locally saved HTML page and parse it with the lxml parser.

    Args:
        targetUrl: Filesystem path of the saved HTML file. Despite the name, the
            value is handed to ``open`` and is not fetched over the network.

    Returns:
        The parsed ``BeautifulSoup`` document, or ``None`` when the file could not
        be opened or read (the error is printed).
    """
    # Network variant kept for reference:
    # tmpurl = urlopen(Request(targetUrl, headers={'User-Agent': 'Chrome/92.0.4515.107'}))
    try:
        # The context manager closes the handle even when reading fails.
        with open(targetUrl, 'r', encoding='utf-8') as page_file:
            raw_html = page_file.read()
    except OSError as e:
        # OSError covers missing or unreadable files. urllib.error.HTTPError is a
        # subclass of OSError, so this handler also fits the network variant above.
        print(e)
        return None
    # tmpSoup = BeautifulSoup(raw_html, 'html.parser')
    return BeautifulSoup(raw_html, 'lxml')

# Parse the selected issue page; when it loaded, dump the indented markup so the
# page structure can be inspected.
soupJMRI = safeOpenParsePage(URL)
if soupJMRI is not None:
    pretty_markup = soupJMRI.prettify()
    print(pretty_markup)

<!DOCTYPE html>
<html class="pb-page" data-request-id="cf15f650-9dbd-40f2-bc22-1827e3d04b82" lang="en">
 <head data-pb-dropzone="head">
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content=";journal:journal:15222586;issue:issue:doi\:10.1002/jmri.v55.1;ctype:string:Journal Content;website:website:pericles;page:string:Table of Contents;requestedJournal:journal:15222586;wgroup:string:Publication Websites;pageGroup:string:Publication Pages" name="pbContext"/>
  <script type="text/javascript">
   var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";
  </script>
  <script id="analyticDigitalData">
   digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2021-12-24"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"ReferrerUser","uuid":"5a2c1d70-1255-4a45-95f3-a3aa3a2771fb"},{"type":"InstitutionUser","uuid"

In [35]:
# Get the title: the first <title> element of the parsed page
title = soupJMRI.find("title")
print(title)

<title> Journal of Magnetic Resonance Imaging: Vol 55, No 1</title>


In [36]:
# Print out the text
# Extract the text of the parsed issue page once and print that same value.
# Printing `text` keeps this cell independent of the separate `soup` variable
# created when the file was first read.
text = soupJMRI.get_text()
print(text)

# Another way to extract text
# str(all_links[103]).split("<h2>")[1].replace("</h2></a>", "")



var $DoubleClickZone = "j-magres-imaging_jmri";var $DoubleClickSite =  "wly.radiol.imag_000105";digitalData = {"site":{"ip":"152.78.0.24","environment":"LIVE","website":"onlinelibrary.wiley.com","websiteCode":"pericles","serverDate":"2021-12-24"},"identities":[{"type":"BasicGroup","uuid":"e623deab-c83b-41e3-84a2-d9f1d46c5f17"},{"type":"ReferrerUser","uuid":"5a2c1d70-1255-4a45-95f3-a3aa3a2771fb"},{"type":"InstitutionUser","uuid":"58cbd8e6-3c5d-4fc0-ac26-57c4792d2c4b","customerRecords":[{"customerDomain":"ALM-CU","customerNumber":"EALWB000204"}]},{"type":"SmartGroupUser","uuid":"cf9c85b9-7f3b-4309-aeaa-c41669cabe3b"},{"type":"SmartGroupUser","uuid":"db8b8d10-1aed-4809-b3ee-70938c569014"},{"type":"SmartGroupUser","uuid":"cdce41c2-a8de-4a64-a4d9-a85caaaa2314"},{"type":"SmartGroupUser","uuid":"cb1790a9-a0b9-40b3-915d-806df593aeeb"},{"type":"ReferrerUser","uuid":"6401b279-b825-4f41-a389-04e29feb967f"},{"type":"InstitutionUser","uuid":"bfa4ccf8-8126-488a-b090-da278aa7b4d9","customerRecords"

In [37]:
# Collect every anchor element; the article DOI links are filtered out of these later.
all_links = soupJMRI.find_all(name='a')

### Explore Category

In [38]:
# <h3> headings carry the section labels of the issue (e.g. "Review Articles",
# "Editorial"); the listing also picks up site chrome such as "Menu".

all_h3 = soupJMRI.find_all("h3")

for section_heading in all_h3:
    print(section_heading.get_text())
# Number of <h3> headings found
print(len(all_h3))

Menu
Format
Type of import
Cover Image
Issue Information
JMRI-ISMRM Recommendation
Review Articles
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
Research Articles
Editorial
About Wiley Online Library
Help & Support
Opportunities
Connect with Wiley
33


### Explore Original Research Subcategory

In [39]:
# <h4> headings carry the Original Research subcategory labels (e.g. "Neuro", "Cardiac")

all_h4 = soupJMRI.find_all("h4")

for subcategory_heading in all_h4:
    print(subcategory_heading.get_text())
# Number of <h4> headings found
print(len(all_h4))

Abdomen
Neuro
Neuro
Neuro
Neuro
Cardiac
Cardiac
Cardiac
Musculoskeletal
Vascular
Pelvis
Technical
12


In [40]:
all_h2 = soupJMRI.find_all("h2")

# Keep only <h2> texts longer than three words; shorter ones are skipped.
long_h2_texts = [h2.get_text() for h2 in all_h2 if len(h2.get_text().split()) > 3]

for heading_text in long_h2_texts:
    print(heading_text)
print(len(long_h2_texts))

# str_h2 = str(all_h2)

# links = soupJMRI.find_all("a")
# str_links = str(links)
# print(str_h2)

# cleantext = BeautifulSoup(str_h2, "lxml").get_text()
# cleantext = BeautifulSoup(str_links, "lxml").get_text()
# print(cleantext)

Four-Dimensional flow Magnetic Resonance Imaging for Assessment of Pediatric Coarctation of the Aorta
Histological Validation of MRI: A Review of Challenges in Registration of Imaging and Whole-Mount Histopathology
VI-RADS for Bladder Cancer: Current Applications and Future Developments
Primary Site Identification of Soft-Tissue Mass: Things to Know in MRI Assessment
Machine Learning in Meningioma MRI: Past to Present. A Narrative Review
Black-Blood Contrast in Cardiovascular MRI
Real-Time Magnetic Resonance Imaging
Integrative Machine Learning Prediction of Prostate Biopsy Results From Negative Multiparametric MRI
MRI-Based Quantitative R2* Mapping at 3 Tesla Reflects Hepatic Iron Overload and Pathogenesis in Nonalcoholic Fatty Liver Disease Patients
Multi-shot Diffusion-Weighted MRI With Multiplexed Sensitivity Encoding (MUSE) in the Assessment of Active Inflammation in Crohn's Disease
Editorial for “Multi-Shot Diffusion-Weighted Imaging With Multiplexed Sensitivity Encoding (MUSE) i

## Next, clean up the title list

In [61]:
# Start from every <h2> text longer than three words.
list_h2 = [h2.get_text() for h2 in all_h2 if len(h2.get_text().split()) > 3]

# Delete the last element: "Log in to Wiley Online Library"
del list_h2[-1]

# Insert "Issue Information" as the 2nd element
list_h2.insert(1, "Issue Information")

# Per-issue manual adjustments used for earlier issues (kept for re-runs):
# JMRI Vol 54 No 2
# list_h2.append("Erratum")

# JMRI Vol 54 No 3
# list_h2.insert(2, "Commentary")
# list_h2.append("Condensation Artifact")
# list_h2.append("Reviewer Acknowledgements")

# JMRI Vol 54 No 6
# list_h2.insert(2, "ISMRM Young Investigator Award Winners")

# JMRI Vol 55 No 1 Pop out a few more
del list_h2[-2:]

for article_title in list_h2:
    print(article_title)
print(len(list_h2))

Four-Dimensional flow Magnetic Resonance Imaging for Assessment of Pediatric Coarctation of the Aorta
Issue Information
Histological Validation of MRI: A Review of Challenges in Registration of Imaging and Whole-Mount Histopathology
VI-RADS for Bladder Cancer: Current Applications and Future Developments
Primary Site Identification of Soft-Tissue Mass: Things to Know in MRI Assessment
Machine Learning in Meningioma MRI: Past to Present. A Narrative Review
Black-Blood Contrast in Cardiovascular MRI
Real-Time Magnetic Resonance Imaging
Integrative Machine Learning Prediction of Prostate Biopsy Results From Negative Multiparametric MRI
MRI-Based Quantitative R2* Mapping at 3 Tesla Reflects Hepatic Iron Overload and Pathogenesis in Nonalcoholic Fatty Liver Disease Patients
Multi-shot Diffusion-Weighted MRI With Multiplexed Sensitivity Encoding (MUSE) in the Assessment of Active Inflammation in Crohn's Disease
Editorial for “Multi-Shot Diffusion-Weighted Imaging With Multiplexed Sensitivity

### Add dates first published

In [42]:
# <li> elements whose class list is exactly ['ePubDate'] hold the "First Published" date.

all_li = soupJMRI.find_all("li")

# Keep the part after the first ': ' of each matching item's text.
list_date = [
    item.get_text().split(': ')[1]
    for item in all_li
    if item.get_attribute_list("class") == ['ePubDate']
]

for published_on in list_date:
    print(published_on)
print(len(list_date))

15 December 2021
15 December 2021
31 October 2020
17 September 2020
18 September 2020
02 October 2020
19 October 2020
09 December 2020
23 June 2021
28 June 2021
25 June 2021
19 June 2021
14 July 2021
29 July 2021
30 June 2021
23 July 2021
28 June 2021
11 July 2021
15 July 2021
25 August 2021
24 June 2021
27 July 2021
26 June 2021
06 July 2021
25 June 2021
22 June 2021
08 July 2021
30 June 2021
28 July 2021
29 July 2021
21 June 2021
05 July 2021
28 June 2021
19 July 2021
13 July 2021
24 August 2021
27 July 2021
37


In [43]:
# Build the DOI link of every anchor whose class attribute mentions 'visitable'.
# (An extra "'/doi/' in href" condition was tried earlier and is not applied.)
list_url = []
for anchor in all_links:
    css_classes = str(anchor.get("class"))
    if 'visitable' not in css_classes:
        continue
    # Rewrite the '/doi' prefix of the href into the doi.org resolver address.
    list_url.append(anchor.get("href").replace('/doi', 'https://doi.org'))

for doi_link in list_url:
    print(doi_link)
print(len(list_url))

https://doi.org/10.1002/jmri.27704
https://doi.org/10.1002/jmri.27705
https://doi.org/10.1002/jmri.27409
https://doi.org/10.1002/jmri.27361
https://doi.org/10.1002/jmri.27368
https://doi.org/10.1002/jmri.27378
https://doi.org/10.1002/jmri.27399
https://doi.org/10.1002/jmri.27411
https://doi.org/10.1002/jmri.27793
https://doi.org/10.1002/jmri.27810
https://doi.org/10.1002/jmri.27801
https://doi.org/10.1002/jmri.27795
https://doi.org/10.1002/jmri.27832
https://doi.org/10.1002/jmri.27872
https://doi.org/10.1002/jmri.27806
https://doi.org/10.1002/jmri.27862
https://doi.org/10.1002/jmri.27812
https://doi.org/10.1002/jmri.27831
https://doi.org/10.1002/jmri.27823
https://doi.org/10.1002/jmri.27901
https://doi.org/10.1002/jmri.27805
https://doi.org/10.1002/jmri.27855
https://doi.org/10.1002/jmri.27802
https://doi.org/10.1002/jmri.27798
https://doi.org/10.1002/jmri.27804
https://doi.org/10.1002/jmri.27800
https://doi.org/10.1002/jmri.27828
https://doi.org/10.1002/jmri.27809
https://doi.org/10.1

In [62]:
# Assemble one row per table-of-contents entry. The 'Category' column is not built
# here; it is inserted by a later cell from list_category.

# Fail early with the actual lengths when the scraped lists are out of step;
# this usually means the manual title clean-up needs adjusting for this issue.
column_lengths = {'Title': len(list_h2),
                  'First Published': len(list_date),
                  'DOI': len(list_url)}
if len(set(column_lengths.values())) > 1:
    raise ValueError(f"Scraped columns differ in length: {column_lengths}")

# `title` is the parsed <title> element; pass its text explicitly instead of
# relying on pandas/numpy to coerce the element object into a cell value.
# The text is deliberately left unstripped.
journal_label = title if isinstance(title, str) else title.get_text()

df = pd.DataFrame({'Journal': journal_label,
                   'Title': list_h2,
                   'First Published': list_date,
                   'DOI': list_url})
df

Unnamed: 0,Journal,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 55...,Four-Dimensional flow Magnetic Resonance Imagi...,15 December 2021,https://doi.org/10.1002/jmri.27704
1,Journal of Magnetic Resonance Imaging: Vol 55...,Issue Information,15 December 2021,https://doi.org/10.1002/jmri.27705
2,Journal of Magnetic Resonance Imaging: Vol 55...,Histological Validation of MRI: A Review of Ch...,31 October 2020,https://doi.org/10.1002/jmri.27409
3,Journal of Magnetic Resonance Imaging: Vol 55...,VI-RADS for Bladder Cancer: Current Applicatio...,17 September 2020,https://doi.org/10.1002/jmri.27361
4,Journal of Magnetic Resonance Imaging: Vol 55...,Primary Site Identification of Soft-Tissue Mas...,18 September 2020,https://doi.org/10.1002/jmri.27368
5,Journal of Magnetic Resonance Imaging: Vol 55...,Machine Learning in Meningioma MRI: Past to Pr...,02 October 2020,https://doi.org/10.1002/jmri.27378
6,Journal of Magnetic Resonance Imaging: Vol 55...,Black-Blood Contrast in Cardiovascular MRI,19 October 2020,https://doi.org/10.1002/jmri.27399
7,Journal of Magnetic Resonance Imaging: Vol 55...,Real-Time Magnetic Resonance Imaging,09 December 2020,https://doi.org/10.1002/jmri.27411
8,Journal of Magnetic Resonance Imaging: Vol 55...,Integrative Machine Learning Prediction of Pro...,23 June 2021,https://doi.org/10.1002/jmri.27793
9,Journal of Magnetic Resonance Imaging: Vol 55...,MRI-Based Quantitative R2* Mapping at 3 Tesla ...,28 June 2021,https://doi.org/10.1002/jmri.27810


## Third, manually create the list of categories based on Issue Information
 - See [here](https://stackoverflow.com/questions/4654414/python-append-item-to-list-n-times) for repeating elements in a list N times

In [64]:
# list_category = ['Cover Image', 'Issue Information', 'Commentary']

### Vol 54 No 1 ###
# list_category.extend(['Review Articles'] * 3)
# list_category.extend(['Original Research: Head and Neck', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pelvis'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 1)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 2)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Vascular'] * 1)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro'] * 3)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 3)
# list_category.extend(['Original Research: Breast', 'Editorial'] * 1)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 2)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Safety', 'Editorial'] * 1)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 54 No 2 ###
# list_category.extend(['CME Article'] * 1)
# list_category.extend(['Review Articles'] * 3)
# list_category.extend(['Original Research: Whole Body', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 4)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 2)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 2)
# list_category.extend(['Original Research: Abdomen'] * 3)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 4)
# list_category.extend(['Original Research: Thoracic', 'Editorial'] * 1)
# list_category.extend(['Original Research: Breast'] * 1)
# list_category.extend(['Original Research: Vascular'] * 1)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Case Report'] * 1)
# list_category.extend(['Erratum'] * 1)

### Vol 54 No 3 ###
# list_category.extend(['Review Articles'] * 1)
# list_category.extend(['Original Research: Breast'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 3)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 2)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 1)
# list_category.extend(['Original Research: Head and Neck'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 6)
# list_category.extend(['Original Research: Thoracic'] * 1)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 2)
# list_category.extend(['Original Research: Technical'] * 1)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 1)
# list_category.extend(['Case Report: Technical'] * 1)
# list_category.extend(['Reviewer Appreciation'] * 1)

### Vol 54 No 4 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['Review Articles'] * 1)
# list_category.extend(['Original Research: Neuro'] * 3)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 2)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Pediatrics'] * 1)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 3)
# list_category.extend(['Original Research: Technical'] * 1)
# list_category.extend(['Original Research: Pelvis'] * 2)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 3)
# list_category.extend(['Original Research: Breast', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 2)
# list_category.extend(['Original Research: Head and Neck'] * 2)
# list_category.extend(['Original Research: Physics', 'Editorial'] * 1)
# list_category.extend(['Commentary'] * 1)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 54 No 5 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['Review Articles'] * 1)
# list_category.extend(['Original Research: Pediatrics'] * 1)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 4)
# list_category.extend(['Original Research: Pelvis'] * 1)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 1)
# list_category.extend(['Original Research: Cardiac'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Vascular', 'Editorial'] * 1)
# list_category.extend(['Original Research: Chest'] * 1)
# list_category.extend(['Original Research: Head and Neck'] * 1)
# list_category.extend(['Original Research: Head and Neck', 'Editorial'] * 1)
# list_category.extend(['Original Research: Thoracic'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 3)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 4)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 54 No 6 ###
# list_category = ['Cover Image', 'Issue Information']
# list_category.extend(['Announcement'] * 1)
# list_category.extend(['CME Article: Neuro'] * 1)
# list_category.extend(['Original Research: Head and Neck', 'Editorial'] * 1)
# list_category.extend(['Original Research: Musculoskeletal', 'Editorial'] * 2)
# list_category.extend(['Original Research: Cardiac'] * 1)
# list_category.extend(['Original Research: Cardiac', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pediatrics', 'Editorial'] * 1)
# list_category.extend(['Original Research: Neuro'] * 2)
# list_category.extend(['Original Research: Neuro', 'Editorial'] * 5)
# list_category.extend(['Original Research: Abdomen'] * 2)
# list_category.extend(['Original Research: Abdomen', 'Editorial'] * 3)
# list_category.extend(['Original Research: Technical', 'Editorial'] * 2)
# list_category.extend(['Original Research: Pelvis'] * 1)
# list_category.extend(['Original Research: Pelvis', 'Editorial'] * 1)
# list_category.extend(['Letter to the Editor'] * 1)

### Vol 55 No 1 ###
# Category labels for Vol 55 No 1, in table-of-contents order.
# Each entry is (labels, repeats): the label group is appended `repeats` times, so a
# ['Original Research: X', 'Editorial'] group yields alternating article/editorial rows.
list_category = ['Cover Image', 'Issue Information']

category_runs = [
    (['JMRI-ISMRM Recommendation'], 1),
    (['Review Articles'], 5),
    (['Original Research: Abdomen'], 2),
    (['Original Research: Abdomen', 'Editorial'], 1),
    (['Original Research: Neuro', 'Editorial'], 4),
    (['Original Research: Cardiac', 'Editorial'], 3),
    (['Original Research: Musculoskeletal'], 1),
    (['Original Research: Musculoskeletal', 'Editorial'], 1),
    (['Original Research: Vascular'], 1),
    (['Original Research: Pelvis'], 2),
    (['Original Research: Pelvis', 'Editorial'], 1),
    (['Original Research: Technical'], 1),
    (['Original Research: Technical', 'Editorial'], 1),
]
for labels, repeats in category_runs:
    list_category.extend(labels * repeats)

# Show every label followed by the total count.
for category_label in list_category:
    print(category_label)
print(len(list_category))

Cover Image
Issue Information
JMRI-ISMRM Recommendation
Review Articles
Review Articles
Review Articles
Review Articles
Review Articles
Original Research: Abdomen
Original Research: Abdomen
Original Research: Abdomen
Editorial
Original Research: Neuro
Editorial
Original Research: Neuro
Editorial
Original Research: Neuro
Editorial
Original Research: Neuro
Editorial
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Cardiac
Editorial
Original Research: Musculoskeletal
Original Research: Musculoskeletal
Editorial
Original Research: Vascular
Original Research: Pelvis
Original Research: Pelvis
Original Research: Pelvis
Editorial
Original Research: Technical
Original Research: Technical
Editorial
37


In [65]:
# Add the manually built category labels as the second column.
# DataFrame.insert raises ValueError when the column already exists, so re-running
# this cell would fail; on a re-run the existing column is overwritten in place.
if 'Category' in df.columns:
    df['Category'] = list_category
else:
    df.insert(1, 'Category', list_category)
df

Unnamed: 0,Journal,Category,Title,First Published,DOI
0,Journal of Magnetic Resonance Imaging: Vol 55...,Cover Image,Four-Dimensional flow Magnetic Resonance Imagi...,15 December 2021,https://doi.org/10.1002/jmri.27704
1,Journal of Magnetic Resonance Imaging: Vol 55...,Issue Information,Issue Information,15 December 2021,https://doi.org/10.1002/jmri.27705
2,Journal of Magnetic Resonance Imaging: Vol 55...,JMRI-ISMRM Recommendation,Histological Validation of MRI: A Review of Ch...,31 October 2020,https://doi.org/10.1002/jmri.27409
3,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,VI-RADS for Bladder Cancer: Current Applicatio...,17 September 2020,https://doi.org/10.1002/jmri.27361
4,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Primary Site Identification of Soft-Tissue Mas...,18 September 2020,https://doi.org/10.1002/jmri.27368
5,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Machine Learning in Meningioma MRI: Past to Pr...,02 October 2020,https://doi.org/10.1002/jmri.27378
6,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Black-Blood Contrast in Cardiovascular MRI,19 October 2020,https://doi.org/10.1002/jmri.27399
7,Journal of Magnetic Resonance Imaging: Vol 55...,Review Articles,Real-Time Magnetic Resonance Imaging,09 December 2020,https://doi.org/10.1002/jmri.27411
8,Journal of Magnetic Resonance Imaging: Vol 55...,Original Research: Abdomen,Integrative Machine Learning Prediction of Pro...,23 June 2021,https://doi.org/10.1002/jmri.27793
9,Journal of Magnetic Resonance Imaging: Vol 55...,Original Research: Abdomen,MRI-Based Quantitative R2* Mapping at 3 Tesla ...,28 June 2021,https://doi.org/10.1002/jmri.27810


## Save as csv

In [66]:
# Output file for the issue currently loaded; paths used for earlier issues are kept
# below for reference.
#path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri/jmri-vol-54-no-6.csv',
#path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-5.csv',
#path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-4.csv',
# path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-3.csv',
# path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-2.csv',
# path_or_buf='/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-vol-54-no-1.csv',
csv_path = '/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri/jmri-vol-55-no-1.csv'

# Write the table without the row index column.
df.to_csv(csv_path, index=False)

## Save as xlsx file
 - See [here](https://xlsxwriter.readthedocs.io/working_with_pandas.html) for instructions

In [None]:
# !pip install xlsxwriter
import xlsxwriter

# (Comment out after saving first sheet) Create an ExcelWriter object
# writer = pd.ExcelWriter('/content/drive/MyDrive/UHS-MRIPhysics-journal-web-scrapping/processed/jmri-all-summary.xlsx', engine='xlsxwriter')

In [None]:
# Write the current table as one sheet of the summary workbook, without the row index.
# `writer` must already exist: it is created by the (commented-out) pd.ExcelWriter
# line in the previous cell.
# NOTE(review): the active sheet name is for Vol 54 No 3 while the page loaded above
# is Vol 55 No 1 -- confirm the sheet name before running.
# Sheet names used so far:
# sheet_name='jmri-vol-54-no-1',
# sheet_name='jmri-vol-54-no-2',
df.to_excel(writer, sheet_name='jmri-vol-54-no-3', index=False)

In [None]:
# (Uncomment at the end) Close the Pandas Excel writer and output the Excel file.
# close() writes the workbook and releases the file handle; ExcelWriter.save() was
# deprecated in pandas 1.5 and removed in pandas 2.0.
writer.close()