# Web Scraping with Pandas

In [12]:
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [13]:
# Set path and initialize Chrome Browser
executable_path = {"executable_path": "/Users/nicolemuscanell/ChromeDriver/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

## NASA Mars News

Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [17]:
# Open browser
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [18]:
# Create bs object and set parser
html = browser.html
soup = bs(html, 'html.parser')

In [19]:
# Search for news titles
title_results = soup.find_all('div', class_='content_title')

# Search for paragraph text under news titles
paragraph_results = soup.find_all('div', class_='article_teaser_body')

# Extract first title and paragraph, and assign to variables
news_title = title_results[0].text
news_p = paragraph_results[0].text

print(f"Title: {news_title}")
print("--------------------")
print(news_p)

Title: Mars Now
--------------------
NASA's Perseverance rover carries a device to convert Martian air into oxygen that, if produced on a larger scale, could be used not just for breathing, but also for fuel.


## JPL Mars Space Images - Featured Image

Visit the url for JPL Featured Space Image and use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called "featured_image_url".

In [24]:
# Open browser
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [25]:
# Click 'full image'
browser.click_link_by_partial_text('FULL IMAGE')

In [26]:
# Click 'more info'
browser.click_link_by_partial_text('more info')

In [27]:
# Create bs object and set parser
html = browser.html
soup = bs(html, 'html.parser')

In [28]:
# Find image url
image = soup.find('figure', class_='lede').a['href']
featured_image_url = 'https://www.jpl.nasa.gov' + image

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18840_hires.jpg


## Mars Facts

Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc

In [29]:
# Scrape table data from page
tables = pd.read_html('https://space-facts.com/mars/')
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [30]:
# Turn into a DataFrame
mars_facts = tables[0]
mars_facts

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [31]:
# Rename columns
mars_facts.columns=['Feature', 'Record']
mars_facts

Unnamed: 0,Feature,Record
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [32]:
# Reset index
mars_facts.set_index('Feature', inplace=True)

In [33]:
# Convert table to html
mars_table = mars_facts.to_html(header=True, index=True)
print(mars_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Record</th>
    </tr>
    <tr>
      <th>Feature</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres

Visit the USGS Astrogeology site and obtain high resolution images for each of Mar's hemispheres. Add these to dictionaries along with the hemisphere names, and then turn this into a list of dictionaries.

In [34]:
# Open browser
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [35]:
# Create bs object and set parser
html = browser.html
soup = bs(html, 'html.parser')

In [36]:
# Finding the hemisphere names

# Create an empty list to store hemisphere names
hemisphere_names = []

# Search for image titles
hemispheres = soup.find_all('div', class_='collapsible results')
names = hemispheres[0].find_all("h3")

# Add hemisphere names to the list
for name in names:
    hemisphere_names.append(name.text.strip('Enhanced'))
       
hemisphere_names

['Cerberus Hemisphere ',
 'Schiaparelli Hemisphere ',
 'Syrtis Major Hemisphere ',
 'Valles Marineris Hemisphere ']

In [37]:
# Finding links images for each hemisphere

# Create empty list to store image urls
links = []

# Locate image links
urls = soup.find_all("div", class_="item")

# Add base link to image links and append to the list
for url in urls:
    hemis_links = url.find('a')['href']
    path = 'https://astrogeology.usgs.gov' + hemis_links
    links.append(path)

links

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [38]:
# Finding urls to full sized images for each hemisphere
image_url = []

# Click each image link 
for link in links:
    browser.visit(link)
    html = browser.html
    soup = bs(html, 'html.parser')
    
# Find full jpgs
    url = soup.find_all('img', class_='wide-image')
    full_urls = url[0]['src']

# Add base link to image links and append to the list
    final_path = 'https://astrogeology.usgs.gov' + full_urls
    image_url.append(final_path)

image_url

['https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']

In [39]:
# Zip lists
hemis_zip = zip(hemisphere_names, image_url)

# Ceate a new list to store dictionaries
hemisphere_image_urls = []

# Add name and url image lists to dictionaries
for name, img in hemis_zip:
    
    hemispheres_dict = {}
    
    # Add hemisphere name to dictionary
    hemispheres_dict['hemisphere_names'] = name
    
    # Add image url to dictionary
    hemispheres_dict['image_url'] = img
    
    # Append the list with dictionaries
    hemisphere_image_urls.append(hemispheres_dict)

hemisphere_image_urls

[{'hemisphere_names': 'Cerberus Hemisphere ',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'hemisphere_names': 'Schiaparelli Hemisphere ',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'hemisphere_names': 'Syrtis Major Hemisphere ',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'hemisphere_names': 'Valles Marineris Hemisphere ',
  'image_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]