# Mars Scraper

* Scrape the latest news title and teaser paragraph from the Mars news site: https://redplanetscience.com/
* Use Splinter to get the featured Mars image from JPL: https://spaceimages-mars.com/
* Use pandas to scrape the Mars profile fact table from: https://galaxyfacts-mars.com/
* Scrape high-res photos of Mars' hemispheres: https://marshemispheres.com/

In [5]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

## get latest news headline

In [6]:
# Launch a visible (non-headless) Chrome session through splinter, using the
# chromedriver path that webdriver_manager resolves for the installed Chrome.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# visit web page
url = "https://redplanetscience.com/"
browser.visit(url)

# If the article list is inserted by client-side script, the HTML taken right
# after visit() may not contain it yet. Wait (up to 10 s) for the first
# "div.list_text" to show up before snapshotting the page.
browser.is_element_present_by_css("div.list_text", wait_time=10)

# create html object and scrape into soup
html = browser.html
soup = bs(html, "html.parser")



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\T\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


In [7]:
# Peek at the first news entry to see which child divs it contains
soup.find("div", attrs={"class": "list_text"})

<div class="list_text">
<div class="list_date">July 5, 2021</div>
<div class="content_title">NASA to Reveal Name of Its Next Mars Rover</div>
<div class="article_teaser_body">After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. </div>
</div>

In [8]:
# Look up the first "list_text" div once (taken here to be the latest
# article) and read the date, headline and teaser from its child divs.
latest_article = soup.find("div", class_="list_text")

news_date = latest_article.find("div", class_="list_date").text
news_title = latest_article.find("div", class_="content_title").text
news_teaser = latest_article.find("div", class_="article_teaser_body").text

# One value per line, same output as three separate print calls
print(news_date, news_title, news_teaser, sep="\n")

July 5, 2021
NASA to Reveal Name of Its Next Mars Rover
After a months-long contest among students to name NASA's newest Mars rover, the agency will reveal the winning name — and the winning student — this Thursday. 


## get featured Mars image

In [9]:
# Point the browser at the featured-image site
url = "https://spaceimages-mars.com/"
browser.visit(url)

# Grab the current page source and parse it with Python's built-in HTML parser
html = browser.html
soup = bs(markup=html, features="html.parser")

In [10]:
# The header <img> carries a relative "src". Resolve it against the page URL
# with urljoin so that a root-relative ("/image/...") or already-absolute src
# still yields a valid link, which plain string concatenation would not.
header_img = soup.find("img", class_="headerimage fade-in")
featured_image_url = urljoin(url, header_img["src"])

print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


## get Mars profile fact table

In [11]:
# grab tables from web page (read_html returns a list of DataFrames)
url = "https://galaxyfacts-mars.com/"
dfs = pd.read_html(url)

# The Mars profile is the second parsed table. Build df_mars as a new frame
# rather than using inplace=True, so the raw tables held in `dfs` are left
# untouched by this step.
profile_raw = dfs[1]
df_mars = profile_raw.set_index(profile_raw.columns[0])
df_mars.index.name = None  # remove the label left over from the former column

df_mars

Unnamed: 0,1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# convert df to an HTML table string; `header` is a boolean flag, so pass
# False explicitly to leave out the column-label row
mars_table = df_mars.to_html(header=False)

mars_table

'<table border="1" class="dataframe">\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## get high-res hemisphere images

In [13]:
# Point the browser at the hemispheres landing page
url = "https://marshemispheres.com/"
browser.visit(url)

# Grab the current page source and parse it with Python's built-in HTML parser
html = browser.html
soup = bs(markup=html, features="html.parser")

In [14]:
# div with image links: each "description" div supplies a heading (h3) and a
# link (a) to that hemisphere's own page
descriptions = soup.find_all("div", class_='description')

# The previous version cut nine characters off every heading, which is the
# length of this suffix (NOTE(review): confirm the headings end with it).
title_suffix = " Enhanced"

hemisphere_img_urls = []

# loop through divs to get title and link to full-size images
for desc in descriptions:
    # Remove the suffix only when it is really there, so a heading without it
    # is kept whole instead of being truncated.
    title = desc.h3.text.strip()
    if title.endswith(title_suffix):
        title = title[:-len(title_suffix)]

    # urljoin handles relative, root-relative and absolute hrefs alike
    hemi_url = urljoin(url, desc.a['href'])
    browser.visit(hemi_url)
    hemi_html = browser.html
    hemi_soup = bs(hemi_html, "html.parser")

    img_url = urljoin(url, hemi_soup.find("img", class_="wide-image")['src'])
    hemi_dict = {"title": title, "img_url": img_url}
    hemisphere_img_urls.append(hemi_dict)


In [15]:
# Show each hemisphere title next to its image link as a quick sanity check.
# NOTE(review): none of the cells shown here closes the splinter session;
# consider calling browser.quit() once scraping is finished.
for hemi in hemisphere_img_urls:
    print(hemi["title"], hemi["img_url"])

Cerberus Hemisphere https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
Schiaparelli Hemisphere https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
Syrtis Major Hemisphere https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
Valles Marineris Hemisphere https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
