In [1]:
# Dependencies: pandas (table parsing), Splinter + webdriver-manager (browser automation), BeautifulSoup (HTML parsing)
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Start a Chrome session driven by Splinter.
# ChromeDriverManager().install() supplies the chromedriver path that Splinter
# receives as `executable_path`; headless=False keeps the browser window visible.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser("chrome", headless=False, **executable_path)



Current google-chrome version is 94.0.4606
Get LATEST driver version for 94.0.4606
Driver [C:\Users\josep\.wdm\drivers\chromedriver\win32\94.0.4606.61\chromedriver.exe] found in cache


In [3]:
# Scrape https://redplanetscience.com/ and collect the latest News Title and Paragraph Text
url = "https://redplanetscience.com/"
browser.visit(url)

# Give the page time to render its article list before snapshotting the HTML.
# Without this wait, `browser.html` can be captured before the first article
# exists, and the `.find(...)` calls below would return None.
# (wait_time is an upper bound in seconds; the call returns as soon as the
# element is present.)
if not browser.is_element_present_by_css("div.content_title", wait_time=10):
    raise RuntimeError(f"No news article was rendered at {url}")

html = browser.html
soup = bs(html, "html.parser")

# `find` returns the first match, i.e. the first article listed on the page.
news_title = soup.find("div", class_="content_title").text.strip()
news_p = soup.find("div", class_="article_teaser_body").text.strip()
print(news_title)
print(news_p)

NASA's Mars Reconnaissance Orbiter Undergoes Memory Update
Other orbiters will continue relaying data from Mars surface missions for a two-week period.


In [4]:
# Use Splinter to navigate https://spaceimages-mars.com/ and find the image url for the current Featured Mars Image
url = "https://spaceimages-mars.com/"
browser.visit(url)
html = browser.html
soup = bs(html, "html.parser")

# The header <img> carries the featured image path in its `src` attribute.
image_src = soup.find("img", class_="headerimage fade-in")["src"]

# Resolve the path against the page URL instead of concatenating a second
# hard-coded copy of the base: urljoin also copes with root-relative ("/...")
# and already-absolute `src` values.
featured_image_url = urljoin(url, image_src)
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars2.jpg


In [6]:
# At https://galaxyfacts-mars.com/ use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
url = "https://galaxyfacts-mars.com/"

# read_html returns one DataFrame per <table> found on the page; header=0
# promotes the first table row to column labels.
tables = pd.read_html(url, header=0)

# Use the non-mutating form of set_index so the parsed frame in `tables[0]`
# stays as read, and `mars_df` is a new frame indexed by the label column.
mars_df = tables[0].set_index("Mars - Earth Comparison")
mars_df.head()

Unnamed: 0_level_0,Mars,Earth
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days


In [7]:
# Use Pandas to convert the data to an HTML table string.
# to_html() is called with its default arguments; the bare `html_table` on the
# last line makes the notebook display the resulting string.
html_table = mars_df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [8]:
# Obtain high resolution images for each of Mars's hemispheres at https://marshemispheres.com/
url = "https://marshemispheres.com/"
browser.visit(url)
html = browser.html
soup = bs(html, "html.parser")

# Locate the links to the hemisphere detail pages: each "description" div on
# the index page contains an anchor whose href points at one detail page.
links = [
    location.find("a")["href"]
    for location in soup.find_all("div", class_="description")
]

# Go to each link and pull the image and hemisphere data
hemisphere_image_urls = []
for link in links:
    # urljoin resolves the href against the site URL, so relative,
    # root-relative and absolute hrefs all yield a valid address
    # (plain string concatenation only works for the relative case).
    browser.visit(urljoin(url, link))

    # Parse the detail page into its own name so the index-page `soup`
    # above is not overwritten on every iteration.
    page_soup = bs(browser.html, "html.parser")

    image_url = urljoin(url, page_soup.find("img", class_="wide-image")["src"])

    # The page heading lives in the "cover" div; drop its final word so only
    # the leading part of the title is kept (e.g. "Cerberus Hemisphere").
    heading = page_soup.find("div", class_="cover").find("h2").text.strip()
    hemisphere = heading.rsplit(" ", 1)[0]

    print(image_url)
    print(hemisphere)
    hemisphere_image_urls.append({"title": hemisphere, "img_url": image_url})

print(hemisphere_image_urls)

https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
Cerberus Hemisphere
https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
Schiaparelli Hemisphere
https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
Syrtis Major Hemisphere
https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
Valles Marineris Hemisphere
[{'title': 'Cerberus Hemisphere', 'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisp

In [9]:
# Scraping is finished: shut down the Splinter-controlled browser session
# that was opened in the setup cell.
browser.quit()