In [1]:
#import dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import os
import pandas as pd
import time
from webdriver_manager.chrome import ChromeDriverManager

### Set up Splinter and connect to the NASA Mars news site

In [2]:

# Install (or reuse the cached) chromedriver binary and hand its path to
# Splinter, then open a visible (non-headless) Chrome window.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 92.0.4515
Get LATEST driver version for 92.0.4515
Driver [C:\Users\racht\.wdm\drivers\chromedriver\win32\92.0.4515.107\chromedriver.exe] found in cache


In [3]:
# Mars news listing page; it is loaded in the Splinter-controlled browser so
# that the page HTML can be read back from `browser.html` afterwards.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:

# Give the page a few seconds to show the article list before the HTML is
# snapshotted, in case the list is populated after the initial load. The
# boolean result is deliberately ignored so that a timeout leaves the rest of
# the cell behaving exactly as it did without the wait.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=5)

# Parse the current page HTML with BeautifulSoup and keep the first article
# slide; select_one returns None when nothing matches the selector.
html = browser.html
news_soup = bs(html, "html.parser")
slide_element = news_soup.select_one("ul.item_list li.slide")

### Find the most recent news title and paragraph

In [5]:
# Find the latest news title element (displayed here to inspect its markup)
slide_element.find("div", class_="content_title")

<div class="content_title"><a href="/news/9012/nasas-ingenuity-mars-helicopter-spots-perseverance-from-above/" target="_self">NASA's Ingenuity Mars Helicopter Spots Perseverance From Above</a></div>

In [6]:
# Locate the title container of the selected slide and keep only its text.
title_div = slide_element.find("div", class_="content_title")
news_title = title_div.get_text()

In [7]:
# Locate the teaser container of the selected slide and keep only its text.
teaser_div = slide_element.find("div", class_="article_teaser_body")
news_paragraph = teaser_div.get_text()

In [8]:
# Show the headline and its teaser, one per line.
print(news_title, news_paragraph, sep="\n")

NASA's Ingenuity Mars Helicopter Spots Perseverance From Above
Can you see NASA’s newest rover in this picture from Jezero Crater?


### Scrape HTML for the Featured Image 

In [9]:
# Site from which the featured Mars image is scraped.
featured_url = "https://spaceimages-mars.com/"

# Load it in the Splinter-controlled browser.
browser.visit(featured_url)

In [10]:
# Click the first link whose text contains 'FULL IMAGE'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('FULL IMAGE').click()



In [11]:
# Snapshot the page as it stands after the 'FULL IMAGE' click and parse it.
bigimage_html = browser.html
bigimage_soup = bs(bigimage_html, 'html.parser')

# The element carrying the 'headerimage fade-in' class holds the image path
# in its `src` attribute.
bigimage = bigimage_soup.body.find(class_='headerimage fade-in')
bigimage_img = bigimage['src']

# Prefix the scraped path with the site root to form the full URL.
bigimage_base_url = 'https://spaceimages-mars.com/'
featured_image_url = ''.join((bigimage_base_url, bigimage_img))
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars2.jpg


### Scrape a webpage for facts about Mars

In [12]:

# Let pandas fetch the Mars facts page and parse its HTML tables;
# read_html returns a list of DataFrames.
facts_url = "https://galaxyfacts-mars.com/"
mars_facts = pd.read_html(facts_url)

mars_facts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
# Pick the first table from the list returned by read_html (the Mars/Earth
# comparison) and preview its first rows. Note that this is a reference to
# the list element, not a copy.
mars_df = mars_facts[0]
mars_df.head()

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"


In [14]:
# Clean up the header row: promote the first data row to column labels and
# then drop it from the body.
#
# The frame is re-derived from a copy of mars_facts[0] each time, so that
#   * the table stored in `mars_facts` is not modified in place, and
#   * re-running this cell on its own gives the same result instead of
#     promoting the wrong row and failing to find label 0 to drop.
header_row = 0
mars_df = mars_facts[0].copy()
mars_df.columns = mars_df.iloc[header_row]
mars_df = mars_df.drop(header_row)

In [15]:
# Render the cleaned DataFrame as an HTML <table> string and display the raw
# string (newline characters included) as the cell output.
html_table = mars_df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars - Earth Comparison</th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Temperature:</td>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [16]:
# Disabled: html_table.replace('\n', '') would return the table markup with its newline characters removed.

### Extract the Mars Hemispheres images 

In [26]:
# Visit the hemispheres index page and create the list that collects one
# {"title", "img_url"} dict per hemisphere (Cerberus and the other three).

mars_hemispheres_url = "https://marshemispheres.com/"

browser.visit(mars_hemispheres_url)
hemisphere_image_urls = []

In [27]:
# Start from an empty list so that re-running this cell on its own does not
# append a second copy of every hemisphere to an already populated list.
hemisphere_image_urls = []

# Scraped image paths are appended to this base URL (constant for all pages).
hem_base_url = 'https://marshemispheres.com/'

for i in range(4):
    # Look the <h3> headings up again on every pass and open the i-th one.
    browser.find_by_tag('h3')[i].click()

    # Parse the page that is now loaded.
    hemispheres_html = browser.html
    hem_soup = bs(hemispheres_html, 'html.parser')

    # Path of the 'wide-image' <img> and the text of the page's title heading.
    hem = hem_soup.body.find('img', class_='wide-image')
    hem_img = hem['src']
    title = hem_soup.find('h2', class_='title').get_text()

    hem_url = hem_base_url + hem_img
    hem_dict = {"title": title, "img_url": hem_url}
    hemisphere_image_urls.append(hem_dict)

    # Go back one page so the next iteration starts from the index again.
    browser.back()

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]


In [24]:
# Open the Cerberus page from the hemispheres index, matching the
# Schiaparelli, Syrtis and Valles sections, which all click the hemisphere
# name before clicking 'Open'. NOTE(review): this assumes the browser is on
# the index page here and that 'Open' is only available after this click;
# confirm against the site.
browser.links.find_by_partial_text('Cerberus').click()

In [None]:
# Click the first link whose text contains 'Open'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Open').click()

In [None]:
# Parse whatever page the browser currently shows.
hemispheres_html = browser.html
cerberus_soup = bs(hemispheres_html, 'html.parser')

# The 'wide-image' <img> carries the image path in its `src` attribute.
cerberus = cerberus_soup.body.find('img', class_='wide-image')
cerberus_img = cerberus['src']

# Prefix the scraped path with the site root to form the full URL.
hem_base_url = 'https://marshemispheres.com/'
cerberus_url = ''.join((hem_base_url, cerberus_img))
print(cerberus_url)

In [None]:
# Reload the hemispheres index page before the next hemisphere is opened.
mars_hemispheres_url = "https://marshemispheres.com/"

browser.visit(mars_hemispheres_url)

In [None]:
# Click the first link whose text contains 'Schiaparelli'. This goes through
# the `browser.links` finder because `click_link_by_partial_text` is
# deprecated in Splinter.
browser.links.find_by_partial_text('Schiaparelli').click()

In [None]:
# Click the first link whose text contains 'Open'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Open').click()

In [None]:
# Parse whatever page the browser currently shows.
schiap_html = browser.html
schiap_soup = bs(schiap_html, 'html.parser')

# The 'wide-image' <img> carries the image path in its `src` attribute.
schiap = schiap_soup.body.find('img', class_='wide-image')
schiap_img = schiap['src']

# Prefix the scraped path with the site root to form the full URL.
hem_base_url = 'https://marshemispheres.com/'
schiap_url = ''.join((hem_base_url, schiap_img))
print(schiap_url)

In [None]:
# Reload the hemispheres index page before the next hemisphere is opened.
mars_hemispheres_url = "https://marshemispheres.com/"

browser.visit(mars_hemispheres_url)

In [None]:
# Click the first link whose text contains 'Syrtis'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Syrtis').click()

In [None]:
# Click the first link whose text contains 'Open'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Open').click()

In [None]:
# Parse whatever page the browser currently shows.
syrtis_html = browser.html
syrtis_soup = bs(syrtis_html, 'html.parser')

# The 'wide-image' <img> carries the image path in its `src` attribute.
syrtis = syrtis_soup.body.find('img', class_='wide-image')
syrtis_img = syrtis['src']

# Prefix the scraped path with the site root to form the full URL.
hem_base_url = 'https://marshemispheres.com/'
syrtis_url = ''.join((hem_base_url, syrtis_img))
print(syrtis_url)

In [None]:
# Reload the hemispheres index page before the next hemisphere is opened.
mars_hemispheres_url = "https://marshemispheres.com/"

browser.visit(mars_hemispheres_url)

In [None]:
# Click the first link whose text contains 'Valles'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Valles').click()

In [None]:
# Click the first link whose text contains 'Open'. This goes through the
# `browser.links` finder because `click_link_by_partial_text` is deprecated in
# Splinter.
browser.links.find_by_partial_text('Open').click()

In [None]:
# Parse whatever page the browser currently shows.
valles_html = browser.html
valles_soup = bs(valles_html, 'html.parser')

# The 'wide-image' <img> carries the image path in its `src` attribute.
valles = valles_soup.body.find('img', class_='wide-image')
valles_img = valles['src']

# Prefix the scraped path with the site root to form the full URL.
hem_base_url = 'https://marshemispheres.com/'
valles_url = ''.join((hem_base_url, valles_img))
print(valles_url)

In [None]:
# Save all the links as a list of {"title", "img_url"} dictionaries, one per
# hemisphere. The Schiaparelli entry is titled "Schiaparelli Hemisphere"; the
# earlier "Schiaparelli Marineris Hemisphere" mixed in the Valles Marineris
# name. Note this list (`hemispheres_...`) is distinct from the
# `hemisphere_image_urls` list filled by the scraping loop.

hemispheres_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": valles_url},
    {"title": "Cerberus Hemisphere", "img_url": cerberus_url},
    {"title": "Schiaparelli Hemisphere", "img_url": schiap_url},
    {"title": "Syrtis Major Hemisphere", "img_url": syrtis_url}
]

hemispheres_image_urls

In [None]:

# Collect everything that was scraped into a single dictionary.
#
# The four hemispheres are stored as a list under 'hemispheres'. Writing the
# "title"/"img_url" pairs directly at the top level repeats the same two keys
# four times, and a dict literal keeps only the last value for a repeated key,
# so three of the four hemispheres were silently lost.
#
# The existing top-level keys (including the 'latesthedline' spelling) are
# left untouched in case other code looks them up by name.
final_dictionary = {
    'latesthedline': news_title,
    'latestparagraph': news_paragraph,
    'featuredimage': featured_image_url,
    'hemispheres': [
        {"title": "Valles Marineris Hemisphere", "img_url": valles_url},
        {"title": "Cerberus Hemisphere", "img_url": cerberus_url},
        {"title": "Schiaparelli Hemisphere", "img_url": schiap_url},
        {"title": "Syrtis Major Hemisphere", "img_url": syrtis_url},
    ],
}