In [1]:
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import time

## Step 1 - Scraping

Complete initial scraping using Jupyter Notebook, BeautifulSoup, Pandas, and Requests/Splinter.

In [2]:
# Start a visible (non-headless) Chrome window through splinter.
# ChromeDriverManager().install() supplies the path of the chromedriver
# binary that splinter should launch.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [C:\Users\Junette Lee\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache


### NASA Mars News

* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Define browser url to scrape
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Wait (up to 10 s) until at least one article item is present instead of
# sleeping a fixed second: a fixed pause can expire before the list is rendered,
# leaving the later parse with nothing to find.
# NOTE(review): 'li.slide' mirrors the selector used when the articles are
# collected further down - keep the two in sync.
if not browser.is_element_present_by_css('li.slide', wait_time=10):
    print('Warning: no news articles appeared within 10 seconds')

In [4]:
# Parse the page source currently held by the browser into a BeautifulSoup tree
html = browser.html
soup = bs(html, features='html.parser')

In [5]:
# Parallel accumulators filled by the scraping loop, one entry per article:
# title, teaser paragraph and link href
news_title_list, news_p_list, news_href_list = [], [], []

In [6]:
# Collect every <li class="slide"> element; each one is treated as a news article
articles = soup.find_all('li', attrs={'class': 'slide'})
print('List length: {}'.format(len(articles)))

List length: 40


In [7]:
# Loop to scrape the title, teaser paragraph and link href of every article.
# Empty the lists first so that re-running this cell does not append duplicates.
news_title_list.clear()
news_p_list.clear()
news_href_list.clear()

print('--------------------------------------------')
count = 0

for article in articles:
    try:
        # Scrape title, paragraph text, link href.
        # .find() returns None when nothing matches, so .text raises
        # AttributeError and ['href'] raises TypeError; an <a> tag without an
        # href attribute raises KeyError.
        news_title = article.find(class_='content_title').text
        news_p = article.find(class_='article_teaser_body').text
        news_href = article.find('a')['href']
    except (AttributeError, TypeError, KeyError) as err:
        # Skip only malformed items; anything unexpected (including
        # KeyboardInterrupt) still propagates instead of being swallowed.
        print('*** Error! ***')
        print(f'Skipped an article: {type(err).__name__}: {err}')
        continue

    # Append variable lists with each successfully scraped article
    news_title_list.append(news_title)
    news_p_list.append(news_p)
    news_href_list.append(news_href)
    count += 1

    # Print values
    print(count)
    print(news_title)
    print(news_p)
    print(news_href)
    print('--------------------------------------------')

time.sleep(3)

--------------------------------------------
1
NASA Ingenuity Mars Helicopter Prepares for First Flight
Now uncocooned from its protective carbon-fiber shield, the helicopter is being readied for its next steps.  
/news/8896/nasa-ingenuity-mars-helicopter-prepares-for-first-flight/
--------------------------------------------
2
Another First: Perseverance Captures the Sounds of Driving on Mars
NASA’s newest rover recorded audio of itself crunching over the surface of the Red Planet, adding a whole new dimension to Mars exploration.
/news/8892/another-first-perseverance-captures-the-sounds-of-driving-on-mars/
--------------------------------------------
3
NASA to Host Briefing to Preview First Mars Helicopter Flights
Members of the projects will lay out the steps necessary before the helicopter attempts its historic test flights. 
/news/8891/nasa-to-host-briefing-to-preview-first-mars-helicopter-flights/
--------------------------------------------
4
New Study Challenges Long-Held Theor

### JPL Mars Space Images - Featured Image

* Visit the url for JPL Featured Space Image [here](https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html).
* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.
* Make sure to find the image url to the full size `.jpg` image.
* Make sure to save a complete url string for this image.

In [8]:
# Open the JPL featured-image index page and pause a second while it loads
image_url_base = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/'
image_url = image_url_base + 'index.html'
browser.visit(image_url)
time.sleep(1)

In [9]:
# Follow link to featured image.
# browser.links.find_by_partial_text(...).click() is splinter's replacement for
# the deprecated browser.click_link_by_partial_text(...).
browser.links.find_by_partial_text('FULL IMAGE').click()

# Snapshot the HTML only after the full-size image element is present (waits up
# to 10 s); a snapshot taken immediately after the click can miss it.
# NOTE(review): '.fancybox-image' mirrors the class looked up when the image
# src is extracted - keep the two in sync.
browser.is_element_present_by_css('.fancybox-image', wait_time=10)
image_html = browser.html



In [10]:
# Parse the HTML captured after opening the full image into a BeautifulSoup tree
image_soup = bs(image_html, features='html.parser')

In [11]:
# The element with class "fancybox-image" holds the image path in its src attribute
featured_image = image_soup.find(class_='fancybox-image')['src']

# Prefix the site base url to obtain a complete url for the image
featured_image_url = f'{image_url_base}{featured_image}'
print('JPL Mars Space Images - Featured Image URL: ', featured_image_url, sep='\n')

time.sleep(3)

JPL Mars Space Images - Featured Image URL: 
https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg


### Mars Facts

* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
* Use Pandas to convert the data to an HTML table string.

In [12]:
# Let pandas fetch the page and parse its tables; the result is a list of
# DataFrames, displayed here as the cell output
table_url = 'https://space-facts.com/mars/'
mars_table = pd.read_html(io=table_url)
mars_table

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [13]:
# Table containing facts about Mars including Diameter, Mass, etc.
# Take a copy of the first scraped table so that renaming its columns does not
# also change the frame still stored in mars_table.
mars_table_df = mars_table[0].copy()
mars_table_df.columns = ['Description', 'Value']
mars_table_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
# Render the facts table as an HTML <table> string (the row index is included)
mars_table_html = mars_table_df.to_html()
# Bare last expression so the notebook shows the generated markup
mars_table_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

### Mars Hemispheres

* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars' hemispheres.
* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
* Save both the image url string for the full resolution hemisphere image and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.
* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [15]:
# Collects one {'title': ..., 'img_url': ...} dictionary per hemisphere
hemisphere_image_urls = list()

In [16]:
# Open the USGS Astrogeology search results for the enhanced Mars hemispheres
hemi_url_base = 'https://astrogeology.usgs.gov/'
hemi_url = hemi_url_base + 'search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)
time.sleep(1)

In [17]:
# Parse the search-results page held by the browser into a BeautifulSoup tree
hemi_html = browser.html
hemi_soup = bs(hemi_html, features='html.parser')
time.sleep(1)

In [18]:
# Collect every <div class="item"> element; each one is treated as a hemisphere result
hemis = hemi_soup.find_all('div', attrs={'class': 'item'})

In [19]:
# Loop through each hemisphere result, open its detail page and record the
# title together with the full-resolution image url.
# Empty the list first so that re-running this cell does not append duplicates.
hemisphere_image_urls.clear()

for hemi in hemis:
    # Scrape title and href for image link
    title = hemi.find('h3').text
    hemi_href = hemi.find('a', class_="itemLink product-item")["href"]

    # hemi_url_base ends with '/', so plain concatenation would produce '//'
    # whenever the href also starts with '/'; normalise to exactly one slash.
    detail_url = hemi_url_base.rstrip('/') + '/' + hemi_href.lstrip('/')

    # Visit the detail page and give it a second to load
    browser.visit(detail_url)
    time.sleep(1)

    # Parse the detail page
    hemi_loop_html = browser.html
    hemi_loop_soup = bs(hemi_loop_html, 'html.parser')

    # The first link inside the element with class "downloads" is taken as the
    # full-resolution image url
    img_url = hemi_loop_soup.find(class_='downloads').a['href']
    print(img_url)

    # Add to list, then pause before requesting the next page
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})
    time.sleep(1)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [20]:
# Display the collected list of {'title', 'img_url'} dictionaries
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [21]:
# Close the browser session now that scraping is finished
browser.quit()