In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
import time


# NASA Mars News

In [2]:
#define path & set up browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [3]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [4]:
titles = soup.find_all('div', class_="content_title")
news_title = titles[0].text.strip()
print(news_title)

Why This Martian Full Moon Looks Like Candy


In [5]:
p_texts = soup.find_all('div', class_="article_teaser_body")
news_p = p_texts[0].text.strip()
print(news_p)

For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.


In [6]:
dates = soup.find_all('div', class_="list_date")
news_date = dates[0].text.strip()
print(news_date)

May  9, 2019


In [7]:
mars_news = {"news_title": news_title, "news_p": news_p, "news_date":news_date}
print(mars_news)

{'news_title': 'Why This Martian Full Moon Looks Like Candy', 'news_p': "For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.", 'news_date': 'May  9, 2019'}


# JPL Mars Space Images - Featured Image

In [8]:
#define path & set up browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
time.sleep(1)


In [9]:
#navigate to top image
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(1)



In [10]:
#set up beautiful soup for new page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

#extract top image url
top_img = soup.find('img', class_="fancybox-image")
top_img['src']


'/spaceimages/images/mediumsize/PIA17046_ip.jpg'

In [11]:
top_img_url = 'https://www.jpl.nasa.gov' + top_img["src"]
print(top_img_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17046_ip.jpg


# Mars Weather

In [12]:
# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'
# Retrieve page with the requests module
response = requests.get(url)    
#create soup object
soup = BeautifulSoup(response.text, 'html.parser')
# Examine the results
# print(soup.prettify())

In [13]:
mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text.strip()
print(mars_weather)

InSight sol 159 (2019-05-08) low -100.3ºC (-148.5ºF) high -21.6ºC (-6.9ºF)
winds from the SW at 4.6 m/s (10.4 mph) gusting to 15.3 m/s (34.2 mph)
pressure at 7.40 hPapic.twitter.com/rg5UKl8dLc


# Mars Facts

In [14]:
# URL of page to be scraped
url = 'https://space-facts.com/mars/'
# Retrieve page with the requests module
response = requests.get(url)    
#create soup object
soup = BeautifulSoup(response.text, 'html.parser')
# Examine the results
# print(soup.prettify())

In [15]:
# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [16]:
# Use Pandas to convert the data to a HTML table string.
mars_df = tables[0]
mars_df


Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
html_table=mars_df.to_html(na_rep = " ",index = False, header=False)
html_table = html_table.replace('\n','')
html_table = html_table.replace("'",' ')
print(html_table)

<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>


# Mars Hemispheres

In [18]:
#define path & set up browser
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
time.sleep(2)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
time.sleep(2)
# products = soup.find('div', class_="product-section")
items = soup.find_all('div', class_="item")

titles = []
img_urls = []

hemisphere_image_urls = []


for i in items:
        #scrape title
        img_title = i.find('h3').get_text()
        title = img_title.rsplit(' ', 1)[0]
        titles.append(title)
        
        #scrape hemisphere url
        detail = i.find('a')['href']
        detail_url = 'https://astrogeology.usgs.gov' + detail
        
        #got to detail_url
        browser.visit(detail_url)
        time.sleep(1)
        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')
        
        #scrape img_url
        downloads = soup.find('div', class_="downloads")
        ul = downloads.find('ul')
        li = ul.find_all('li')
        img = li[0]
        img_url = img.find('a')['href']
        img_urls.append(img_url)
        
        hemisphere_image_urls.append({"title": title, "img_url": img_url})

        #go back to original url
        browser.visit(url)

print(hemisphere_image_urls)
 


[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [19]:
hemisphere_image_urls = dict(zip(titles, img_urls))
    
print(hemisphere_image_urls)
print(titles)


{'Cerberus Hemisphere': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg', 'Schiaparelli Hemisphere': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg', 'Syrtis Major Hemisphere': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg', 'Valles Marineris Hemisphere': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']


In [20]:
scrape_dict = {"mars_news": mars_news,  "top_img_url": top_img_url, "mars_weather": mars_weather, "html_table": html_table, "hemisphere_image_urls":hemisphere_image_urls}

In [21]:
print(scrape_dict)

{'mars_news': {'news_title': 'Why This Martian Full Moon Looks Like Candy', 'news_p': "For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.", 'news_date': 'May  9, 2019'}, 'top_img_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17046_ip.jpg', 'mars_weather': 'InSight sol 159 (2019-05-08) low -100.3ºC (-148.5ºF) high -21.6ºC (-6.9ºF)\nwinds from the SW at 4.6 m/s (10.4 mph) gusting to 15.3 m/s (34.2 mph)\npressure at 7.40 hPapic.twitter.com/rg5UKl8dLc', 'html_table': '<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>