In [1]:
#importing python libraries
import pandas as pd
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from bs4 import BeautifulSoup

# NASA Mars News

In [2]:
#setting up url
url = "https://mars.nasa.gov/news/"

In [3]:
#opening website in chrome for scraping
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
#using visit attribute to explore the url
browser.visit(url)

In [8]:
#checking available attributes
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [9]:
#scraping news, date, title and paragraph (p), saving as variables for later as requested
news = soup.find("div", class_='list_text')
news_date = news.find("div", class_ ="list_date").text
news_title = news.find("div", class_="content_title").text
news_p = news.find("div", class_ ="article_teaser_body").text
print(news_date)
print(news_title)
print(news_p)

August 28, 2019
NASA Invites Students to Name Mars 2020 Rover
Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover.


# JPL Mars Space Images - Featured Image


In [10]:
#setting up url, beautiful soup
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [11]:
featured_image_url = "https://www.jpl.nasa.gov" + soup.find("img", class_="thumb")["src"]

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA23416-640x350.jpg


In [12]:
featured_image_date = soup.find("h3", class_="release_date").text
print(featured_image_date)

September 3, 2019


In [13]:
featured_image_title = soup.find("img", class_="thumb")["title"]
print(featured_image_title)

Elysium Fossae


# Mars Weather

In [51]:
#setting up twitter url
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [53]:
#scraping latest data... unfortunately there's no data at the moment...
weather = soup.find_all("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")[1].text
print(weather)

InSight sol 265 (2019-08-25) low -99.4ºC (-146.9ºF) high -26.3ºC (-15.3ºF)
winds from the SSE at 5.3 m/s (12.0 mph) gusting to 16.1 m/s (35.9 mph)
pressure at 7.50 hPapic.twitter.com/9YLawm67zS


In [54]:
weather_date = soup.find_all("span", class_="_timestamp js-short-timestamp")[1].text
print(weather_date)

Aug 26


# Mars Facts

In [17]:
#setting up facts url
facts_url = 'https://space-facts.com/mars/'
#used the read_html function in Pandas to automatically scrape any tabular data from a page.
tables = pd.read_html(facts_url)
tables

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [18]:
type(tables)

list

In [19]:
#converting list to data frame
df = tables[0]
df.columns = ['Mars - Earth Comparison', 'Mars', 'Earth']
df.head(100)

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [20]:
#Pandas also had a to_html method that we can use to generate HTML tables from DataFrames.
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars - Earth Comparison</th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Temperature:</td>\n      <td>-153 to 20 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [21]:
#saving the table directly to a file.
df.to_html('table.html')

# Mars Hemispheres

In [22]:
#opening website in chrome for scraping
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=True)
hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemisphere_url)

html = browser.html
soup = BeautifulSoup(html, "html.parser")

# img_title = soup.find("img", class_="thumb")["alt"]
#img_url = "https://astrogeology.usgs.gov" + soup.find("img", class_="thumb")["src"]
#img_header = soup.find_all("h3")
#print(img_title)
#print(img_url)
#print(img_header)


In [32]:
img_header = soup.find_all("h3")
img_header

[<h3>Cerberus Hemisphere Enhanced</h3>,
 <h3>Schiaparelli Hemisphere Enhanced</h3>,
 <h3>Syrtis Major Hemisphere Enhanced</h3>,
 <h3>Valles Marineris Hemisphere Enhanced</h3>]

In [36]:
title_list = []
imgs_url_list = []
title_list 
imgs_url_list 

[]

In [37]:

# Iterate through all pages

for i in img_header:

    try:   
        title = i.get_text()
        browser.click_link_by_partial_text(title)
        imgs_url = browser.find_link_by_partial_href('download')['href']
        title_list.append(title)
        imgs_url_list.append(imgs_url)
        browser.visit(hemisphere_url)

        print('-----------')
        print(title)
        print(img_url)

# Click the 'Next' button on each page
#try:
#    browser.click_link_by_partial_text('next')
          
    except:
        print("Scraping Complete")


-----------
Cerberus Hemisphere Enhanced
Scraping Complete
-----------
Schiaparelli Hemisphere Enhanced
Scraping Complete
-----------
Syrtis Major Hemisphere Enhanced
Scraping Complete
-----------
Valles Marineris Hemisphere Enhanced
Scraping Complete


In [38]:
title_list 


['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [39]:
img_header

[<h3>Cerberus Hemisphere Enhanced</h3>,
 <h3>Schiaparelli Hemisphere Enhanced</h3>,
 <h3>Syrtis Major Hemisphere Enhanced</h3>,
 <h3>Valles Marineris Hemisphere Enhanced</h3>]

In [40]:
imgs_url_list 

['http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [46]:
hemisphere_dict = [{"title": title_list[i], "img_url": imgs_url_list[i]}  for i in range(len(img_header))]
hemisphere_dict

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]