In [1]:
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd

In [2]:
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# URL of news page to be scraped
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

In [4]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(url)

In [5]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# returns title of first article in feed
news = soup.select_one('ul.item_list li.slide')
news_title = news.find('div', class_="content_title").get_text()
news_title

"Things Are Stacking up for NASA's Mars 2020 Spacecraft"

In [7]:
# returns description of first article in feed
news_p = news.find('div', class_="article_teaser_body").get_text()
news_p

'As the July 2020 launch date inches closer, the next spacecraft headed to the Red Planet is assembled for more testing.'

In [8]:
# URL of gallery of images when query = 'Mars' to be scraped
images_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(images_url)

In [9]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [10]:
base_image_url = 'https://www.jpl.nasa.gov'
# returns partial url of featured image
partial_image_url = soup.find("a", class_='button fancybox')['data-link']
# Concat partial and base URL
featured_image_url = base_image_url + partial_image_url

In [11]:
# URL of twitter page to be scraped
twitter_url = 'https://twitter.com/marswxreport?lang=en'
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(twitter_url)

In [12]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [13]:
mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
# mars_weather = mars_weather.split('\n')
# mars_weather1= mars_weather[1]
# mars_weather2= mars_weather[2]
# mars_weather = mars_weather1 + mars_weather2
# wrong way, but right answer
mars_weather
# still, wrong answer...

'InSight sol 138 (2019-04-17) low -97.7ºC (-143.9ºF) high -17.3ºC (0.9ºF)\nwinds from the W at 4.3 m/s (9.5 mph) gusting to 12.6 m/s (28.1 mph)\npressure at 7.30 hPapic.twitter.com/ofMyPZM2vS'

In [14]:
# URL of webpage containing table
table_url='https://space-facts.com/mars/'

In [15]:
# read table from webpage using Pandas
tables = pd.read_html(table_url , encoding= "utf-8")
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [16]:
# Convert table into dataframe
df = tables[0]
df.columns = ['Characteristic', 'Description']
df

Unnamed: 0,Characteristic,Description
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# URL of webpage of hemisphere information. Repeated these steps for each hemisphere
cerberus_url='https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(cerberus_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [18]:
# Returns the hemisphere name
cerberus = soup.find('h2', class_="title").text
# Cleaning text
cerberus = cerberus.strip('Enhanced')
cerberus = cerberus.strip()
cerberus
# needed to .strip('Enhanced'), followed by .strip(), and not
# .strip(' Enhanced'), because .strip(' Enhanced') gives output
# Cerberus Hemispher
# Schiaparelli Hemispher
# Syrtis Major Hemispher
# Valles Marineris Hemispher

'Cerberus Hemisphere'

In [19]:
# Returns link of image of hemisphere
cerberus_img = soup.select_one('div.downloads ul li a')['href']
cerberus_img

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'

In [20]:
# Create dictionary for each hemisphere, which includes the name and URL of the image of the hemisphere
cerberus_dict = {"Title": cerberus, "Image URL": cerberus_img}

In [21]:
cerberus_dict

{'Title': 'Cerberus Hemisphere',
 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}

In [22]:
schiaparelli_url='https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(schiaparelli_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [23]:
schiaparelli = soup.find('h2', class_="title").text
schiaparelli = schiaparelli.strip('Enhanced')
schiaparelli = schiaparelli.strip()
schiaparelli

'Schiaparelli Hemisphere'

In [24]:
schiaparelli_img = soup.select_one('div.downloads ul li a')['href']
schiaparelli_img

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'

In [25]:
schiaparelli_dict = {"Title": schiaparelli, "Image URL": schiaparelli_img}

In [26]:
schiaparelli_dict

{'Title': 'Schiaparelli Hemisphere',
 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}

In [27]:
syrtis_major_url='https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(syrtis_major_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [28]:
syrtis_major = soup.find('h2', class_="title").text
syrtis_major = syrtis_major.strip('Enhanced')
syrtis_major = syrtis_major.strip()
syrtis_major

'Syrtis Major Hemisphere'

In [29]:
syrtis_major_img = soup.select_one('div.downloads ul li a')['href']
syrtis_major_img

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'

In [30]:
syrtis_major_dict = {"Title": syrtis_major, "Image URL": syrtis_major_img}

In [31]:
syrtis_major_dict

{'Title': 'Syrtis Major Hemisphere',
 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}

In [32]:
valles_marineris_url='https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=True)
browser.visit(valles_marineris_url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [33]:
valles_marineris = soup.find('h2', class_="title").text
valles_marineris = valles_marineris.strip('Enhanced')
valles_marineris = valles_marineris.strip()
valles_marineris

'Valles Marineris Hemisphere'

In [34]:
valles_marineris_img = soup.select_one('div.downloads ul li a')['href']
valles_marineris_img

'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'

In [35]:
valles_marineris_dict = {"Title": valles_marineris, "Image URL": valles_marineris_img}

In [36]:
valles_marineris_dict

{'Title': 'Valles Marineris Hemisphere',
 'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}

In [37]:
# Create list of hemisphere distionaries
hemisphere_image_urls = [cerberus_dict, schiaparelli_dict, syrtis_major_dict, valles_marineris_dict]

In [38]:
hemisphere_image_urls

[{'Title': 'Cerberus Hemisphere',
  'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'Title': 'Schiaparelli Hemisphere',
  'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'Title': 'Syrtis Major Hemisphere',
  'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'Title': 'Valles Marineris Hemisphere',
  'Image URL': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]