# Web Scraping - Mission to Mars

In [25]:
# Dependencies
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import time

In [26]:
# Create path and browser with chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [7]:
# URL of page to be scraped
url="https://mars.nasa.gov/news/"
browser.visit(url)
time.sleep(5)
html=browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup=bs(html, 'html.parser')

# Extract the latest news title and paragraph:
article=soup.find("div", class_='list_text')
title=article.find("div", class_="content_title")
news_title=title.find("a").text
news_p=article.find("div", class_ ="article_teaser_body").text
print(news_title)
print(news_p)

NASA's Perseverance Rover Mission Getting in Shape for Launch
Stacking spacecraft components on top of each other is one of the final assembly steps before a mission launches to the Red Planet. 


### JPL Mars Space Images - Featured Image

In [8]:
# URL of page to be scraped
url_image='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_image)
time.sleep(5)
html_image=browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup_image=bs(html_image, 'html.parser')

In [9]:
# Extract the latest image:
containers=soup_image.find_all("div",class_="img")
for container in containers:
    img=container.find('img')["src"]
    img2=img.split('/')[4]
    img3=img2.split('-')[0]
featured_image_url='https://www.jpl.nasa.gov/spaceimages/images/largesize/' + img3 +'_hires.jpg'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23847_hires.jpg


### Mars Weather

In [27]:
# URL of page to be scraped
url_tweet='https://twitter.com/marswxreport'
browser.visit(url_tweet)
time.sleep(5)
html_tweet=browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup_tweet=bs(html_tweet, 'html.parser')

In [28]:
# Loop through latest tweets and find the tweet that has weather information
tweet_container=soup_tweet.find_all('span',class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')
for tweet in tweet_container: 
    mars_weather=tweet.text
    if 'InSight' in mars_weather:
        print(mars_weather)
        break
    else: 
        pass

NASA InSight


### Mars Facts

In [3]:
# URL of page to be scraped
url_facts='https://space-facts.com/mars/'

In [4]:
# Extract the facts to dataframe:
tables=pd.read_html(url_facts)
profile=pd.DataFrame(tables[0])
profile=profile.rename(columns={0:"Facts", 1:"Results"}).set_index('Facts')
profile

Unnamed: 0_level_0,Results
Facts,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [391]:
# Convert to html
html_table=profile.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Results</th>\n    </tr>\n    <tr>\n      <th>Facts</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  <

### Mars Hemispheres

In [395]:
# URL of page to be scraped
url_hem='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url_hem)
time.sleep(1)
html_hem=browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup_hem=bs(html_hem, 'html.parser')

In [421]:
# Extract image urls and their titles
hemisphere_image_urls=[]
dic={}
results=soup_hem.find_all('div', class_='item')
for result in results:
    desc=result.find('div', class_='description')
    a=desc.find('a')
    l=a['href'].split('/')[5]
    link='http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/' + l + '.tif/full.jpg'
    title=a.find('h3').text
    dict={'Title': title, 'img_url': link}
    hemisphere_image_urls.append(dict)

In [424]:
# Print the list
hemisphere_image_urls

[{'Title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'Title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'Title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'Title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]