# Mars Web Scraping

In [1]:
# import dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pandas as pd

### Mars News

In [2]:
# requests call to page
marspage = requests.get("https://mars.nasa.gov/news")

In [3]:
# get page text
marspagetext = bs(marspage.text,'html.parser')
# marspagetext

In [4]:
# get article title and body
newstitle = marspagetext.find_all(class_='content_title')[0].a.text.strip()
newspara = marspagetext.find_all(class_='rollover_description_inner')[0].text.strip()
newstitle, newspara

('NASA Prepares for Moon and Mars With New Addition to Its Deep Space Network',
 'Robotic spacecraft will be able to communicate with the dish using radio waves and lasers.')

### Mars Picture

In [5]:
# start up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [6]:
# navigate site
browser.visit("https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")
browser.click_link_by_id('full_image')
browser.click_link_by_partial_text('more info')
html = browser.html
imagepagetext = bs(html, 'lxml')
browser.quit()



In [7]:
# extract image url
imageurl = 'https://www.jpl.nasa.gov' + imagepagetext.find(class_='main_image')['src'].strip()
imageurl

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17932_hires.jpg'

### Weather Twitter

In [8]:
# get twitter
marstwitter = requests.get("https://twitter.com/marswxreport?lang=en")
# get page text
marstwittertext = bs(marstwitter.text,'html.parser')
# marstwittertext

In [9]:
twitterweather = marstwittertext.find(class_="tweet-text").text.strip()
twitterweather

'InSight sol 442 (2020-02-23) low -94.1ºC (-137.3ºF) high -10.5ºC (13.0ºF)\nwinds from the SSE at 6.2 m/s (13.8 mph) gusting to 21.1 m/s (47.3 mph)\npressure at 6.30 hPapic.twitter.com/lfdFlvxVxe'

### Facts

In [10]:
# read facts page into dataframe
tables = pd.read_html("https://space-facts.com/mars/")
facts = pd.DataFrame(tables[0])
facts = facts.rename(columns={0:'description',1:'value'})

In [11]:
facts

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


### Hemispheres

In [12]:
# start up splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [13]:
# navigate site
browser.visit("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")
mainhtml = browser.html
mainhempagetext = bs(mainhtml, 'html.parser')
links = mainhempagetext.find_all(class_='itemLink')
browser.quit()

In [14]:
# get titles and page links
pageURLs = []
titles = []
for i in range(len(links)):
    try:
        title = links[i].text.strip('Enhanced').strip()
        link = "https://astrogeology.usgs.gov" + links[i]['href']
        if len(title) > 0:
            pageURLs.append(link)
            titles.append(title)
    except:
        pass

In [15]:
# get image urls
imagesources = []
browser = Browser('chrome', **executable_path, headless=False)
for link in pageURLs:
    browser.visit(link)
    pagehtml = browser.html
    pagehtmltext = bs(pagehtml, 'html.parser')
    imgsrc = "https://astrogeology.usgs.gov/" + pagehtmltext.find(class_="wide-image")['src'].strip()
    imagesources.append(imgsrc)

# close browser
browser.quit()

In [16]:
hemisphereURLs = []
for i in range(len(titles)):
    hemisphereURLs.append({'title':titles[i], 'img_url':imagesources[i]})
    

In [17]:
hemisphereURLs

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]