In [1]:
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pymongo
import pandas as pd

In [2]:
# Source URLs for each scraping step; every later cell reads one of these.
# News listing fetched with requests in the MARS NEWS cell.
mars_news = 'https://mars.nasa.gov/news/'
# JPL image gallery visited with the browser in the MARS IMAGE cell.
mars_images = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Twitter timeline fetched with requests in the MARS TWEET cell.
mars_twitter = 'https://twitter.com/marswxreport?lang=en'
# Page whose HTML tables are parsed by pandas in the MARS FACTS cell.
mars_facts = 'https://space-facts.com/mars/'
# USGS search results visited with the browser in the MARS HEMISPHERES cell.
mars_hemisphere = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [3]:
# Connection string for a MongoDB server on this machine (port 27017).
conn = 'mongodb://localhost:27017'
# Client handle used by the next cell to select a database.
client = pymongo.MongoClient(conn)

In [4]:
# Select the 'news_db' database and its 'items' collection by name.
db = client['news_db']
collection = db['items']

## MARS NEWS

In [5]:
# Fetch the news listing. The timeout keeps a stalled connection from hanging
# the kernel, and raise_for_status() surfaces HTTP errors immediately instead
# of letting an error page be parsed as if it were the listing.
response = requests.get(mars_news, timeout=30)
response.raise_for_status()

news_pull = BeautifulSoup(response.text, 'lxml')

# find() returns the first matching element (presumably the most recent
# article -- confirm against the page layout) or None when nothing matches.
title = news_pull.find('div', class_='content_title')
paragraph = news_pull.find('div', class_='rollover_description_inner')
if title is None or title.a is None or paragraph is None:
    # Fail with a readable message rather than an AttributeError on None.
    raise ValueError('Could not locate the news title/teaser in the page; '
                     'the markup at %s may have changed.' % mars_news)

news_title = title.a.text.strip()
print(news_title)

news_paragraph = paragraph.text.strip()
print(news_paragraph)

Alabama High School Student Names NASA's Mars Helicopter
Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.


## MARS IMAGE

In [6]:
# Bare driver name, so it is presumably resolved through PATH.
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

try:
    browser.visit(mars_images)
    # Use the browser.links finder API (the same one the hemispheres cell
    # uses) instead of the older click_link_by_partial_text helper.
    browser.links.find_by_partial_text('FULL IMAGE').first.click()
    browser.links.find_by_partial_text('more info').first.click()

    # Capture the detail page's markup before the browser is closed.
    html = browser.html
finally:
    # Always release the Chrome process/window, even if a click fails.
    browser.quit()

soup = BeautifulSoup(html, 'html.parser')

lede = soup.find('figure', class_='lede')
if lede is None or lede.a is None:
    # Fail with a readable message rather than an AttributeError on None.
    raise ValueError("Could not locate the <figure class='lede'> image link; "
                     "the page markup may have changed.")

# The href is site-relative, so prefix the JPL host to build a full URL.
figure = lede.a['href']
featured_image_url = 'https://www.jpl.nasa.gov' + figure
print(featured_image_url)



https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17440_hires.jpg


## MARS TWEET

In [7]:
# Fetch the timeline. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() surfaces HTTP errors immediately.
response = requests.get(mars_twitter, timeout=30)
response.raise_for_status()

tweet_pull = BeautifulSoup(response.text, 'html.parser')

# find() returns the first matching tweet container, or None if absent.
tweet_container = tweet_pull.find('div', class_='js-tweet-text-container')
if tweet_container is None or tweet_container.p is None:
    # Fail with a readable message rather than an AttributeError on None.
    raise ValueError('Could not locate any tweet text in the page; '
                     'the markup at %s may have changed.' % mars_twitter)

# Drop the trailing picture link and flatten newlines. Splitting on the full
# 'pic.twitter.com' marker (not just 'pic') avoids truncating a report whose
# own text happens to contain the letters "pic".
mars_weather = (
    tweet_container.p.text
    .split('pic.twitter.com')[0]
    .replace("\n", " ")
)
print(mars_weather)

InSight sol 506 (2020-04-29) low -93.5ºC (-136.2ºF) high -3.7ºC (25.3ºF) winds from the WNW at 4.5 m/s (10.1 mph) gusting to 15.6 m/s (34.8 mph) pressure at 6.80 hPa


## MARS FACTS

In [8]:
# read_html returns a list with one DataFrame per <table> found on the page.
facts_html = pd.read_html(mars_facts)

# Work on a copy so the parsed table in facts_html stays untouched; the two
# columns are relabelled and the description column becomes the index.
facts_df = facts_html[0].copy()
facts_df.columns = ['Description', 'Data']
facts_df = facts_df.set_index('Description')
facts_df

Unnamed: 0_level_0,Data
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [9]:
# Serialize the facts DataFrame to an HTML <table> string.
html_table = facts_df.to_html()
# Bare last expression: show the generated markup as the cell output.
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Data</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n

## MARS HEMISPHERES

In [10]:
# Same bare driver name as the featured-image cell (presumably resolved
# through PATH) instead of a machine-specific absolute path.
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

# One {'title': ..., 'url': ...} record is appended per hemisphere page.
hemisphere_image_urls = []

try:
    browser.visit(mars_hemisphere)

    # The loop assumes the results page lists four hemisphere links.
    for i in range(4):
        # Re-query the links on every pass: browser.back() reloads the
        # results page, so element handles from the previous pass are not reused.
        link = browser.links.find_by_partial_text('Hemisphere')[i]
        link.click()

        # On the detail page, take the heading text and the 'Sample' link target.
        title = browser.find_by_css('.title').first.text
        url = browser.find_by_text('Sample').first['href']
        hemisphere_dict = {'title': title, 'url': url}
        hemisphere_image_urls.append(hemisphere_dict)

        browser.back()
finally:
    # Always release the Chrome process/window, even if a lookup fails.
    browser.quit()

In [11]:
# Show the collected hemisphere records (a list of {'title', 'url'} dicts).
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
