In [1]:
import requests
from bs4 import BeautifulSoup
import pymongo
import pandas as pd
import time
from selenium import webdriver

In [2]:
# Connection string for the MongoDB server, and the client built from it
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

# News Article

In [3]:
# Select the mars_db database and its articles collection
db = client['mars_db']
collection = db['articles']

In [4]:
# Site root, prepended to relative links to make them absolute
base_url = 'https://mars.nasa.gov'
# News page to be scraped
url = 'https://mars.nasa.gov/news/'

In [5]:
# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(url):
    """Open ``url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.

    Parameters
    ----------
    url : str
        Address of the page to load.

    Returns
    -------
    str
        The page source reported by the browser after the pause.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()

In [6]:
# Page source of the news page, as returned by the browser
response = render_page(url)

In [7]:
# Parse the news page source with the built-in HTML parser
soup = BeautifulSoup(markup=response, features='html.parser')

In [8]:
# First news slide on the page (the latest article)
news = soup.find('li', class_='slide')

# Article title
title = news.find('div', class_='content_title').get_text()
# Article date
date = news.find('div', class_='list_date').get_text()
# Relative link taken from the first anchor inside the slide
link = news.find('a')['href']
# Teaser text
text = news.find('div', class_='article_teaser_body').get_text()

# Absolute article URL: site root followed by the relative link
concatURL = ''.join((base_url, link))

# Document to be stored in MongoDB
article = dict(title=title, date=date, url=concatURL, teaser=text)

collection.insert_one(article)

<pymongo.results.InsertOneResult at 0x11135a348>

# Featured Image

In [9]:
# Select the mars_db database and its images collection
db = client['mars_db']
collection = db['images']

In [10]:
# Site root, prepended to relative image links to make them absolute
image_base_url = 'https://www.jpl.nasa.gov'
# Image search page to be scraped
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [11]:
# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(image_url):
    """Open ``image_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.

    Parameters
    ----------
    image_url : str
        Address of the page to load.

    Returns
    -------
    str
        The page source reported by the browser after the pause.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(image_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()

In [12]:
# Page source of the image search page, as returned by the browser
image_response = render_page(image_url)

In [13]:
# Parse the image page source with the built-in HTML parser
image_soup = BeautifulSoup(markup=image_response, features='html.parser')

In [14]:
# First <footer> element of the page
image_results = image_soup.find('footer')

# Image title, from the data-title attribute of the footer's first anchor
img_title = image_results.find('a')['data-title']
# Relative image path, from the same anchor's data-fancybox-href attribute
img_link = image_results.find('a')['data-fancybox-href']

# Absolute image URL: site root followed by the relative path
imageURL = ''.join((image_base_url, img_link))

# Document to be stored in MongoDB
image = dict(imgTitle=img_title, imgSrc=imageURL)

collection.insert_one(image)

<pymongo.results.InsertOneResult at 0x111364188>

# Current Weather

In [15]:
# Select the mars_db database and its weather collection
db = client['mars_db']
collection = db['weather']

In [16]:
# Twitter timeline to be scraped for the weather report
weather_url = 'https://twitter.com/marswxreport?lang=en'

In [17]:
# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(weather_url):
    """Open ``weather_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.

    Parameters
    ----------
    weather_url : str
        Address of the page to load.

    Returns
    -------
    str
        The page source reported by the browser after the pause.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(weather_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()

In [18]:
# Page source of the weather timeline, as returned by the browser
weather_response = render_page(weather_url)

In [19]:
# Parse the weather page source with the built-in HTML parser
weathersoup = BeautifulSoup(markup=weather_response, features='html.parser')

In [20]:
# Get Current Weather
# Text of the first <p class="tweet-text"> element on the page.
# (A separate lookup of the tweet container used to be stored in `weather`
# here, but it was overwritten by the dictionary below before being read.)
mars_weather = weathersoup.find('p', class_='tweet-text').text

# Dictionary to be inserted as a MongoDB document
weather = {
    'weather': mars_weather
}

collection.insert_one(weather)

<pymongo.results.InsertOneResult at 0x1126cc3c8>

# Mars Facts

In [21]:
# Mars facts page to be scraped
facts_url = 'http://space-facts.com/mars/'

In [22]:
# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(facts_url):
    """Open ``facts_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.

    Parameters
    ----------
    facts_url : str
        Address of the page to load.

    Returns
    -------
    str
        The page source reported by the browser after the pause.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(facts_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()

In [23]:
# Page source of the facts page, as returned by the browser
facts_response = render_page(facts_url)

In [24]:
# Parse the facts page source with the built-in HTML parser
facts_soup = BeautifulSoup(markup=facts_response, features='html.parser')

In [25]:
# Let pandas read every HTML table found at the facts URL into a list
tables = pd.read_html(io=facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [26]:
# Work on a re-indexed copy of the first scraped table. reset_index returns a
# new frame, so its result has to be kept; assigning it here also leaves the
# frame inside `tables` untouched, which makes this cell safe to re-run.
table_df = tables[0].reset_index(drop=True)
table_df.columns = ['Description', 'Value']
table_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [27]:
# Write the facts table to an HTML file in the working directory
table_df.to_html(buf='table.html')

# Hemispheres

In [28]:
# Cerberus hemisphere page to be scraped
cerb_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'


# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(cerb_url):
    """Open ``cerb_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(cerb_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()


cerb_response = render_page(cerb_url)

# Parse the page source with the built-in HTML parser
cerb_soup = BeautifulSoup(cerb_response, "html.parser")

In [29]:
# Element that wraps the hemisphere image
cerberus = cerb_soup.find('div', class_='wide-image-wrapper')
# Hemisphere title
cerb_title = cerb_soup.find('h2', class_='title').text
# Image link: take the first anchor inside the image wrapper. Reading
# `cerb_soup.a` instead returns the first anchor of the whole document,
# which is unrelated to the image, and left `cerberus` unused.
# NOTE(review): assumes the wrapper contains the image link - confirm
# against the live page.
cerb_link = cerberus.a['href']


In [30]:
# Schiaparelli hemisphere page to be scraped
schia_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'


# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(schia_url):
    """Open ``schia_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(schia_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()


schia_response = render_page(schia_url)

# Parse the page source with the built-in HTML parser
schia_soup = BeautifulSoup(schia_response, "html.parser")

In [31]:
# Element that wraps the hemisphere image
schiaparelli = schia_soup.find('div', class_='wide-image-wrapper')
# Hemisphere title
schia_title = schia_soup.find('h2', class_='title').text
# Image link: take the first anchor inside the image wrapper. Reading
# `schia_soup.a` instead returns the first anchor of the whole document,
# which is unrelated to the image, and left `schiaparelli` unused.
# NOTE(review): assumes the wrapper contains the image link - confirm
# against the live page.
schia_link = schiaparelli.a['href']

In [32]:
# Syrtis Major hemisphere page to be scraped
syrtis_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'


# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(syrtis_url):
    """Open ``syrtis_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(syrtis_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()


syrtis_response = render_page(syrtis_url)

# Parse the page source with the built-in HTML parser
syrtis_soup = BeautifulSoup(syrtis_response, "html.parser")

In [33]:
# Element that wraps the hemisphere image
syrtis = syrtis_soup.find('div', class_='wide-image-wrapper')
# Hemisphere title
syrtis_title = syrtis_soup.find('h2', class_='title').text
# Image link: take the first anchor inside the image wrapper. Reading
# `syrtis_soup.a` instead returns the first anchor of the whole document,
# which is unrelated to the image, and left `syrtis` unused.
# NOTE(review): assumes the wrapper contains the image link - confirm
# against the live page.
syrtis_link = syrtis.a['href']

In [34]:
# Valles Marineris hemisphere page to be scraped
valles_url = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'


# Retrieve the page source through a Selenium-driven Chrome browser
def render_page(valles_url):
    """Open ``valles_url`` in Chrome and return the page source as a string.

    The page is loaded in a new Chrome WebDriver session, the function waits
    a fixed three seconds, and then reads ``driver.page_source``.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(valles_url)
        # Fixed pause before reading the source
        time.sleep(3)
        return driver.page_source
    finally:
        # Always end the browser session; otherwise every call leaves a
        # Chrome process running, even when loading the page fails.
        driver.quit()


valles_response = render_page(valles_url)

# Parse the page source with the built-in HTML parser
valles_soup = BeautifulSoup(valles_response, "html.parser")

In [35]:
# Element that wraps the hemisphere image
valles = valles_soup.find('div', class_='wide-image-wrapper')
# Hemisphere title
valles_title = valles_soup.find('h2', class_='title').text
# Image link: take the first anchor inside the image wrapper. Reading
# `valles_soup.a` instead returns the first anchor of the whole document,
# which is unrelated to the image, and left `valles` unused.
# NOTE(review): assumes the wrapper contains the image link - confirm
# against the live page.
valles_link = valles.a['href']

# Query MongoDB

In [36]:
# Print every document stored in the articles collection
articles = db['articles'].find()

for article in articles:
    print(article)

{'_id': ObjectId('5be60e01755db92b97adbbcd'), 'title': 'Curiosity on the Move Again', 'date': 'November  6, 2018', 'url': 'https://mars.nasa.gov/news/8371/curiosity-on-the-move-again/', 'teaser': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."}


In [37]:
# Print every document stored in the images collection
images = db['images'].find()

for image in images:
    print(image)

{'_id': ObjectId('5be60e09755db92b97adbbce'), 'imgTitle': 'A Ring of Color', 'imgSrc': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14924_ip.jpg'}


In [38]:
# Display current weather in MongoDB collection.
# The cursor and the loop variable get their own names: the previous
# `for weather in weather` rebound the cursor name on every iteration and
# overwrote the `weather` document built earlier in the notebook.
weather_docs = db.weather.find()

for weather_doc in weather_docs:
    print(weather_doc)

{'_id': ObjectId('5be60e10755db92b97adbbcf'), 'weather': 'Sol 2223 (2018-11-07), high 2C/35F, low -64C/-83F, pressure at 8.50 hPa, daylight 06:18-18:35'}
