In [11]:
import datetime
import io
import re
import time
from urllib.parse import urljoin

import pandas as pd
import pymongo
from bs4 import BeautifulSoup as bs
from flask import Flask, jsonify, redirect
from splinter import Browser

In [12]:
# Start a visible (non-headless) Chrome session driven by the local chromedriver binary.
executable_path = dict(executable_path="/usr/local/bin/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)

# Connect to the MongoDB server on localhost and select the `mars_db` database.
client = pymongo.MongoClient("mongodb://localhost:27017")
db = client["mars_db"]

#### NASA Mars News — https://mars.nasa.gov/news/

In [13]:
# Retrieve the title and teaser text of every news item on the page.
browser.visit("https://mars.nasa.gov/news/")

# Poll until at least one news item is present instead of sleeping for a fixed
# time; the previous extra sleep after parsing had no effect and is removed.
if not browser.is_element_present_by_css("ul.item_list li.slide", wait_time=10):
    raise RuntimeError("No news items appeared on https://mars.nasa.gov/news/")

soup = bs(browser.html, "html.parser")
items = soup.find("ul", class_="item_list")
if items is None:
    raise RuntimeError("Could not find the news list (ul.item_list) in the page HTML")
slides = items.find_all("li", class_="slide")

news_titles = []
news_paragraphs = []
for slide in slides:
    title_tag = slide.find("div", class_="content_title")
    teaser_tag = slide.find("div", class_="article_teaser_body")
    if title_tag is None or teaser_tag is None:
        # Skip incomplete items so the two lists stay index-aligned.
        continue
    # Strip surrounding whitespace (scraped titles carried a trailing space).
    news_title = title_tag.text.strip()
    news_p = teaser_tag.text.strip()
    news_titles.append(news_title)
    news_paragraphs.append(news_p)

if not news_titles:
    raise RuntimeError("News list was found but no item had both a title and a teaser")

print(news_titles[0])
print(news_paragraphs[0])

NASA Readies Perseverance Mars Rover's Earthly Twin 
Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


#### JPL Featured Space Image — https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars

In [14]:
# Retrieve the featured image URL.
browser.visit("https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")
browser.find_by_id("full_image").click()

# Wait for the full-size image element to show up after the click rather than
# sleeping for a fixed time.
if not browser.is_element_present_by_css("img.fancybox-image", wait_time=10):
    raise RuntimeError("Featured image (img.fancybox-image) did not appear after clicking 'full_image'")

soup = bs(browser.html, "html.parser")
image_tag = soup.find("img", class_="fancybox-image")
if image_tag is None or not image_tag.get("src"):
    raise RuntimeError("Featured image element has no usable 'src' attribute")
image_src = image_tag["src"]

# urljoin yields the same URL as plain concatenation for a root-relative src,
# and additionally copes with an absolute src or one lacking a leading slash.
featured_image_url = urljoin("https://jpl.nasa.gov", image_src)
featured_image_url

'https://jpl.nasa.gov/spaceimages/images/mediumsize/PIA00046_ip.jpg'

#### Mars Facts — https://space-facts.com/mars/

In [15]:
# Retrieve the Mars facts table and render it as a single-line HTML string.
browser.visit("https://space-facts.com/mars/")

# Wrap the markup in StringIO: passing a literal HTML string to read_html is
# deprecated in recent pandas releases, while file-like input is always accepted.
tables = pd.read_html(io.StringIO(browser.html))
if len(tables) < 2:
    raise RuntimeError(
        f"Expected at least 2 tables on https://space-facts.com/mars/, found {len(tables)}"
    )
df = tables[1]  # second table parsed from the page

mars_facts_table_html = df.to_html(index=False, justify="center")
# Drop newlines so the table is stored as one line of HTML.
mars_facts_table_html = mars_facts_table_html.replace("\n","")
mars_facts_table_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: center;">      <th>Mars - Earth Comparison</th>      <th>Mars</th>      <th>Earth</th>    </tr>  </thead>  <tbody>    <tr>      <td>Diameter:</td>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <td>Moons:</td>      <td>2</td>      <td>1</td>    </tr>    <tr>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <td>Length of Year:</td>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <td>Temperature:</td>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

#### Mars Hemispheres — https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars

In [17]:
browser.visit("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars")
# Wait for the search results to be present instead of sleeping for a fixed time.
if not browser.is_element_present_by_css("div.description", wait_time=10):
    raise RuntimeError("No hemisphere search results (div.description) were found")
soup = bs(browser.html, "html.parser")

# Retrieve the URL of each hemisphere's detail page.
hemisphere_urls = []
hemispheres = soup.find_all("div", class_="description")
for hemisphere in hemispheres:
    link = hemisphere.find("a")
    if link is None or not link.get("href"):
        # A result without a link cannot be followed; skip it.
        continue
    # urljoin matches plain concatenation for root-relative hrefs and also
    # handles absolute hrefs correctly.
    url = urljoin("https://astrogeology.usgs.gov", link["href"])
    hemisphere_urls.append(url)

# Retrieve the title and image link of each hemisphere.
hemisphere_list = []
for hemisphere_url in hemisphere_urls:
    browser.visit(hemisphere_url)
    # Poll for the page title; a missing title is reported by the check below.
    browser.is_element_present_by_css("h2.title", wait_time=10)
    soup = bs(browser.html, "html.parser")

    title_tag = soup.find("h2", class_="title")
    # The image link is taken from the first <li> on the page, as before.
    first_item = soup.find("li")
    image_link = first_item.find("a") if first_item is not None else None
    if title_tag is None or image_link is None or not image_link.get("href"):
        raise RuntimeError(f"Could not find the title or image link on {hemisphere_url}")

    # Drop the " Enhanced" suffix from the page title.
    title = title_tag.text.replace(" Enhanced", "")
    # Absolute hrefs pass through unchanged; relative ones are resolved against the page URL.
    image_url = urljoin(hemisphere_url, image_link["href"])
    hemisphere_list.append({"title":title, "image_url":image_url})

# Collect everything scraped above into one document, stamped with the scrape time.
return_dict = {
    "news_titles": news_titles,
    "news_paragraphs": news_paragraphs,
    "featured_image_url": featured_image_url,
    "mars_facts_table_html": mars_facts_table_html,
    "hemisphere_list": hemisphere_list,
    "date": datetime.datetime.utcnow(),
}

try:
    # Collection.update() was deprecated in PyMongo 3 and removed in PyMongo 4.
    # replace_one with an empty filter and upsert=True keeps the same effect:
    # overwrite the first stored document, or insert one if the collection is empty.
    db.mission_to_mars.replace_one({}, return_dict, upsert=True)
finally:
    # Always close the browser, even when the database write fails.
    browser.quit()

return_dict



{'news_titles': ["NASA Readies Perseverance Mars Rover's Earthly Twin ",
  "NASA Engineers Checking InSight's Weather Sensors",
  "Follow NASA's Perseverance Rover in Real Time on Its Way to Mars",
  'NASA Establishes Board to Initially Review Mars Sample Return Plans',
  "NASA's Ingenuity Mars Helicopter Recharges Its Batteries in Flight",
  "Celebrate Mars Reconnaissance Orbiter's Views From Above",
  "NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light",
  "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary",
  'NASA, ULA Launch Mars 2020 Perseverance Rover Mission to Red Planet',
  "NASA's Perseverance Rover Will Carry First Spacesuit Materials to Mars",
  "A New Video Captures the Science of NASA's Perseverance Mars Rover",
  'NASA Invites Public to Share Excitement of Mars 2020 Perseverance Rover Launch',
  "NASA's Mars Perseverance Rover Passes Flight Readiness Review",
  'NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities',
  "6