### Scraping

Scrape the NASA Mars News Site (https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [1]:
# import modules
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pymongo
import pandas as pd
import requests
from flask import Flask, render_template, redirect
import time

In [2]:
# Launch a visible (non-headless) Chrome session through splinter,
# driven by the chromedriver binary named below.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# NASA Mars News page to scrape
news_url = 'https://mars.nasa.gov/news/'
# Quick reachability check. The explicit timeout keeps an unresponsive server
# from blocking the kernel indefinitely (requests applies no timeout by default).
response = requests.get(news_url, timeout=10)
response

<Response [200]>

In [6]:
# Load the news page in the browser, then parse the rendered HTML with
# BeautifulSoup ('html.parser').
browser.visit(news_url)
# Give the page up to 10 s to show a teaser element before snapshotting the
# HTML: the extraction step looks for 'div.article_teaser_body', and a snapshot
# taken too early may not contain it yet. The call returns a bool instead of
# raising when the element is missing, so such a page is parsed exactly as before.
browser.is_element_present_by_css('div.article_teaser_body', wait_time=10)
news_html = browser.html
news_soup = bs(news_html, 'html.parser')

NASA Mars News

In [7]:
# Extract the headline and teaser text of the latest news item.
# NOTE(review): the second 'content_title' div is used (index 1) — presumably
# the first match is not an article headline; confirm against the page.
title_divs = news_soup.select('div.content_title')
news_title = title_divs[1].get_text()
teaser_div = news_soup.select_one('div.article_teaser_body')
news_paragraph = teaser_div.get_text()

# Show what was scraped
print(f"Latest news title : {news_title}")
print("\n" + news_paragraph)

Latest news title : NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light

Vast areas of the Martian night sky pulse in ultraviolet light, according to images from NASA’s MAVEN spacecraft. The results are being used to illuminate complex circulation patterns in the Martian atmosphere.


JPL Mars Space Images - Featured Image

In [8]:
# JPL Mars Space Images page to scrape
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
# Quick reachability check. The explicit timeout keeps an unresponsive server
# from blocking the kernel indefinitely (requests applies no timeout by default).
response = requests.get(img_url, timeout=10)
response

<Response [200]>

In [9]:
# Open the JPL Mars space-images page in the automated browser session
browser.visit(img_url)

In [10]:
# Open the full-size view of the FEATURED IMAGE by clicking the element with
# id 'full_image' (the "FULL IMAGE" button), then pause two seconds before the
# "more info" link is clicked in the next step.
# The result of click() is not stored: it was never used afterwards.
browser.find_by_id('full_image').first.click()
time.sleep(2)

In [11]:
# Follow the link whose text contains 'more info' to reach the image detail page.
# NOTE(review): newer splinter releases deprecate click_link_by_partial_text in
# favor of browser.links.find_by_partial_text(...).click() — confirm against the
# installed splinter version before changing.
browser.click_link_by_partial_text('more info')



In [12]:
# Parse the image detail page with BeautifulSoup ('html.parser').
# Wait (up to 10 s) for the main image element before snapshotting the HTML:
# the page was reached by clicking a link, and the URL extraction reads
# 'img.main_image'. The call returns a bool instead of raising when the element
# is missing, so behavior is otherwise unchanged.
browser.is_element_present_by_css('img.main_image', wait_time=10)
img_html = browser.html
img_soup = bs(img_html, 'html.parser')

In [13]:
# Debug aid: uncomment the next line to inspect the parsed page
#print(img_soup.prettify())

In [14]:
# Pull the `src` attribute of the <img class="main_image"> element
main_image_tag = img_soup.select_one('img.main_image')
partial_img_url = main_image_tag['src']
partial_img_url

'/spaceimages/images/largesize/PIA15254_hires.jpg'

In [15]:
# Prefix the site-relative image path with the JPL host to get an absolute URL
featured_img_url = f"https://www.jpl.nasa.gov{partial_img_url}"
print(f"featured_img_url : {featured_img_url}")

featured_img_url : https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA15254_hires.jpg


Mars Weather - Twitter

In [16]:
# Mars weather report Twitter account to scrape
twitter_mars_url = 'https://twitter.com/marswxreport?lang=en'
# Quick reachability check. The explicit timeout keeps an unresponsive server
# from blocking the kernel indefinitely (requests applies no timeout by default).
response = requests.get(twitter_mars_url, timeout=10)
response

<Response [200]>

In [17]:
# Open the Twitter page in the browser, then pause one second before the
# page HTML is read
browser.visit(twitter_mars_url)
time.sleep(1)

In [18]:
# Parse the rendered Twitter page with BeautifulSoup ('html.parser')
twtr_html = browser.html
twtr_soup = bs(twtr_html, 'html.parser')

# Take the text of the first <div lang="en"> on the page
# (presumably the most recent weather tweet — TODO confirm)
english_divs = twtr_soup.select('div[lang="en"]')
latest_weather_tweet = english_divs[0].get_text()

print(latest_weather_tweet)

InSight sol 606 (2020-08-09) low -94.1ºC (-137.4ºF) high -18.7ºC (-1.6ºF)
winds from the WNW at 8.0 m/s (17.8 mph) gusting to 23.9 m/s (53.4 mph)
pressure at 7.90 hPa


Mars Facts

In [19]:
# Mars facts page to scrape
mars_fact_url = "https://space-facts.com/mars/"
# Quick reachability check. The explicit timeout keeps an unresponsive server
# from blocking the kernel indefinitely (requests applies no timeout by default).
response = requests.get(mars_fact_url, timeout=10)
response


<Response [200]>

In [20]:
# Open the Mars facts page in the browser session.
# NOTE(review): the table extraction passes the URL straight to pd.read_html,
# so this visit looks unnecessary — confirm before removing.
browser.visit(mars_fact_url)

In [21]:
# Let pandas fetch the page itself and parse the HTML tables it finds
tables = pd.read_html(io=mars_fact_url)

tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [22]:
# Count how many tables pandas extracted from the page
len(tables)

3

In [23]:
# Work on the first extracted table. Take a copy so that relabelling the
# columns afterwards does not also modify the frame still held in `tables`.
df = tables[0].copy()
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [24]:
# Label the two columns of the facts table (assigned in place on `df`)
df.columns = ['Items', 'Value']
df

Unnamed: 0,Items,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [25]:
# Render the facts table as an HTML string with every newline character removed
html_facts_table = df.to_html().replace('\n', '')
html_facts_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Items</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</t

Mars Hemispheres

In [26]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# Quick reachability check. The explicit timeout keeps an unresponsive server
# from blocking the kernel indefinitely (requests applies no timeout by default).
response = requests.get(usgs_url, timeout=10)
response

<Response [200]>

In [27]:
# Open the USGS search-results page in the automated Chrome session
browser.visit(usgs_url)

In [28]:
# Parse the rendered search-results page into a BeautifulSoup tree
hemi_html = browser.html
mars_hemi_soup = bs(markup=hemi_html, features='html.parser')

In [29]:
# Collect every <div class="item"> — one per hemisphere result (4 expected)
items = mars_hemi_soup.select('div.item')
print(len(items))

4


In [30]:
# Sanity check: heading (<h3>) text of the first result
items[0].h3.text

'Cerberus Hemisphere Enhanced'

In [31]:
# Sanity check: `src` of the first <img> inside the first <a> of the first result
partial_img_url = items[0].a.img['src']
partial_img_url

'/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'

In [32]:
# Start with an empty list; it is filled with one {"title", "img_url"} dict
# per hemisphere result
hemisphere_image_urls = []

In [33]:
# Build one {"title", "img_url"} record per hemisphere result in `items`.
# The list is rebuilt from scratch here, so re-running this cell cannot append
# duplicate entries, and the comprehension keeps its loop variable local so the
# earlier `img_url` / `partial_img_url` notebook variables are not overwritten.
# NOTE(review): the <img> `src` used below is the search-result thumbnail
# (the recorded URLs end in `_thumb.png`) — confirm that is the intended image.
hemisphere_image_urls = [
    {
        # heading text with " Enhanced" removed
        "title": item.find('h3').text.replace(" Enhanced", ""),
        # site-relative `src` made absolute with the USGS host
        "img_url": 'https://astrogeology.usgs.gov' + item.find('a').find('img')['src'],
    }
    for item in items
]
    

In [34]:
# Show the collected hemisphere records
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/55a0a1e2796313fdeafb17c35925e8ac_syrtis_major_enhanced.tif_thumb.png'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/4e59980c1c57f89c680c0e1ccabbeff1_valles_marineris_enhanced.tif_thumb.png'}]

### MongoDB and Flask Application

In [35]:
# Please see the .py and .html files in this folder.