In [4]:
# Dependencies
import os
from urllib.parse import urljoin

import pymongo
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Open a PyMongo client against the MongoDB server on localhost
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection inside it by name
db = client['nasa_mars_landing_db']
collection = db['articles']

In [5]:
# Location of the chromedriver binary. Set the CHROMEDRIVER_PATH environment
# variable to run the notebook on another machine; when it is unset the
# original hard-coded location is used, so existing setups keep working.
chromedriver_path = os.environ.get(
    'CHROMEDRIVER_PATH',
    '\\Users\\Paul-DS\\Downloads\\chromedriver.exe',
)
executable_path = {'executable_path': chromedriver_path}

# headless=False opens a visible Chrome window driven by splinter.
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [2]:
# Page to scrape for this section; the following cell passes it to browser.visit().
# NOTE: `url` is reassigned in the later sections of this notebook, so re-run
# this cell before re-running the news scrape.
url = 'https://mars.nasa.gov/news/'

In [4]:
# Load the page held in `url` in the splinter-controlled browser
browser.visit(url)

# Take the browser's current HTML and parse it with BeautifulSoup
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [6]:
# Read the text of the page's <title> element and show it
title = soup.title.get_text()
print(title)

News  – NASA’s Mars Exploration Program 


In [34]:
# Collect the headline and teaser text of every article container on the page.
# Each article becomes a one-entry dict {title: summary}; the dicts are
# appended to `nasa_news` in page order.
nasa_news = []
results = soup.find_all('div', class_="image_and_description_container")
for result in results:
    news_link = result.find('a')
    if news_link is None:
        # Container without a link: nothing to extract from it.
        continue

    summary_tag = news_link.find('div', class_="rollover_description_inner")
    if summary_tag is None:
        # No teaser element; skip the entry instead of failing on None.text.
        continue
    news_summary = summary_tag.text.strip('\n')

    if news_link.h3 is not None:
        news_title = news_link.h3.text
    else:
        # When the H3 tag is not available, use the alt text of the second
        # <img> that carries an alt attribute.
        # NOTE(review): presumably the first such image is decorative --
        # verify against the page markup.
        news_title_img = news_link.find_all('img', alt=True)
        if len(news_title_img) < 2:
            # No usable title source; skip rather than raise IndexError.
            continue
        news_title = news_title_img[1]['alt']

    mars_news = {news_title: news_summary}
    nasa_news.append(mars_news)

In [35]:
# Display the collected list of {title: summary} dicts
nasa_news

[{'Curiosity on the Move Again': "NASA's Mars Curiosity rover drove about 197 feet over the weekend to a site called Lake Orcadie, pushing its total odometry to over 12 miles."},
 {'The Mars InSight Landing Site Is Just Plain Perfect': 'If the InSight landing zone were ice cream, it would be vanilla.'},
 {"Five Things to Know About InSight's Mars Landing": "NASA engineers will be holding their breath when their spacecraft heads into Mars' atmosphere on Nov. 26."},
 {'NASA Launches a New Podcast to Mars': "NASA's new eight-episode series 'On a Mission' follows the InSight spacecraft on its journey to Mars and details the extraordinary challenges of landing on the Red Planet."},
 {'Update on Opportunity Rover Recovery Efforts': 'After a review of the progress of the listening campaign, NASA will continue its current strategy for attempting to make contact with the Opportunity rover for the foreseeable future.'},
 {'Third ASPIRE Test Confirms Mars 2020 Parachute a Go': 'The supersonic par

### JPL Mars Space Images - Featured Image

In [22]:
# `url` is the page visited in the next cell; `main_url` is the site root that
# is later prefixed to the image path extracted from that page.
# NOTE: this reassigns the `url` used by the news section above.
url='https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
main_url='https://www.jpl.nasa.gov'

In [6]:
# Point the splinter browser at the address currently stored in `url`
browser.visit(url)

# Build a BeautifulSoup tree from the HTML the browser is showing
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [23]:
# Navigate the site and find the image url for the current Featured Mars Image.
# The image path is read from the inline `style` attribute of the <article>
# inside the featured section, where it appears between single quotes.
image_url = None
results = soup.find_all('section', class_="centered_text clearfix main_feature primary_media_feature single")
for result in results:
    news_link = result.find('article')
    if news_link is None:
        continue
    # Split on the quote only: splitting on ':' first breaks as soon as the
    # style holds several declarations or the URL itself contains a colon.
    style_parts = news_link.get('style', '').split("'")
    if len(style_parts) > 1:
        image_url = style_parts[1]

if image_url is None:
    # Fail with a clear message instead of a NameError further down.
    raise ValueError('Featured image URL not found on the JPL page')

# urljoin copes with absolute URLs and with paths lacking a leading slash.
nasa_mars_featured_image_url = urljoin(main_url, image_url)
print(nasa_mars_featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17841-1920x1200.jpg


### Mars Weather

In [26]:
# Twitter page visited in the next cell.
# NOTE: this reassigns the `url` used by the earlier sections.
url='https://twitter.com/marswxreport?lang=en'

In [27]:
# Open the address stored in `url` with the splinter browser
browser.visit(url)

# Parse the HTML currently held by the browser into a BeautifulSoup object
html = browser.html
soup = BeautifulSoup(markup=html, features='html.parser')

In [42]:
# Scrape the latest Mars weather tweet from the page and keep its text in
# the variable `mars_weather`.

# soup.find returns the first tweet-text container on the page; the tweet
# text is the content of its <p> element.
first_tweet = soup.find('div', class_="js-tweet-text-container")
mars_weather = first_tweet.p.text
print(mars_weather)

Sol 2220 (2018-11-04), high -10C/14F, low -73C/-99F, pressure at 8.66 hPa, daylight 06:16-18:33


### Mars Facts

In [61]:
import pandas as pd

In [39]:
# Page whose HTML tables are read with pandas in the next cell
mars_space_facts_url = 'https://space-facts.com/mars/'

In [101]:
# Read the HTML tables found at the facts page; read_html returns a list of
# DataFrames, displayed here for inspection.
tables = pd.read_html(io=mars_space_facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [86]:
# Check what kind of object read_html returned
type(tables)

list

In [102]:
# Take the first table and give its two columns descriptive labels.
# Work on a copy: assigning to df.columns on tables[0] itself would silently
# relabel the frame that the earlier `tables` cell already displayed.
df = tables[0].copy()
df.columns = ['Metric', 'Measurement']
df

Unnamed: 0,Metric,Measurement
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [108]:
# Use the metric names as the row index. Reassigning (rather than using
# inplace=True) and checking for the column first keeps this cell safe to
# re-run: once 'Metric' is the index it is no longer a column, and a second
# set_index call would raise KeyError.
if 'Metric' in df.columns:
    df = df.set_index('Metric')

In [109]:
# Inspect the index to confirm 'Metric' is now the row index
df.index

Index(['Equatorial Diameter:', 'Polar Diameter:', 'Mass:', 'Moons:',
       'Orbit Distance:', 'Orbit Period:', 'Surface Temperature:',
       'First Record:', 'Recorded By:'],
      dtype='object', name='Metric')

In [114]:
# Render the facts DataFrame as an HTML <table> string and display it
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n    </tr>\n    <tr>\n      <th>Metric</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr

In [117]:
# Drop every newline character from the generated table markup
html_table = ''.join(html_table.split('\n'))

In [118]:
# Display the table markup after the newlines were removed
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Measurement</th>    </tr>    <tr>      <th>Metric</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [119]:
# Write the facts table as HTML to the file 'table.html' (path is relative to
# the notebook's working directory)
df.to_html('table.html')