In [1]:
# Dependencies
from urllib.parse import urljoin

import pymongo
import requests
from bs4 import BeautifulSoup

In [2]:
# Open a PyMongo client against the MongoDB server addressed by `conn`
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Select the database and the collection inside it by name
db = client['nasa_mars_landing_db']
collection = db['articles']

In [4]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module.
# The timeout keeps the cell from hanging forever if the server never answers,
# and raise_for_status() stops an HTTP error page from being parsed as the listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [5]:
# Read the text of the page's <title> tag and print it
title = soup.title.get_text()
print(title)

News  – NASA’s Mars Exploration Program 


In [None]:
# Reference sample of article-page markup, held in a bare string so it is not executed:
# it shows the <h1 class='article_title'> heading and the <p> paragraphs inside
# the 'wysiwyg_content' div.
'''<h1 class='article_title'>
Opportunity Hunkers Down During Dust Storm
</h1>
</header>
<div class='clearfix' id='primary_column'>
<div class='wysiwyg_content'>
<p><b>NASA Mars Exploration Rover Status Report</b></p>

<p><b>Updated at 2:25 p.m. PDT on July 26, 2018</b></p>

<p>It&#39;s the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA&#39;s Opportunity rover to recharge its batteries and phone home. The last signal received from the rover was on June 10.</p>

<p>Scientists observing the global event -- which is actually caused by a series of local and regional storms throwing dust into the Martian atmosphere -- say that, as of Monday, July 23, more dust is falling out than is being raised into the planet&#39;s thin air. That means the event has reached its decay phase, when dust-raising occurs in ever smaller areas, while others stop raising dust altogether.</p>

'''

In [34]:
# Examine the results, then determine element that contains news info.
# find_all returns an iterable list of the matching <div> containers; for each one
# we follow its first link and print the article's title and first paragraph.
results = soup.find_all('div', class_="image_and_description_container")
for result in results:
    link_tag = result.find('a', href=True)
    if link_tag is None:
        # Container without a usable link: there is no article to follow
        continue
    news_link = link_tag['href']
    print(news_link)
    # The hrefs are site-absolute ('/news/...'), so plain concatenation with `url`
    # produced '.../news//news/...'; urljoin resolves the link against `url` correctly.
    next_url = urljoin(url, news_link)
    # Timeout so one unresponsive article page cannot stall the whole loop
    next_response = requests.get(next_url, timeout=30)
    if not next_response.ok:
        print('Skipping', next_url, '- HTTP status', next_response.status_code)
        continue
    next_soup = BeautifulSoup(next_response.text, 'lxml')
    news_title = next_soup.find('h1', class_='article_title')
    news_p = next_soup.find('p')
    if news_title is None or news_p is None:
        # find() returns None when the element is absent; skip instead of crashing
        print('Skipping', next_url, '- article title or paragraph not found')
        continue
    print(news_title.text)
    # .string is None when the <p> holds more than one child node;
    # get_text() returns the combined text of everything nested inside it.
    print(news_p.get_text())

/news/8348/opportunity-hunkers-down-during-dust-storm/

Opportunity Hunkers Down During Dust Storm

NASA Mars Exploration Rover Status Report
/news/8347/nasa-finds-ancient-organic-material-mysterious-methane-on-mars/

NASA Finds Ancient Organic Material, Mysterious Methane on Mars

NASA’s Curiosity rover has found new evidence preserved in rocks on Mars that suggests the planet could have supported ancient life, as well as new evidence in the Martian atmosphere that relates to the search for current life on the Red Planet. While not necessarily evidence of life itself, these findings are a good sign for future missions exploring the planet’s surface and subsurface.​
/news/8326/nasa-invests-in-visionary-technology/

NASA Invests in Visionary Technology 

NASA is investing in technology concepts, including several from JPL, that may one day be used for future space exploration missions.
/news/8325/nasa-is-ready-to-study-the-heart-of-mars/

NASA is Ready to Study the Heart of Mars

​NASA 

In [61]:
import pandas as pd

In [39]:
# Page that holds the Mars facts table read by pd.read_html below
mars_space_facts_url = 'https://space-facts.com/mars/'

In [40]:
# read_html fetches the page and returns a list of DataFrames, one per table it parses
tables = pd.read_html(mars_space_facts_url)
# Display the parsed result to see which entry holds the facts table
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [41]:
# Check the container type: the result is a list, so a table must be picked by index
type(tables)

list

In [63]:
# Work on a copy so tables[0] keeps its original two columns. Renaming and
# re-indexing the shared frame in place made this cell fail with a length-mismatch
# error the second time it was run, because tables[0] had only one column left.
df = tables[0].copy()
df.columns = ['Metric', 'Measurement']
# set_index returns a new frame, so every run of this cell starts from the raw table
df = df.set_index('Metric')
df

Unnamed: 0_level_0,Measurement
Metric,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [64]:
# Inspect the row index to confirm the 'Metric' labels are now the index
df.index

Index(['Equatorial Diameter:', 'Polar Diameter:', 'Mass:', 'Moons:',
       'Orbit Distance:', 'Orbit Period:', 'Surface Temperature:',
       'First Record:', 'Recorded By:'],
      dtype='object', name='Metric')

In [66]:
# With no target argument, to_html returns the table markup as a string
html_table = df.to_html()
# Display the raw string (newlines still present at this point)
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n    </tr>\n    <tr>\n      <th>Metric</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr

In [70]:
# Drop every newline from the markup by splitting on it and gluing the pieces back together
html_table = "".join(html_table.split("\n"))

In [71]:
# Display the markup again to verify the newlines are gone
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Measurement</th>    </tr>    <tr>      <th>Metric</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [None]:
# Passing a path makes to_html write the table markup to that file instead of returning it
df.to_html('table.html')