In [1]:
#Import dependencies for web scraping: BeautifulSoup, Splinter, Pandas
from bs4 import BeautifulSoup
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pymongo
import pandas as pd

In [2]:
#Connect to MongoDB
mongo_conn = 'mongodb://localhost:27017'
client=pymongo.MongoClient(mongo_conn)

In [3]:
#Generate database and collection to MongoDB
client.mars_db.mars.insert_one({'news_title':'title',
                               'news_p':'description'})

<pymongo.results.InsertOneResult at 0x1d57a041b40>

# Nasa Mars News:

In [4]:
#Set up url path
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

In [5]:
#Access url path
response = requests.get(url)

In [6]:
#Create a BeautifulSoup object to parse html
soup = BeautifulSoup(response.text, 'html.parser')

In [7]:
#Use the inspect tool within the browser to find tag and class associated with news title content
news_title = soup.find_all('div', class_='content_title')
print(news_title)

[<div class="content_title">
<a href="/news/8882/nasas-perseverance-drives-on-mars-terrain-for-first-time/">
NASA's Perseverance Drives on Mars' Terrain for First Time
</a>
</div>, <div class="content_title">
<a href="/news/8880/nasa-awards-mars-ascent-propulsion-system-contract-for-sample-return/">
NASA Awards Mars Ascent Propulsion System Contract for Sample Return
</a>
</div>, <div class="content_title">
<a href="/news/8878/nasa-to-provide-update-on-perseverance-firsts-since-mars-landing/">
NASA to Provide Update on Perseverance ‘Firsts' Since Mars Landing 
</a>
</div>, <div class="content_title">
<a href="/news/8870/nasas-mars-perseverance-rover-provides-front-row-seat-to-landing-first-audio-recording-of-red-planet/">
NASA's Mars Perseverance Rover Provides Front-Row Seat to Landing, First Audio Recording of Red Planet 
</a>
</div>, <div class="content_title">
<a href="/news/8868/nasa-to-reveal-new-video-images-from-mars-perseverance-rover/">
NASA to Reveal New Video, Images From M

In [8]:
#Use the inspect tool within the browser to find tag and class associated with news paragraph content
news_p = soup.find_all('div', class_="rollover_description_inner")
print(news_p)

[<div class="rollover_description_inner">
The first trek of the agency’s largest, most advanced rover yet on the Red Planet marks a major milestone before science operations get under way.
</div>, <div class="rollover_description_inner">
The award moves NASA and ESA a step closer to realizing Mars Sample Return, an ambitious planetary exploration program that will build upon decades of science, knowledge, and experience.
</div>, <div class="rollover_description_inner">
Mission team members are participating in a virtual teleconference to discuss milestones achieved so far since the Feb. 18 landing and those to come.
</div>, <div class="rollover_description_inner">
The agency’s newest rover captured first-of-its kind footage of its Feb. 18 touchdown and has recorded audio of Martian wind.


</div>, <div class="rollover_description_inner">
First-of-its kind footage from the agency’s newest rover will be presented during a briefing this morning.
</div>, <div class="rollover_description_in

# JPL Mars Space Images - Featured Image

In [9]:
#Set up splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

image_urlpath='https://www.jpl.nasa.gov/images?search=&category=Mars'
browser.visit(image_urlpath)

browser.find_by_css('.BaseImagePlaceholder').click()

html=browser.html
soup = BeautifulSoup(html, 'html.parser')
img_info = soup.find('img', class_ = 'BaseImagePlaceholder')
featured_image_url = 'https://d2pn8kiwq2w21t.cloudfront.net/images/jpegPIA24460.width-1024.jpg'

browser.back()
browser.quit()

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [C:\Users\jrose\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache






In [10]:
#Connect to established MongoDB to insert into collection
client.mars_db.mars.insert_one({'featured_image_url': featured_image_url})

<pymongo.results.InsertOneResult at 0x1d57af46e40>

# Mars Facts

In [11]:
#Use pandas to parse mars facts page
mars_facts_url = 'https://space-facts.com/mars/'

In [12]:
#Read into html with pandas
table_list = pd.read_html(mars_facts_url)
len(table_list)

3

In [13]:
#Convert data to a HTML table string
for each_table in table_list:
    display(each_table.head())

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days


Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [14]:
#Select one of the tables to save and clean table for 
mars_df = table_list[0]
mars_df.columns = ['Mars Planet Profile', 'Facts']
clean_mars = mars_df.drop(0)
cleaner_mars = clean_mars.set_index('Mars Planet Profile')
cleaner_mars

Unnamed: 0_level_0,Facts
Mars Planet Profile,Unnamed: 1_level_1
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
#Export as a .html file
cleaner_mars.to_html('marsfacts', index=False)

# Mars Hemispheres

In [16]:
#Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [C:\Users\jrose\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache






In [17]:
#Setup BeautifulSoup
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

browser.visit(url)
html=browser.html
soup = BeautifulSoup(html, 'html.parser')

In [18]:
#Use inspection tool to find tag and class associated with image
mars_hemis_info = soup.find_all('div', class_= 'item')

In [19]:
#Set up a for loop to find all hemisphere images and the titles
img_urls = []

for x in range(len(mars_hemis_info)):
    html = browser.find_by_css('h3')
    html[x].click()
    
    img_html = browser.html
    soup = BeautifulSoup(img_html, 'html.parser')
    
    main_hemis_url = 'https://astrogeology.usgs.gov'
    
    full_img_url = soup.find('img', class_='wide-image')['src']
    
    fin_url = main_hemis_url + full_img_url
    
    title = browser.find_by_css('.title').text
    
    img_urls.append({'title': title,
                         'img_url': fin_url})
    
    browser.back()
browser.quit()

In [20]:
#Verify that title and image data accessed is correct
img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [21]:
#Save to MongoDB
client.mars_db.mars.insert_one({'mars_hemispheres': img_urls})

<pymongo.results.InsertOneResult at 0x1d57b157d00>