In [12]:
# Dependencies
import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [13]:
executable_path = {'executable_path': ChromeDriverManager().install()}

browser = Browser('chrome', **executable_path, headless = False)



Current google-chrome version is 95.0.4638
Get LATEST driver version for 95.0.4638
Driver [/Users/margaretwharton/.wdm/drivers/chromedriver/mac64/95.0.4638.69/chromedriver] found in cache


In [14]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [15]:
# Define database and collection
db = client.MissionToMars_db
collection = db.marsdata

## NASA Mars News

In [16]:
# Visit the URL for mars news
url = 'https://redplanetscience.com/'
browser.visit(url)

# pull html into Beautiful Soup parser
html=browser.html
soup=bs(html, 'html.parser')

In [7]:
#print(soup)

In [17]:
# results are returned as an iterable list
news_elements = soup.find_all('div', class_='list_text')
news_title = news_elements[0].find('div', class_ = 'content_title').text
news_p = news_elements[0].find('div', class_ = 'article_teaser_body').text
news_title, news_p

("NASA's Curiosity Keeps Rolling As Team Operates Rover From Home",
 'The team has learned to meet new challenges as they work remotely on the Mars mission.')

## JPL Mars Space Images

In [18]:
# Visit the URL for futured images
url = 'https://spaceimages-mars.com/'
browser.visit(url)

# pull html into Beautiful Soup parser
html=browser.html
soup=bs(html, 'html.parser')

In [20]:
#print(soup)

In [21]:
# find the url of the fuatured images
image = soup.find('img', class_ = 'headerimage fade-in')['src']
featured_imge_url = url + image
print(f'The featured image URL: {featured_imge_url}')

The featured image URL: https://spaceimages-mars.com/image/featured/mars3.jpg


## Mars Facts

In [22]:
# Visit Mars facts page & use Pandas to read html
mars_facts='https://space-facts.com/mars/'
mars_fact_table=pd.read_html(mars_facts)

In [23]:
# Create df with table data
df = mars_fact_table[0]
# Create Data Frame
df.columns = ["Description", "Value"]
# Set index to Description
df.set_index("Description", inplace=True)
# Print Data Frame
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
# Save fact table as html
html_table = df.to_html()

# Strip unwanted newlines to clean up the table
html_table.replace("\n", '')

# Save html code
df.to_html("mars_facts_data.html")
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

## Mars Hemispheres

In [25]:
# obtain high resolution images for each of Mar's hemispheres
url = 'https://marshemispheres.com/'
browser.visit(url)

# pull html into Beautiful Soup parser
html=browser.html
soup=bs(html, 'html.parser')

In [27]:
#print(soup)

In [28]:
# create a list for disctionay of url
img_title_list = []

# loop thru the list to retrive High resolution image url and title
high_reso_image = soup.find_all('div', class_ = 'description')

for image in high_reso_image: 
    #     title of the image
    image_title = image.find('h3').get_text()
     #     find image url
    img_url = image.find('a', class_ = 'itemLink product-item')['href']
    hemis_url = url + img_url
#     now find the high resolution image from 'hemis_url'
    browser.visit(hemis_url)
    html = browser.html
    soup = bs(html,'html.parser')
#     get image src
    img_src = soup.find('img', class_='wide-image')['src']
#     create image link
    highresol_imgurl = url + img_src
    
#     create disctionary of titles and high resolution url
    hemisphere_image_url = [{
        'title': image_title,
        'image_url': highresol_imgurl
    }]      
#      append titles and images to the list
    img_title_list += hemisphere_image_url

# print(img_title_list)
for high_res_image in img_title_list:
    
    print(high_res_image['image_url'], high_res_image['title']   
    )

https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg Cerberus Hemisphere Enhanced
https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg Schiaparelli Hemisphere Enhanced
https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg Syrtis Major Hemisphere Enhanced
https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg Valles Marineris Hemisphere Enhanced


### Creating dictionary & exporting data to MongoDB

In [29]:
# Create dictionary with all scraped ata
mars_information = {
    "news_title": news_title,
    "news_p": news_p,
    "featured_imge_url": featured_imge_url,
    "facts_table": html_table,
    "hemispheres": img_title_list
}
mars_information

{'news_title': "NASA's Curiosity Keeps Rolling As Team Operates Rover From Home",
 'news_p': 'The team has learned to meet new challenges as they work remotely on the Mars mission.',
 'featured_imge_url': 'https://spaceimages-mars.com/image/featured/mars3.jpg',
 'facts_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 year

In [30]:
# Data to MongoDB
from pymongo import MongoClient

client = MongoClient()
client
client = MongoClient(host="localhost", port=27017)
db = client.MissionToMars_DB
collection = db.marsdata

result = collection.insert_one(mars_information)
result

<pymongo.results.InsertOneResult at 0x7fe091277a40>