## Import Dependencies

In [1]:
#Import Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pymongo
import pandas as pd

## Connect to PyMongo

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Define database and collection
db = client.mars_db
collection = db.news

## Latest News

In [3]:
# Set up splinter for news section
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://redplanetscience.com/'
browser.visit(url)

#scrape webpage for container
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
results = soup.find('div', class_='list_text')
news_title = results.find('div', class_='content_title').text
news_subtitle = results.find('div', class_='article_teaser_body').text



Current google-chrome version is 90.0.4430
Get LATEST driver version for 90.0.4430
Driver [C:\Users\17738\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


In [4]:
news_title

"3 Things We've Learned From NASA's Mars InSight "

In [5]:
news_subtitle

'Scientists are finding new mysteries since the geophysics mission landed two years ago.'

In [6]:
browser.quit()

## Featured Image

In [7]:
# Set up splinter for image section
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://spaceimages-mars.com/'
browser.visit(url)

#scrape webpage for full img link
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
browser.links.find_by_partial_text('FULL IMAGE').click()



Current google-chrome version is 90.0.4430
Get LATEST driver version for 90.0.4430
Driver [C:\Users\17738\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


In [8]:
results = soup.find('a', class_='showimg fancybox-thumbs')["href"]
featured_image_url = url + results
featured_image_url

'https://spaceimages-mars.com/image/featured/mars3.jpg'

In [9]:
browser.quit()

## Fact Table

In [25]:
#using pandas
# Webpage url                                                                                                               
data_url = 'https://galaxyfacts-mars.com/'

# Extract tables
dfs = pd.read_html(data_url)
mars_df = dfs[0]
mars_df = mars_df.rename(columns={0:"",1:"Mars",2:"Earth"})
mars_df = mars_df.set_index("")

#put table into html
mars_table_html = mars_df.to_html()
mars_table_html = mars_table_html.replace('\n', '')
mars_table_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>      <th>Earth</th>    </tr>    <tr>      <th></th>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Mars - Earth Comparison</th>      <td>Mars</td>      <td>Earth</td>    </tr>    <tr>      <th>Diameter:</th>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>Moons:</th>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>Distance from Sun:</th>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>Length of Year:</th>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>Temperature:</th>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

## Mars Hemisphere Section

In [11]:
# Set up splinter for hemisphere section
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://marshemispheres.com/'
browser.visit(url)

#scrape webpage for full img link and hemisphere name
hemisphere_image_urls = []
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
results = soup.find_all('div', class_='item')
for result in results:
    #find title with first url
    title = result.find('h3').text
    
    #click the title link to find full res image
    browser.click_link_by_partial_text(title)
    
    #save the new url to get full res image url
    new_html = browser.html
    new_soup = BeautifulSoup(new_html, 'html.parser')
    partial_url = new_soup.find('img', class_="wide-image")['src']
    img_url = url + partial_url
    print(title)
    print(img_url)
    print("----------")
    
    #go back to previous page for next title and image
    browser.back()
    
    # Dictionary to be inserted into list
    post = {
        'title': title,
        'img_url': img_url,
    }
    hemisphere_image_urls.append(post)



Current google-chrome version is 90.0.4430
Get LATEST driver version for 90.0.4430
Driver [C:\Users\17738\.wdm\drivers\chromedriver\win32\90.0.4430.24\chromedriver.exe] found in cache


Cerberus Hemisphere Enhanced
https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
----------
Schiaparelli Hemisphere Enhanced
https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
----------
Syrtis Major Hemisphere Enhanced
https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
----------
Valles Marineris Hemisphere Enhanced
https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg
----------


In [12]:
browser.quit()

In [13]:
#double check list of dictionaries
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]