In [78]:
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pymongo
import os
import pandas as pd

In [171]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 97.0.4692
Get LATEST chromedriver version for 97.0.4692 google-chrome
Driver [/Users/keithmoravec/.wdm/drivers/chromedriver/mac64/97.0.4692.71/chromedriver] found in cache


### NASA Mars News

* Scrape the [Mars News Site](https://redplanetscience.com/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

```python
# Example:
news_title = "NASA's Next Mars Mission to Investigate Interior of Red Planet"

news_p = "Preparation of NASA's next spacecraft to Mars, InSight, has ramped up this summer, on course for launch next May from Vandenberg Air Force Base in central California -- the first interplanetary launch in history from America's West Coast."
```

In [4]:
url = 'https://redplanetscience.com/'
browser.visit(url)

In [5]:
# HTML object
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# soup

In [7]:
results = soup.find('div', class_='list_text')
print(results)

<div class="list_text">
<div class="list_date">January 19, 2022</div>
<div class="content_title">NASA Updates Mars 2020 Mission Environmental Review</div>
<div class="article_teaser_body">NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.</div>
</div>


In [8]:
title = results.find('div', class_='content_title').text
text = results.find('div', class_='article_teaser_body').text

In [9]:
print("---------")
print(title)
print(text)

---------
NASA Updates Mars 2020 Mission Environmental Review
NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.


### JPL Mars Space Images - Featured Image

* Visit the url for the Featured Space Image site [here](https://spaceimages-mars.com).

* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.

* Make sure to find the image url to the full size `.jpg` image.

* Make sure to save a complete url string for this image.

```python
# Example:
featured_image_url = 'https://spaceimages-mars.com/image/featured/mars2.jpg'
```


In [11]:
url = 'https://spaceimages-mars.com/'
browser.visit(url)
browser.links.find_by_partial_text('FULL IMAGE').click()

In [12]:
# HTML object
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

In [37]:
results = soup.find('div', class_='fancybox-inner')
print(results)

<div class="fancybox-inner" style="overflow: visible; width: 640px; height: 350px;"><img alt="" class="fancybox-image" src="image/featured/mars1.jpg"/></div>


In [54]:
img = results.find('img', class_='fancybox-image').get('src')

In [None]:
img = results

In [55]:
print(img)

image/featured/mars1.jpg


In [58]:
featured_image_url=(url+img)
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


### Mars Facts

* Visit the Mars Facts webpage [here](https://galaxyfacts-mars.com) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [93]:
url = 'https://galaxyfacts-mars.com'

In [94]:
tables = pd.read_html(url)[0]
tables

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [95]:
type(tables)

pandas.core.frame.DataFrame

In [96]:
tables.columns=["Description","Mars","Earth"]

In [97]:
tables.set_index("Description", inplace=True)

In [99]:
tables.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

### Mars Hemispheres

* Visit the astrogeology site [here](https://marshemispheres.com/) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

```python
# Example:
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "img_url": "..."},
    {"title": "Cerberus Hemisphere", "img_url": "..."},
    {"title": "Schiaparelli Hemisphere", "img_url": "..."},
    {"title": "Syrtis Major Hemisphere", "img_url": "..."},
]
```

- - -


In [238]:
url = 'https://marshemispheres.com/'
browser.visit(url)

In [239]:
# HTML object
html = browser.html

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(html, 'html.parser')

In [240]:
soup

<html lang="en"><head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<link href="css/jquery-ui.css" rel="stylesheet" type="text/css"/>
<title>Astropedia Search Results | GUSS Astrogeology Science Center</title>
<meta content="GUSS Astrogeology Science Center Astropedia search results." name="description"/>
<meta content="GUSS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
<link href="css/main.css" media="screen" rel="stylesheet"/>
<link href="css/print.css" media="print" rel="stylesheet"/>
<link href="#" rel="icon" type="image/x-ico"/>
</head>
<body id="results">
<header>
<a href="#" style="float:right;margin-top:10px;" target="_blank">
<img alt="USGS: Science for a Changing World" class="logo" height="60" src="images/usgs_logo_main_2x.png"/>
</a>
<a href="#" style="float:

# Single Hemisphere

In [180]:
hemisphere_link = soup.find('img',class_="thumb")

In [181]:
type(hemisphere_link)

bs4.element.Tag

In [182]:
print(hemisphere_link)

<img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/>


In [183]:
img_url = hemisphere_link.get('src')
# img = results.find('img', class_='fancybox-image').get('src')
print(img_url)

images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png


In [188]:
# browser.links.find_by_partial_text("thumb").click()

In [None]:
https://marshemispheres.com/cerberus.html

In [None]:
https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg

### Try another approach

In [189]:
hemisphere_link = soup.find('div',class_="item")

In [190]:
type(hemisphere_link)

bs4.element.Tag

In [191]:
print(hemisphere_link)

<div class="item">
<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-item" href="cerberus.html">
<h3>Cerberus Hemisphere Enhanced</h3>
</a>
<span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
</div>
<!-- end description -->
</div>


In [242]:
title = hemisphere_link.find('h3').text
print(title)

Cerberus Hemisphere Enhanced


In [244]:
title = title.strip("Enhanced")
print(title)

Cerberus Hemisphere 


In [198]:
link=hemisphere_link.find('a',class_="product-item")['href']
link = (url+link)

In [199]:
print(link)

https://marshemispheres.com/cerberus.html


In [216]:
browser.visit(link)
browser.links.find_by_partial_text('Open').click()

In [219]:
# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Retrieve all elements that contain book information

In [235]:
hemisphere_url = soup.find('img',class_="wide-image").get('src')

In [236]:
print(hemisphere_url)

images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg


In [237]:
hemisphere_url=(url+hemisphere_url)
print(hemisphere_url)

https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg


In [None]:
https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg

In [245]:
hemisphere_dict = {"title":title,"img_url":hemisphere_url}

In [246]:
print(hemisphere_dict)

{'title': 'Cerberus Hemisphere ', 'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}


# All Hemispheres

In [250]:
hemisphere_links = soup.find_all('div',class_="item")

In [251]:
type(hemisphere_links)

bs4.element.ResultSet

In [253]:
# print(hemisphere_links)

In [264]:
hemisphere_title_list =[]
for hemisphere_link in hemisphere_links:
    title = hemisphere_link.find('h3').text
    title = title.strip("Enhanced")
    hemisphere_title_list.append(title)

In [265]:
print(hemisphere_title_list)

['Cerberus Hemisphere ', 'Schiaparelli Hemisphere ', 'Syrtis Major Hemisphere ', 'Valles Marineris Hemisphere ']


In [266]:
hemisphere_url_list =[]
for hemisphere_link in hemisphere_links:
    link=hemisphere_link.find('a',class_="product-item")['href']
    link = (url+link)
    browser.visit(link)
    browser.links.find_by_partial_text('Open').click()
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    hemisphere_url = soup.find('img',class_="wide-image").get('src')
    hemisphere_url=(url+hemisphere_url)
    hemisphere_url_list.append(hemisphere_url)

In [267]:
print(hemisphere_url_list)

['https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg', 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg', 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg', 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']


In [284]:
# keys_list = ["title","img_url"]

In [285]:
# # Create a DataFrame of frames using a dictionary of lists
# hemispheres_df = pd.DataFrame({
#     "Title": hemisphere_title_list,
#     "img_url": hemisphere_url_list,
# })
# hemispheres_df

In [283]:
# hemispheres_df.to_dict()

In [280]:
# Create List of dictionaries for each hemisphere

hemisphere_image_urls = []
for i in range(4):
    hemisphere_image_urls.append({"title":hemisphere_title_list[i],"img_url":hemisphere_url_list[i]})
    

In [282]:
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere ', 'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]


In [None]:
# Reference INfo

# HTML object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')


# # # Retrieve all elements that contain book information
# results = soup.find_all('div', class_='list_text')

# scrape the article header 
news_title = soup.find('div', class_='content_title').text

# scrape the article subheader - type in my own quotes - not the ones that came in
news_p = soup.find('div', class_='article_teaser_body').text
MaryClare Colombo (she/her) to Everyone (8:13 PM)
jupyter nbconvert --to script ‘mission-to-mars.ipynb'