# Scraping. Part 1

In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd

## NASA Mars News
* Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
* Assign the text to variables that you can reference later.

In [2]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")

In [5]:
# Get the title and paragraph
news_title = soup.find("div",class_="content_title").text
news_paragraph = soup.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"Para: {news_paragraph}")

Title: What Does a Marsquake Look Like?
Para: InSight scientists used a special "shake room" to demonstrate the differences between quakes on Earth, the Moon and Mars.


## JPL Mars Space Images - Featured Image
* Visit the url for JPL Featured Space Image at https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars.
* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called 'featured_image_url'.
* Make sure to find the image url to the full size .jpg image.
* Make sure to save a complete url string for this image.

In [6]:
# assign url and open browser
featured_image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(featured_image_url)

In [7]:
# Scrape page into Soup
jpl_html = browser.html
jpl_soup = bs(jpl_html, "lxml")

In [8]:
featured = jpl_soup.find('div', class_='default floating_text_area ms-layer')
featured_image = featured.find('footer')
featured_image_url = 'https://www.jpl.nasa.gov'+ featured_image.find('a')['data-fancybox-href']
print(str(featured_image_url))

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16567_ip.jpg


## Mars Weather
* Visit the Mars Weather twitter account at https://twitter.com/marswxreport?lang=en
* Scrape the latest Mars weather tweet from the page. 
* Save the tweet text for the weather report as a variable called 'mars_weather'.

In [9]:
#get mars weather's latest tweet from the website
weather_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(weather_url)

In [10]:
# Scrape page into Soup
weather_html = browser.html
weather_soup = bs(weather_html, 'html.parser')

#Retrieve latest tweet
mars_weather = weather_soup.find('p', 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text').text

#Display text
print(mars_weather)

InSight sol 229 (2019-07-19) low -99.2ºC (-146.6ºF) high -24.7ºC (-12.4ºF)
winds from the SW at 4.0 m/s (9.0 mph) gusting to 14.8 m/s (33.0 mph)
pressure at 7.60 hPapic.twitter.com/WEjGzvCEhb


## Mars Facts
* Visit the Mars Facts webpage at https://space-facts.com/mars/
* Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
* Use Pandas to convert the data to a HTML table string.

In [11]:
# get mars weather's latest tweet from the website
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)

In [28]:
# convert facts/table to DataFrame
table = pd.read_html(facts_url)
df = table[0]
mars_df = df.drop('Earth', axis=1) 
mars_df.columns = ['Description', 'Mars']
mars_df = mars_df.set_index(['Description'], drop=True)
mars_df


Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Diameter:,"6,779 km"
Mass:,6.39 × 10^23 kg
Moons:,2
Distance from Sun:,"227,943,824 km"
Length of Year:,687 Earth days
Temperature:,-153 to 20 °C


In [13]:
# convert df to HTML string
mars_df_html = mars_df.to_html()
mars_df_html = mars_df_html.replace("\n", "")
mars_df_html

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Mars</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Diameter:</td>      <td>6,779 km</td>    </tr>    <tr>      <th>1</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>    </tr>    <tr>      <th>2</th>      <td>Moons:</td>      <td>2</td>    </tr>    <tr>      <th>3</th>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>    </tr>    <tr>      <th>4</th>      <td>Length of Year:</td>      <td>687 Earth days</td>    </tr>    <tr>      <th>5</th>      <td>Temperature:</td>      <td>-153 to 20 °C</td>    </tr>  </tbody></table>'

In [14]:
# convert df to HTML and export it as HTML page
mars_df.to_html('mars_facts.html')

## Mars Hemispheres
* Visit the USGS Astrogeology site at https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars to obtain high resolution images for each of Mar's hemispheres.
* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys 'img_url' and 'title'.
* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [15]:
# get url loaded
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

In [16]:
# Scrape page into Soup
hemispheres_html = browser.html
hemispheres_soup = bs(hemispheres_html, 'html.parser')

In [17]:
# scrape all information from the page
items = hemispheres_soup.find("div", class_="collapsible results")
all_items = items.find_all("div", class_="item")
hemisphere_image_urls = []

for item in all_items:
    title = item.find('h3').text
    browser.click_link_by_partial_text(title)
    
    # refresh browser page to image page
    html = browser.html
    soup = bs(html, 'html.parser')
    
    # get url address for full picture
    img_url = soup.find("div", class_="downloads").find("ul").find("li").a.get("href")
    
    #append dictionary to list
    hemisphere_image_urls.append({"title":title, "img_url":img_url })
    browser.back()
    
hemisphere_image_urls


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]