# Scraping with Splinter

In [1]:
# Import Dependencies
import shutil
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Show where the chromedriver executable is installed.
# The leading `!` is IPython's shell escape: the line runs in the system shell, not in Python.

!which chromedriver

/usr/local/bin/chromedriver


In [9]:
# Connect Chromedriver to Browser

# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the previously hard-coded location when it is not on PATH.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'

# splinter receives the driver location through the `executable_path` keyword.
executable_path = {'executable_path': chromedriver_path}

# headless=False opens a visible Chrome window that the following cells drive.
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# Open the NASA Mars news listing in the automated browser.
# The query string is split across lines for readability; adjacent string
# literals are joined by Python into the single original URL.
url = (
    'https://mars.nasa.gov/news/'
    '?page=0&per_page=40'
    '&order=publish_date+desc%2Ccreated_at+desc'
    '&search=&category=19%2C165%2C184%2C204'
    '&blank_scope=Latest'
)
browser.visit(url)

# Fixed 20-second pause before the page HTML is read
# (presumably to let the article list render -- confirm).
time.sleep(20)

In [5]:
# Parse whatever the browser is currently showing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# First <li class="slide"> on the page, then the text container inside it
article = soup.find('li', class_='slide')
text_block = article.find('div', class_='list_text')

# Title of the first article
news_title = text_block.find('div', class_='content_title').text
print(news_title)

# Teaser paragraph of the first article
news_p = text_block.find('div', class_="article_teaser_body").text
print(news_p)


How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 
Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.


In [6]:
# Point the automated browser at the JPL space-images page (Mars category)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
# Parse whatever the browser is currently showing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Featured section: the carousel container and the carousel item inside it
carousel = soup.find('div', class_="carousel_container")
article = carousel.find('article', class_="carousel_item")

# The item's inline `style` attribute carries the image path between single
# quotes, so the piece after the first quote is the site-relative URL.
image_url = article.get('style').split("'")[1]

# Prefix the site root to obtain the full link to the featured image
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'

print(featured_image_url)


https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA14316-1920x1200.jpg


In [10]:
# Point the automated browser at the Mars weather report Twitter account
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

# Fixed 2-second pause before the page HTML is read
# (presumably to let the timeline render -- confirm it is long enough).
time.sleep(2)

In [14]:
# Parse whatever the browser is currently showing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Select every <div> whose inline style contains "translateY(0px)"
# (intended to match the tweet blocks).
tweets = soup.select('div[style*="translateY(0px)"]')

# TODO: mars_weather is not extracted yet. Two earlier attempts are disabled:
#   1. read the fifth <span> of the first selected block:
#        first_block = tweets[0]
#        all_first_tweet_text = first_block.find_all('span')
#        mars_weather = all_first_tweet_text[4].text
#   2. read the first <span> on the page:
#        tweets = soup.find('span')
#        mars_weather = tweets.text

# Dump the raw matched markup for inspection
print(tweets)

[<div style="position: absolute; width: 100%; transform: translateY(0px); transition: opacity 0.3s ease-out 0s;"><div class="css-1dbjc4n r-my5ep6 r-qklmqi r-1adg3ll"><div class="css-1dbjc4n r-18u37iz"><div class="css-1dbjc4n r-eqz5dr r-16y2uox r-1wbh5a2"><article aria-haspopup="false" class="css-1dbjc4n r-1loqt21 r-16y2uox r-1wbh5a2 r-1udh08x r-1j3t67a r-o7ynqc r-6416eg" data-focusable="true" role="article" tabindex="0"><div class="css-1dbjc4n"><div class="css-1dbjc4n"><div class="css-1dbjc4n r-18u37iz r-thb0q2"><div class="css-1dbjc4n r-1iusvr4 r-16y2uox r-5f2r5o r-m611by"></div></div></div><div class="css-1dbjc4n r-18u37iz r-thb0q2" data-testid="tweet"><div class="css-1dbjc4n r-1awozwy r-18kxxzh r-5f2r5o" style="flex-basis: 49px;"><div class="css-1dbjc4n r-18kxxzh r-1wbh5a2 r-13qz1uu"><div class="css-1dbjc4n r-1wbh5a2 r-dnmrzs"><a aria-haspopup="false" class="css-4rbku5 css-18t94o4 css-1dbjc4n r-sdzlij r-1loqt21 r-1adg3ll r-ahm1il r-1udh08x r-o7ynqc r-6416eg r-13qz1uu" data-focusable

In [10]:
# Page whose HTML tables are read with pandas in the next step
mars_table_url = 'https://space-facts.com/mars/'


In [11]:
# pd.read_html returns a list with one DataFrame per table it parses from the page
mars_facts = pd.read_html(mars_table_url, header=None)
# Show how many tables were found
len(mars_facts)

3

In [12]:
# Keep the first table from the list returned by read_html and display it
mars_df = mars_facts[0]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# Save the facts table to disk exactly as before (no header row).
# DataFrame.to_html returns None when it is given a path/buffer, so the
# return value of this call must not be used as the HTML string.
mars_df.to_html('mars_table.html', header=False)

# Render the same table to a string so mars_table_html actually holds the markup.
mars_table_html = mars_df.to_html(header=False)
mars_table_html

In [19]:
# Open the USGS Astrogeology search results for enhanced Mars hemisphere images.
# `url` is reused by the hemisphere loop to return to this results page.
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# Fixed 10-second pause before the links on the page are clicked
# (presumably to let the results render -- confirm).
time.sleep(10)

In [22]:
# Link texts to click on the results page; each is also stored as the image title.
hemispheres = ['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']
hemisphere_images_urls = []

for item in hemispheres:

    # Follow the hemisphere's link from the results page and note where it lands.
    # NOTE(review): newer splinter releases reportedly deprecate
    # click_link_by_partial_text in favour of
    # browser.links.find_by_partial_text(...).click() -- confirm against the
    # installed version before switching.
    browser.click_link_by_partial_text(item)
    current_url = browser.url

    # Fetch the detail page over plain HTTP. The timeout keeps a stalled
    # connection from hanging the notebook, and raise_for_status stops an
    # error page from being parsed as if it were the detail page.
    # (No sleep is needed afterwards: requests.get only returns once the
    # full response has been received.)
    response = requests.get(current_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # First link inside the "downloads" box of the wide-image wrapper
    wide_image = soup.find('div', class_="wide-image-wrapper")
    downloads = wide_image.find('div', class_="downloads")
    href_one = downloads.find('a')['href']

    hemisphere_images_urls.append({'title': item, 'img_url': href_one})

    # Return to the results page so the next hemisphere link can be clicked
    browser.visit(url)
    time.sleep(5)

hemisphere_images_urls


[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]