# Scraping

In [1]:
from bs4 import BeautifulSoup as bs
from splinter import Browser
from config import exe_path
import pandas as pd
import requests

In [2]:
executable_path = {'executable_path':exe_path}
browser = Browser('chrome', **executable_path)

## Nasa Mars News

In [3]:
# Url of the page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

In [4]:
html = browser.html
soup = bs(html, 'html.parser')

In [5]:
# print(soup.prettify())

In [6]:
results = soup.find('div', class_="list_text")
news_title = results.find('a').text.strip('\n')
news_title
# print(results)

"NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light"

In [7]:
news_p = soup.find('div', class_="article_teaser_body").text
news_p

'Vast areas of the Martian night sky pulse in ultraviolet light, according to images from NASA’s MAVEN spacecraft. The results are being used to illuminate complex circulation patterns in the Martian atmosphere.'

## JPL Mars Space Image

In [19]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [20]:
html = browser.html
soup = bs(html, 'html.parser')

In [21]:
# print(soup.prettify())

In [22]:
browser.click_link_by_id("full_image")

In [23]:
html = browser.html
soup = bs(html, 'html.parser')

In [24]:
# print(soup.prettify())

In [25]:
results = soup.find('a', class_="button", target="_top")
partial_href = results['href']
partial_href

'/spaceimages/details.php?id=PIA14934 '

In [26]:
browser.click_link_by_partial_href(partial_href)



In [27]:
html = browser.html
soup = bs(html, 'html.parser')

In [28]:
# print(soup.prettify())

In [29]:
results = soup.find('figure', class_="lede")
link = results.find('a')
partial_href = link['href']
partial_href

'/spaceimages/images/largesize/PIA14934_hires.jpg'

In [30]:
url_head = url.split('?')
url_head2 = url_head[0].rsplit('/', maxsplit=2)
url_head2

['https://www.jpl.nasa.gov', 'spaceimages', '']

In [31]:
featured_image_url = url_head2[0] + partial_href
featured_image_url
# 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16225_hires.jpg'

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA14934_hires.jpg'

## Mars Weather

In [32]:
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [36]:
html = browser.html
soup = bs(html, 'html.parser')

In [37]:
results = soup.find('div', class_="css-1dbjc4n r-1iusvr4 r-16y2uox r-1777fci r-1mi0q7o")
spans = results.find_all('span')
mars_weather = spans[4].text
mars_weather

'InSight sol 605 (2020-08-09) low -92.7ºC (-134.8ºF) high -18.4ºC (-1.1ºF)\nwinds from the WNW at 8.8 m/s (19.7 mph) gusting to 22.5 m/s (50.4 mph)\npressure at 7.90 hPa'

## Mars Facts

In [38]:
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [39]:
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [40]:
df = tables[0]
df.columns = ['Name', 'Value']
df

Unnamed: 0,Name,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [41]:
df.set_index('Name', inplace=True)

In [42]:
html_string = df.to_html()
html_string = html_string.replace('\n', '')
html_string

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Name</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [43]:
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [44]:
hemisphere_image_urls = []

In [45]:
html = browser.html
soup = bs(html, 'html.parser')

In [46]:
print(soup.prettify())

<html lang="en">
 <head>
  <link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
  <title>
   Astropedia Search Results | USGS Astrogeology Science Center
  </title>
  <meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
  <meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
  <meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
  <!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
  <link href="/css/main.css" media="screen" rel="stylesheet"/>
  <link href="/css/print.css" media="print" rel="styles

In [47]:
results = soup.find_all('div', class_="item")
partial_href = results[0].find('a')['href']
partial_href

'/search/map/Mars/Viking/cerberus_enhanced'

In [48]:
browser.click_link_by_partial_href(partial_href)



ElementNotInteractableException: Message: element not interactable: element has zero size
  (Session info: chrome=84.0.4147.105)
