In [1]:
from time import sleep
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Start a Chrome session through splinter, handing it the chromedriver location as a keyword argument.
# NOTE(review): the relative path presumably requires chromedriver.exe next to the notebook or on PATH — confirm.
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", **executable_path)

In [3]:
# NASA Mars news page (URL taken from the GitLab assignment instructions)
url = "https://mars.nasa.gov/news/"
# Navigate the browser to the news page
browser.visit(url)

In [4]:
# Take the page source the browser currently holds and parse it with the built-in HTML parser
html = browser.html
news_soup = BeautifulSoup(html, features="html.parser")

In [5]:
# The first title on the page sits in a <div class="content_title">
# (class name found with Chrome's inspect feature).
latest_title = news_soup.find("div", attrs={"class": "content_title"}).get_text()
latest_title

'NASA Updates Mars 2020 Mission Environmental Review'

In [6]:
# Same lookup for the teaser paragraph: first <div class="article_teaser_body"> on the page
first_teaser_paragraph = news_soup.find("div", attrs={"class": "article_teaser_body"}).get_text()
first_teaser_paragraph

'NASA and the Department of Energy have completed a more detailed risk analysis for the Mars 2020 rover launch from Florida.'

In [7]:
# Open the JPL space-images page (Mars category), where the featured image is found
jpg_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpg_url)

In [8]:
# Get the full-size image by telling splinter to click the link whose text contains "FULL IMAGE" on the main page.
# Documentation at: https://splinter.readthedocs.io/en/latest/elements-in-the-page.html
browser.click_link_by_partial_text("FULL IMAGE")

In [9]:
# Click the 'more info' link to go to the detail page of the image.
# When cells run back-to-back (Restart & Run All) this executes immediately after the
# previous click, so the link may not be present yet. Give it up to 5 seconds to
# appear before clicking; if it never appears, the click below still raises as before.
browser.is_element_present_by_text("more info", wait_time=5)
browser.click_link_by_partial_text("more info")

In [10]:
# Parse the page the browser is showing after the two clicks
jpg_html = browser.html
jpg_soup = BeautifulSoup(jpg_html, features="html.parser")


In [11]:
# Path of the featured image, read from the src attribute of <img class="main_image">
jpg_url = jpg_soup.find("img", attrs={"class": "main_image"}).attrs["src"]
# Prefix the site root to that path to get the full URL
featured_image_url = "https://www.jpl.nasa.gov" + jpg_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19046_hires.jpg'

In [12]:
# Open the Mars weather Twitter account (marswxreport) in the browser
twitter_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(twitter_url)

In [13]:
# Parse the Twitter page source currently held by the browser
twitter_html = browser.html
twitter_soup = BeautifulSoup(twitter_html, features="html.parser")

In [14]:
# Text of the first <p> that carries the TweetTextSize class
recent_tweet = twitter_soup.find("p", attrs={"class": "TweetTextSize"}).get_text()
# Shoutout Guido for teaching me the splitlines() function
recent_tweet.splitlines()

['InSight sol 350 (2019-11-20) low -103.0ºC (-153.4ºF) high -23.2ºC (-9.8ºF)',
 'winds from the SSE at 5.0 m/s (11.2 mph) gusting to 20.7 m/s (46.4 mph)',
 'pressure at 6.80 hPapic.twitter.com/OXkl0lJczQ']

In [15]:
# Open the Mars facts page in the browser.
# Note: the table is read in the next cell by passing facts_url straight to pandas,
# which does not use this browser session.
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)

In [16]:
# pandas has a built-in reader that converts the HTML tables at a URL into a list of DataFrames
# https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.read_html.html
facts_table = pd.read_html(facts_url)
# The first table in the list is the one used here
facts_df = facts_table[0]
# Replace the parsed column labels with two placeholder names
facts_df.columns = ['Column1','Column2']
facts_df

Unnamed: 0,Column1,Column2
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# Render the facts table as an HTML string (displayed only; the result is not stored in a variable)
facts_df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Column1</th>\n      <th>Column2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium BC

In [18]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemisphere_url)

In [19]:
# Parse the hemisphere search-results page as rendered in the browser
hemisphere_html = browser.html
hemisphere_soup = BeautifulSoup(hemisphere_html, features="html.parser")

In [20]:
# names = hemisphere_soup.find_all("h3")

# hemisphere_links = []

# for hemispheres in names:
#     hemisphere_dict = {}
#     hemisphere_dict["Hemisphere Name"] = hemisphere_soup.find("h3").text
#     browser.click_link_by_partial_text("Hemisphere")
#     browser.click_link_by_partial_text("Sample")
#     hemisphere_dict["Image Link"] = hemisphere_soup.find("img")["src"]
#     hemisphere_links.append(hemisphere_dict)
# print(hemisphere_links)

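## Couldn't find a way to get the loop to perform every step (read the name, click through, grab the image link) in a single pass.
## The browser-based attempts are kept commented out for reference; the requests-based cells further down replace them.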

In [21]:
# # create an empty list to collect all the hemisphere names scraped from the website
# hemisphere_names = []
# # Hemisphere names are designated as 'h3's in the source code. Call all of them using find_all
# names = hemisphere_soup.find_all("h3")
# # Loop through all the 'h3' elements and append them to the empty list created. 
# for hemispheres in names:
#     hemisphere_names.append(hemispheres.text)
#     # use browser like you did here:
#     # hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
#     # browser.visit(hemisphere_url)
#     # to go to the URL for the given picture, and then use bs4 to get the link to the jpg/gif
    
    
# hemisphere_names

In [22]:
# image_links = []

# images = hemisphere_soup.find_all("h3")


# # for pics in images:
# #     browser.click_link_by_partial_text("Hemisphere")
# #     browser.click_link_by_partial_text("Sample")
# #     temp = browser.html
# #     temp_soup = BeautifulSoup(temp, "html.parser")
# #     temp_url = temp_soup.find("a target", "href")
# #     image_links.append(temp_url)

# for i in range(1,5):
#     browser.find_by_xpath('//div['+ str(i) +']/div/a/h3/..').click()
#     sleep(2)
#     browser.back()
#     print("next step")

In [23]:
import requests

In [24]:
# Base URL of the USGS Astrogeology site; used as the prefix for the relative paths requested below
root_url = 'https://astrogeology.usgs.gov/'

In [25]:
# Fetch the hemisphere search-results page with plain HTTP (no browser involved).
# The 30-second timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error here rather than as a confusing
# parse failure in a later cell.
page = requests.get(root_url + "search/results?q=hemisphere+enhanced&k1=target&v1=Mars", timeout=30)
page.raise_for_status()

In [26]:
# Re-parse the results from the requests response; this replaces the soup built from the browser earlier
hemisphere_soup = BeautifulSoup(page.text, features="html.parser")

In [27]:
# Every <div class="item"> on the results page
divs = hemisphere_soup.find_all('div', class_="item")

In [28]:
# Build a mapping of hemisphere title -> image download URL by following each
# result item's link to its detail page.
hemisphere_dict = {}

for item in divs:
    name = item.find('h3').text
    # root_url ends with '/', so plain concatenation produces '//' whenever the href
    # itself starts with '/'; urljoin resolves both relative and root-relative hrefs cleanly.
    detail_url = urljoin(root_url, item.find('a').attrs['href'])
    # Bounded wait plus an explicit status check so a stalled or failed request is
    # reported here instead of surfacing as a None lookup in the parse below.
    detail_page = requests.get(detail_url, timeout=30)
    detail_page.raise_for_status()
    link_bs = BeautifulSoup(detail_page.text, 'html.parser')
    # First anchor inside the <div class="downloads"> block of the detail page.
    # Stored under its own name so the news-page `url` defined earlier is not overwritten.
    image_url = link_bs.find('div', attrs={'class': 'downloads'}).find('a').attrs['href']
    hemisphere_dict[name] = image_url

In [29]:
# Display the collected {hemisphere title: image URL} mapping
hemisphere_dict

{'Cerberus Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'Schiaparelli Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'Syrtis Major Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'Valles Marineris Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}