In [1]:
# Import libraries
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
def visit_url(url, seconds, browser):
    """Load a page in the given browser and hand back its HTML.

    Parameters:
        url: address of the page to open
        seconds: how long to pause after navigating, before the HTML is read
        browser: browser instance used to perform the visit

    Returns:
        The browser's ``html`` attribute, read once the pause has elapsed.
    """
    browser.visit(url)
    # Pause for the requested time so the page can finish loading before it is read
    time.sleep(seconds)
    page_html = browser.html
    return page_html

In [3]:
# Set up the chromedriver and browser
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Call visit_url to get html for the page (pause 5 seconds before reading it)
soup = bs(visit_url("https://mars.nasa.gov/news/",5,browser), 'html.parser')

# Take the first 'li' element with class 'slide'; it holds the title and teaser
results = soup.find('li', class_='slide')
if results is None:
    # find() returns None when nothing matches, e.g. the list had not loaded yet
    raise RuntimeError("No news item found on the page --- try again shortly")

title_tag = results.find('h3')
teaser_tag = results.find('div', class_="article_teaser_body")
if title_tag is None or teaser_tag is None:
    raise RuntimeError("News item is missing its title or description --- try again shortly")

# Find the Title and Description from the html and print it
news_title = title_tag.text
news_p = teaser_tag.text
print(f"Title: {news_title} \nDescription: {news_p}")

Title: Air Deliveries Bring NASA's Perseverance Mars Rover Closer to Launch 
Description: A NASA Wallops Flight Facility cargo plane transported more than two tons of equipment — including the rover's sample collection tubes — to Florida for this summer's liftoff.


In [4]:
# Open the image gallery; only the live browser session is needed at this point,
# so the landing page html is not parsed
visit_url("https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars",0,browser)

# Click on the 'Full Image' and 'more info' to get to the correct page
browser.click_link_by_id('full_image')
browser.click_link_by_partial_text('more info')

# Get the html for the page
soup = bs(browser.html, 'html.parser')

# Find the 'img' tag with class 'main_image' and make sure it carries a source
image_url = soup.find("img", class_="main_image")
if image_url is None or not image_url.get('src'):
    # find() returns None when the detail page has not rendered the image yet
    raise RuntimeError("Featured image not found on the page --- try again shortly")

# Resolve the image source against the current page url; this yields the same
# result as prefixing the site root for root-relative sources and also copes
# with relative or absolute 'src' values
featured_image_url = urljoin(browser.url, image_url['src'])
featured_image_url



'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17357_hires.jpg'

In [5]:
# Call visit_url to get html for the page (pause 20 seconds before reading it)
soup = bs(visit_url("https://twitter.com/marswxreport?lang=en",20,browser), 'html.parser')

# find all 'article' tags in the page
articles = soup.find_all('article')

# Set variables to determine if a report was found; due to lengthy loading
# times sometimes the weather report is not found
latest_report_found = False
mars_weather = "No Report Available --- Try again shortly "

# Stop at the first article whose fifth 'span' starts with 'InSight sol'
for article in articles:
    spans = article.find_all('span')
    # Articles with fewer than five spans cannot hold a report in that position;
    # skip them instead of failing with an IndexError
    if len(spans) > 4 and spans[4].text.startswith("InSight sol"):
        latest_report_found = True
        mars_weather = spans[4].text
        break

mars_weather

'InSight sol 526 (2020-05-20) low -94.0ºC (-137.2ºF) high -1.8ºC (28.8ºF)\nwinds from the SW at 5.2 m/s (11.7 mph) gusting to 19.6 m/s (43.9 mph)\npressure at 7.10 hPa'

In [6]:
# Pull every html table found on the facts page into a list of dataframes
facts_url = "https://space-facts.com/mars/"
tables = pd.read_html(facts_url)

# Work with the first table and give its two columns their names
df = tables[0]
df.columns = ['Category', 'Value']

# Render the dataframe as an html table without the index, then show the markup
html_table = df.to_html(index=False)
print(html_table)

# Collapse the markup onto a single line by dropping every newline
html_table = ''.join(html_table.split('\n'))


<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Category</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [7]:
# Call visit_url to get html for the page
soup = bs(visit_url("https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars",0,browser), 'html.parser')

# Find all 'div' tags with the class 'description'
items = soup.find_all('div',class_='description')

# Trailing text removed from the link text to form the title.
# NOTE(review): assumes every link text ends in ' Enhanced' (the 9 characters
# previously sliced off unconditionally) --- confirm against the live page
title_suffix = " Enhanced"

# Loop through the links and add one dictionary per hemisphere to the list
hemisphere_image_urls = []
for item in items:
    # Find an 'a' tag in the current 'div'; skip entries that have none
    link = item.find("a")
    if link is None:
        continue
    link_text = link.text
    # Drop the suffix only when it is actually present, so other text is left intact
    if link_text.endswith(title_suffix):
        title = link_text[:-len(title_suffix)]
    else:
        title = link_text
    # Click the link
    browser.click_link_by_partial_text(link_text)
    # Find the Sample link in the live page and get the url
    image = browser.find_by_text('Sample')
    image_url = image['href']
    # Add the title and url as a dictionary to the list
    hemisphere_image_urls.append({"title": title, "img_url": image_url})
    # Return to the previous page
    browser.back()
hemisphere_image_urls



[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [8]:
# Quit the browser instance that the earlier cells used for scraping
browser.quit()