In [1]:
from splinter import Browser
import requests
import pymongo
from bs4 import BeautifulSoup as bs

In [2]:
def init_browser():
    """Start a headless Chrome session through splinter.

    Returns:
        The splinter ``Browser`` object created for the ``"chrome"`` driver,
        with ``executable_path="chromedriver"`` and ``headless=True``.
    """
    driver_options = dict(executable_path="chromedriver", headless=True)
    return Browser("chrome", **driver_options)

In [56]:
# Shared accumulator: scrape_info, scrape_img and scrape_weather each write
# their results into this module-level dict and return it.
scrape_dict = {}

In [57]:
def scrape_info():
    """Scrape the first headline and teaser text from the NASA Mars news page.

    The results are stored in the module-level ``scrape_dict`` under the keys
    ``"latest_headline"`` and ``"latest_body"``.

    Returns:
        dict: the shared ``scrape_dict`` after the two keys have been set.

    Raises:
        AttributeError: if the page does not contain the expected
            ``div.list_text`` / ``div.article_teaser_body`` elements
            (``find`` returns ``None`` in that case).
    """
    url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

    browser = init_browser()
    try:
        browser.visit(url)
        # Grab the rendered markup while the session is still alive.
        html = browser.html
    finally:
        # Always shut the browser down, even when the visit fails, so no
        # Chrome/chromedriver process is left behind.
        browser.quit()

    # Scrape page into Soup
    soup = bs(html, "html.parser")

    # First news entry on the page
    news = soup.find("div", class_="list_text")

    # Headline text comes from the first link inside that entry
    title = news.find("a").text

    # Teaser paragraph that accompanies the headline
    body = news.find("div", class_="article_teaser_body").text

    scrape_dict["latest_headline"] = title
    scrape_dict["latest_body"] = body

    # Return results
    return scrape_dict

In [58]:
# Run the news scraper; the returned dict is displayed as the cell output.
scrape_info()

{'latest_headline': 'InSight Is the Newest Mars Weather Service',
 'latest_body': "By collecting data around the clock, NASA's lander will provide unique science about the Martian surface."}

In [59]:
def scrape_img():
    """Scrape the featured Mars image URL from the JPL space-images page.

    The absolute image URL is stored in the module-level ``scrape_dict``
    under the key ``"feat_img_url"``.

    Returns:
        dict: the shared ``scrape_dict`` after ``"feat_img_url"`` has been set.

    Raises:
        AttributeError / TypeError / KeyError: if the page has no ``<footer>``,
            no ``<a>`` inside it, or the link lacks ``data-fancybox-href``.
    """
    url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

    browser = init_browser()
    try:
        browser.visit(url)
        # Grab the rendered markup while the session is still alive.
        html = browser.html
    finally:
        # Always shut the browser down, even when the visit fails, so no
        # Chrome/chromedriver process is left behind.
        browser.quit()

    # Scrape page into Soup
    soup = bs(html, "html.parser")

    # The first <footer> on the page holds the featured-image link
    results = soup.find("footer")

    # Site-relative image path stored on the link's fancybox attribute
    relative_path = results.find("a")["data-fancybox-href"]

    # Prefix the site root to get an absolute URL
    featured_image_url = "https://www.jpl.nasa.gov" + relative_path

    scrape_dict["feat_img_url"] = featured_image_url

    # Return results
    return scrape_dict

In [60]:
# Run the featured-image scraper; the returned shared dict is displayed as the cell output.
scrape_img()

{'latest_headline': 'InSight Is the Newest Mars Weather Service',
 'latest_body': "By collecting data around the clock, NASA's lander will provide unique science about the Martian surface.",
 'feat_img_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18640_ip.jpg'}

In [61]:
def scrape_weather():
    """Scrape the first tweet text from the Mars weather Twitter page.

    The tweet text is stored in the module-level ``scrape_dict`` under the
    key ``"weather_tweet"``.

    Returns:
        dict: the shared ``scrape_dict`` after ``"weather_tweet"`` has been set.

    Raises:
        AttributeError: if no ``<p>`` with the expected tweet-text classes is
            present in the page (``find`` returns ``None`` in that case).
    """
    url = "https://twitter.com/marswxreport?lang=en"

    browser = init_browser()
    try:
        browser.visit(url)
        # Grab the rendered markup while the session is still alive.
        html = browser.html
    finally:
        # Always shut the browser down, even when the visit fails, so no
        # Chrome/chromedriver process is left behind.
        browser.quit()

    # Scrape page into Soup
    soup = bs(html, "html.parser")

    # First paragraph carrying the tweet-text class string
    weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

    scrape_dict["weather_tweet"] = weather

    # Return results
    return scrape_dict

In [62]:
# Run the weather scraper; the returned shared dict is displayed as the cell output.
scrape_weather()

{'latest_headline': 'InSight Is the Newest Mars Weather Service',
 'latest_body': "By collecting data around the clock, NASA's lander will provide unique science about the Martian surface.",
 'feat_img_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18640_ip.jpg',
 'weather_tweet': 'InSight sol 84 (2019-02-20) low -95.1ºC (-139.2ºF) high -13.2ºC (8.3ºF)\nwinds from the SW at 4.1 m/s (9.3 mph) gusting to 10.8 m/s (24.2 mph)pic.twitter.com/WlR4gr8gpC'}

In [63]:
import pandas as pd

In [64]:
# Page whose tables are parsed with pd.read_html in the next cell.
url = "https://space-facts.com/mars/"


In [65]:
# read_html returns a list of DataFrames parsed from the page's <table> elements;
# the bare name on the last line displays that list.
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [66]:
# Take the first parsed table and give its two columns descriptive names.
# Note: df is the same object as tables[0], so the rename applies to both.
df= tables[0]
df.columns = ["description", "value"]
df.head()

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [67]:
# Make "description" the row index so "value" is the only remaining column.
# NOTE(review): inplace=True mutates df (and tables[0], which is the same
# object), so this cell is not idempotent: re-running it, or the previous
# cell, without first re-running pd.read_html will raise because the
# "description" column no longer exists.
df.set_index("description", inplace = True)
df.head()

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"


In [68]:
# Render the table as an HTML string; the extra CSS classes "table" and
# "table-striped" are added to the <table> tag.
df.to_html(classes="table table-striped")

'<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astrono

In [116]:
def scrape_images():
    """Collect hemisphere titles and image links from the USGS Astrogeology site.

    Visits the hemisphere search results page, follows each result's link to
    its detail page, and records the first download link found in every
    ``div.downloads`` section there.

    Returns:
        list[dict]: one ``{"title": ..., "image_url": ...}`` entry per
        ``div.downloads`` section, in page order.

    Raises:
        AttributeError / TypeError: if a result or downloads section lacks the
            expected ``<a>`` / ``<li>`` elements.
    """
    search_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    hemisphere_image_urls = []

    browser = init_browser()
    try:
        browser.visit(search_url)
        search_soup = bs(browser.html, "html.parser")

        for description in search_soup.find_all("div", class_="description"):
            link = description.find("a")
            title = link.text

            # Reuse the one browser session for every detail page instead of
            # quitting and relaunching Chrome per result; the search page has
            # already been parsed, so navigating away loses nothing.
            browser.visit("https://astrogeology.usgs.gov" + link["href"])
            detail_soup = bs(browser.html, "html.parser")

            for downloads in detail_soup.find_all("div", class_="downloads"):
                # The first list item's link is the image URL that is recorded.
                image_url = downloads.find("li").find("a")["href"]
                hemisphere_image_urls.append({"title": title, "image_url": image_url})
    finally:
        # Always shut the browser down, even if a visit or lookup fails.
        browser.quit()

    # Return the collected entries themselves (previously the function
    # returned the result of print(), i.e. always None).
    return hemisphere_image_urls

In [117]:
# Run the hemisphere scraper and show the collected title/image_url entries.
scrape_images()

[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'image_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [18]:
def scrape():
    """Placeholder entry point: writes an empty line to stdout and returns None."""
    print()
    return None

In [19]:
# Invoke the scrape() entry point defined in the previous cell.
scrape()