## Scraping

In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
import pandas as pd
import pprint

### NASA Mars News

In [2]:
browser = Browser("chrome")
news_url = "https://mars.nasa.gov/news"

browser.visit(news_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "html.parser")

In [3]:
# Collect the latest News Title and Paragraph Text.
news_title = soup.find('div', class_ = 'content_title').find('a').text
print(news_title)
print("\n")

news_p = soup.find('div', class_ = 'article_teaser_body').text
print(news_p)

browser.quit()

InSight Captures Sunrise and Sunset on Mars


InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.


### JPL Mars Space Images - Featured Image

In [42]:
browser = Browser("chrome")
img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

browser.visit(img_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "html.parser")

In [43]:
#  Find the image url for the current Featured Mars Image
img_str = soup.find('div', class_="carousel_items").find('article')['style']
print(img_str)

background-image: url('/spaceimages/images/wallpaper/PIA15254-1920x1200.jpg');


In [44]:
img_url[0:36]

'https://www.jpl.nasa.gov/spaceimages'

In [45]:
img_str[35:-3] 

'/images/wallpaper/PIA15254-1920x1200.jpg'

In [46]:
featured_image_url = img_url[0:36] + img_str[35:-3]

print(featured_image_url)

browser.quit()

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA15254-1920x1200.jpg


### Mars Weather

In [9]:
browser = Browser("chrome")
tweet_url = "https://twitter.com/marswxreport?lang=en"

browser.visit(tweet_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [10]:
# Scrape the latest Mars weather tweet from the page.

tweet_qry = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").children

for i in tweet_qry:
    mars_weather = i
    break

print(mars_weather)

browser.quit()

# Alternate method 1
# mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").get_text()
# print(mars_weather)

# Alternate method 2
# mars_weather  = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
# print(mars_weather)

# Alternate method 3
# mars_weather  = soup.find('p',{'data-aria-label-part':'0'}).get_text()
# print(mars_weather)

InSight sol 155 (2019-05-04) low -99.3ºC (-146.8ºF) high -18.8ºC (-1.8ºF)
winds from the SW at 4.5 m/s (10.1 mph) gusting to 14.3 m/s (31.9 mph)
pressure at 7.40 hPa


### Mars Facts

In [11]:
browser = Browser("chrome")
facts_url = "https://space-facts.com/mars/"

browser.visit(facts_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [12]:
facts_table = soup.find('tbody')
print(facts_table)

<tbody>
<tr class="row-1 odd">
<td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/>
</td>
</tr>
<tr class="row-2 even">
<td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/>
</td>
</tr>
<tr class="row-3 odd">
<td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.42 x 10^23 kg (10.7% Earth)</td>
</tr>
<tr class="row-4 even">
<td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/deimos/">Deimos</a>)</td>
</tr>
<tr class="row-5 odd">
<td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km (1.52 AU)</td>
</tr>
<tr class="row-6 even">
<td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/>
</td>
</tr>
<tr class="row-7 odd">
<td class="column-1"><strong>Surface Temperature: </strong></td><td class="c

In [13]:
column1_lst = []
column2_lst = []

for i in facts_table.find_all('td', class_ = "column-1"):
    column1_lst.append(i.text[0:-1]) # using 0:-1 to omit the ":" that's at the end of each row element

for i in facts_table.find_all('td', class_ = "column-2"):
    column2_lst.append(i.text)

In [14]:
# for i in column1_lst:
#     print(i)
#     print("---")

# for i in column2_lst:
#     print(i)
#     print("---")

In [15]:
facts_df = pd.DataFrame(
    {'Metric': column1_lst,
     'Data': column2_lst
    })

facts_df

Unnamed: 0,Metric,Data
0,Equatorial Diameter,"6,792 km\n"
1,Polar Diameter,"6,752 km\n"
2,Mass,6.42 x 10^23 kg (10.7% Earth)
3,Moons,2 (Phobos & Deimos)
4,Orbit Distance,"227,943,824 km (1.52 AU)"
5,Orbit Period,687 days (1.9 years)\n
6,Surface Temperature:,-153 to 20 °C
7,First Record,2nd millennium BC
8,Recorded By,Egyptian astronomers


In [16]:
# Alternate method using zip function
# facts_df = pd.DataFrame(list(zip(column1_lst, column2_lst)),columns=['Metric','Data'])
# facts_df

In [17]:
# mars_facts = facts_df.to_html
# print(mars_facts)

mars_facts = facts_df.to_dict(orient="records")

In [18]:
mars_facts

[{'Metric': 'Equatorial Diameter', 'Data': '6,792 km\n'},
 {'Metric': 'Polar Diameter', 'Data': '6,752 km\n'},
 {'Metric': 'Mass', 'Data': '6.42 x 10^23 kg (10.7% Earth)'},
 {'Metric': 'Moons', 'Data': '2 (Phobos & Deimos)'},
 {'Metric': 'Orbit Distance', 'Data': '227,943,824 km (1.52 AU)'},
 {'Metric': 'Orbit Period', 'Data': '687 days (1.9 years)\n'},
 {'Metric': 'Surface Temperature:', 'Data': '-153 to 20 °C'},
 {'Metric': 'First Record', 'Data': '2nd millennium BC'},
 {'Metric': 'Recorded By', 'Data': 'Egyptian astronomers'}]

In [19]:
# for i in range(len(mars_facts)):
#     print(i)
#     print(mars_facts[i]['Metric'])
#     print(mars_facts[i]['Data'])

In [20]:
type(mars_facts)

list

In [21]:
browser.quit()


### Mars Hemispheres

In [22]:
usgs_url = "https://astrogeology.usgs.gov"

#### Hemisphere1: Cerberus Hemisphere Enhanced

In [23]:
browser = Browser("chrome")
cerebrus_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced"

browser.visit(cerebrus_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [24]:
cerebrus_title = soup.find('h2', class_ = "title").text
cerebrus_title = cerebrus_title.rsplit(' ',1)[0]
print(cerebrus_title)

Cerberus Hemisphere


In [25]:
cerebrus_src = soup.find('img', class_ = "wide-image")['src']
cerebrus_img_url = usgs_url + cerebrus_src
print(cerebrus_img_url)

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg


#### Hemisphere2: Schiaparelli Hemisphere Enhanced

In [26]:
browser = Browser("chrome")
schiaparelli_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced"

browser.visit(schiaparelli_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [27]:
schiaparelli_title = soup.find('h2', class_ = "title").text
schiaparelli_title = schiaparelli_title.rsplit(' ',1)[0]
print(schiaparelli_title)

Schiaparelli Hemisphere


In [28]:
schiaparelli_src = soup.find('img', class_ = "wide-image")['src']
schiaparelli_img_url = usgs_url + schiaparelli_src
print(schiaparelli_img_url)

https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg


#### Hemisphere3: Syrtis Major Hemisphere Enhanced

In [29]:
browser = Browser("chrome")
syrtis_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced"

browser.visit(syrtis_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [30]:
syrtis_title = soup.find('h2', class_ = "title").text
syrtis_title = syrtis_title.rsplit(' ',1)[0]
print(syrtis_title)

Syrtis Major Hemisphere


In [31]:
syrtis_src = soup.find('img', class_ = "wide-image")['src']
syrtis_img_url = usgs_url + syrtis_src
print(syrtis_img_url)

https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg


#### Hemisphere4: Valles Marineris Hemisphere Enhanced

In [32]:
browser = Browser("chrome")
valles_url = "https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced"

browser.visit(valles_url)

time.sleep(2)

html = browser.html 
soup = bs(html, "lxml")

In [33]:
valles_title = soup.find('h2', class_ = "title").text
valles_title = valles_title.rsplit(' ',1)[0]
print(valles_title)

Valles Marineris Hemisphere


In [34]:
valles_src = soup.find('img', class_ = "wide-image")['src']
valles_img_url = usgs_url + valles_src
print(valles_img_url)

https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [35]:
hemisphere_image_urls = {}

hemisphere_image_urls = [
    {"title": cerebrus_title, "img_url": cerebrus_img_url},
    {"title": schiaparelli_title, "img_url": schiaparelli_img_url},
    {"title": syrtis_title, "img_url": syrtis_img_url},
    {"title": valles_title, "img_url": valles_img_url},
]

In [36]:
pprint.pprint(hemisphere_image_urls)

browser.quit()

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere'}]


### Summary Dictionary

In [37]:
summary_dict ={}

summary_dict = {
    "News_Title": news_title,
    "News_Teaser": news_p,
    "Featured_Image": featured_image_url,
    "Mars_Weather": mars_weather,
    "Mars_Facts": mars_facts,
    "Mars_Hemispheres": hemisphere_image_urls
    }

In [38]:
summary_dict

{'News_Title': 'InSight Captures Sunrise and Sunset on Mars',
 'News_Teaser': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.",
 'Featured_Image': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17896-1920x1200.jpg',
 'Mars_Weather': 'InSight sol 155 (2019-05-04) low -99.3ºC (-146.8ºF) high -18.8ºC (-1.8ºF)\nwinds from the SW at 4.5 m/s (10.1 mph) gusting to 14.3 m/s (31.9 mph)\npressure at 7.40 hPa',
 'Mars_Facts': [{'Metric': 'Equatorial Diameter', 'Data': '6,792 km\n'},
  {'Metric': 'Polar Diameter', 'Data': '6,752 km\n'},
  {'Metric': 'Mass', 'Data': '6.42 x 10^23 kg (10.7% Earth)'},
  {'Metric': 'Moons', 'Data': '2 (Phobos & Deimos)'},
  {'Metric': 'Orbit Distance', 'Data': '227,943,824 km (1.52 AU)'},
  {'Metric': 'Orbit Period', 'Data': '687 days (1.9 years)\n'},
  {'Metric': 'Surface Temperature:', 'Data': '-153 to 20 °C'},
  {'Metric': 'First Record', 'Data': '2nd millennium BC'},

In [39]:
summary_dict['Mars_Facts'][0]

{'Metric': 'Equatorial Diameter', 'Data': '6,792 km\n'}

In [40]:
summary_dict['Mars_Facts'][0]['Metric']

'Equatorial Diameter'

In [41]:
summary_dict['Mars_Facts'][0]['Data']

'6,792 km\n'