# Missions to Mars

In [107]:
# ==============================================================
# IMPORT RELEVANT MODULES AND SET UP FOR WEB SCRAPING
# ==============================================================

In [108]:
from splinter import Browser
from bs4 import BeautifulSoup
import time
import pandas as pd

In [109]:
executable_path = {'executable_path': 'chromedriver.exe'}

In [110]:
browser = Browser('chrome', **executable_path, headless=False)

# Web Scraping -- NASA Mars News

In [111]:
# ==============================================================
# BEGINNING OF WEB SCRAPING FOR NASA MARS NEWS
# ==============================================================

In [112]:
# Specify the url:

url = 'https://mars.nasa.gov/news/'

In [113]:
# Extract the HTML from the website:

browser.visit(url)
time.sleep(3)
html = browser.html

In [114]:
# Create a Beautiful Soup object:

soup_01 = BeautifulSoup(html, 'html.parser')

In [115]:
# Create a function to enable saving to a text file to allow for easy review of the HTML:

def savetofile(filename, contents):
    """Write ``contents`` to ``filename`` as UTF-8 text.

    The file is opened in "w" mode, so an existing file with the same
    name is overwritten.

    Args:
        filename: Path of the file to create or overwrite.
        contents: Text to write to the file.
    """
    # The context manager closes the handle even when write() raises,
    # which a manual open()/close() pair does not guarantee.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(contents)

In [116]:
# Save the output of the Beautiful Soup object to a text file for analysis:

filename_nasa_mars_news_01 = 'Output/web_scraping_nasa_mars_news_01.txt'

savetofile(filename_nasa_mars_news_01,soup_01.prettify())

In [117]:
# Extract all relevant content: 

nasa_mars_news_contents = soup_01.find_all('li', class_='slide')

In [118]:
# Save the output to a text file for analysis:

filename_nasa_mars_news_02 = 'Output/web_scraping_nasa_mars_news_02.txt'

savetofile(filename_nasa_mars_news_02,nasa_mars_news_contents[0].prettify())

In [119]:
# Extract the title of the Latest Mars News:

nasa_mars_news_title = nasa_mars_news_contents[0].find('div', class_ = 'content_title')

In [120]:
# Extract just the text of the title:

news_title = nasa_mars_news_title.text.strip()

In [121]:
# Print the text of the title: 

print(news_title)

How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus


In [122]:
# Extract the description paragraph of the Latest Mars News:

nasa_mars_news_paragraph = nasa_mars_news_contents[0].find('div', class_ = 'article_teaser_body')

In [123]:
# Extract just the text of the description paragraph:

news_p = nasa_mars_news_paragraph.text.strip()

In [124]:
# Print the text of the description paragraph:  

print(news_p)

Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.


In [125]:
# ==============================================================
# END OF WEB SCRAPING FOR NASA MARS NEWS
# ==============================================================

# Web Scraping -- JPL Mars Space Images

In [126]:
# ==============================================================
# BEGINNING OF WEB SCRAPING FOR JPL MARS SPACE IMAGES
# ==============================================================

In [127]:
# Specify the url:

url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

In [128]:
# Extract the HTML from the website:

browser.visit(url)
time.sleep(3)
html = browser.html

In [129]:
# Create a Beautiful Soup object:

soup_02 = BeautifulSoup(html, 'html.parser')

In [130]:
# Save the output of the Beautiful Soup object to a text file for analysis:

filename_jpl_mars_space_images_01 = 'Output/web_scraping_jpl_mars_space_images_01.txt'

savetofile(filename_jpl_mars_space_images_01,soup_02.prettify())

In [131]:
# Extract the "base url" portion of the entire url:

base_url = 'https://www.jpl.nasa.gov'

In [132]:
# Extract the "extension url" portion to be added to the base url:

add_on_url = soup_02.find('a',class_='button fancybox')['data-fancybox-href']

In [133]:
# Extract the featured image url as a combination of the "base url" and the "extension url":

featured_image_url = base_url + add_on_url

In [134]:
# Print the featured image url:

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18048_ip.jpg


In [135]:
# ==============================================================
# END OF WEB SCRAPING FOR JPL MARS SPACE IMAGES
# ==============================================================

# Web Scraping -- Mars Weather



In [136]:
# ==============================================================
# BEGINNING OF WEB SCRAPING FOR MARS WEATHER
# ==============================================================

In [137]:
# Specify the url:

url = 'https://twitter.com/marswxreport?lang=en'

In [138]:
# Extract the HTML from the website:

browser.visit(url)
time.sleep(3)
html = browser.html

In [139]:
# Create a Beautiful Soup object:

soup_03 = BeautifulSoup(html, 'html.parser')

In [140]:
# Save the output of the Beautiful Soup object to a text file for analysis:

filename_mars_weather_01 = 'Output/web_scraping_mars_weather_01.txt'

savetofile(filename_mars_weather_01,soup_03.prettify())

In [141]:
# Extract the relevant content:

mars_weather_tweets = soup_03.find_all('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0')

In [142]:
# Create a for loop to capture the tweets:

# Keep the text of every scraped span that contains the substring 'sol'
# (the match is case-sensitive), in the order the spans were scraped.
tweets_list = [tweet.text for tweet in mars_weather_tweets if 'sol' in tweet.text]


In [143]:
# Extract the latest tweet:

mars_weather = tweets_list[0]

In [144]:
# Print the latest tweet containing the Current Mars Weather:

print(mars_weather)

InSight sol 500 (2020-04-22) low -93.8ºC (-136.8ºF) high -3.5ºC (25.6ºF)
winds from the WNW at 4.5 m/s (10.1 mph) gusting to 20.4 m/s (45.6 mph)
pressure at 6.70 hPa


In [145]:
# ==============================================================
# END OF WEB SCRAPING FOR MARS WEATHER
# ==============================================================

# Web Scraping -- Mars Facts

In [146]:
# ==============================================================
# BEGINNING OF WEB SCRAPING FOR MARS FACTS
# ==============================================================

In [148]:
# Specify the url:

url = 'https://space-facts.com/mars/'

In [149]:
# Extract the Tables from the website:

tables = pd.read_html(url)


In [150]:
# Print the different tables to understand their contents:

# Show each scraped table followed by a divider line so that
# consecutive tables can be told apart in the output.
divider = '====================================================================='
for table in tables:
    print(table)
    print(divider)

                      0                              1
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:            2 (Phobos & Deimos)
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers
  Mars - Earth Comparison             Mars            Earth
0               Diameter:         6,779 km        12,742 km
1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
2                  Moons:                2                1
3      Distance from Sun:   227,943,824 km   149,598,262 km
4         Length of Year:   687 Earth days      365.24 days
5            Temperature:    -153 to 20 °C      -88 to 58°C
                      0       

In [151]:
# Extract the relevant table:

df_mars = tables[0]

In [152]:
# Print the relevant table:

df_mars

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [153]:
# Rename the columns of the relevant table:

df_mars = df_mars.rename(columns={0:"Category", 1:"Value"})

In [154]:
# Print the relevant table:

df_mars

Unnamed: 0,Category,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [155]:
# Set the index of the relevant table to the column called "Category":

df_mars = df_mars.set_index("Category")

In [156]:
# Print the relevant table:

df_mars

Unnamed: 0_level_0,Value
Category,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [157]:
# Generate the HTML for the relevant table:

marsfacts_html = df_mars.to_html()

In [158]:
# Print the HTML:

print(marsfacts_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Category</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [159]:
# Save the HTML to an HTML file:

df_mars.to_html('Output/mars_table.html')

In [160]:
# ==============================================================
# END OF WEB SCRAPING FOR MARS FACTS
# ==============================================================

# Web Scraping -- Mars Hemispheres

In [161]:
# ==============================================================
# BEGINNING OF WEB SCRAPING FOR MARS HEMISPHERES
# ==============================================================

In [162]:
# Specify the url:

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [163]:
# Extract the HTML from the website:

browser.visit(url)
time.sleep(3)
html = browser.html

In [164]:
# Create a Beautiful Soup object:

soup_04 = BeautifulSoup(html, 'html.parser')

In [165]:
# Save the output of the Beautiful Soup object to a text file for analysis:

filename_mars_hemispheres_01 = 'Output/web_scraping_mars_hemispheres_01.txt'

savetofile(filename_mars_hemispheres_01,soup_04.prettify())

In [166]:
# Extract the relevant content:

mars_hemispheres = soup_04.find_all('div', class_='item')

In [167]:
# Print the first item in the list for review:

print(mars_hemispheres[0])

<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>


In [168]:
# Save the output to a text file for further analysis:

filename_mars_hemispheres_02 = 'Output/web_scraping_mars_hemispheres_02.txt'

savetofile(filename_mars_hemispheres_02,mars_hemispheres[0].prettify())

In [169]:
# Create lists to hold the urls and the hemisphere titles:

url_list = []
hemisphere_title_list = []

In [170]:
# Specify the "base url" of the website:

base_url = 'https://astrogeology.usgs.gov'

In [171]:
# Create a for loop to append to the 2 lists created above:

# For every search-result item, record the absolute url of its detail page
# (base url + the href of the item's first link) and its <h3> heading text.
for hemisphere_item in mars_hemispheres:
    relative_link = hemisphere_item.find('a')['href']
    url_list.append(base_url + relative_link)
    heading_text = hemisphere_item.find('h3').text
    hemisphere_title_list.append(heading_text.strip())


In [172]:
# Print the results of the first list:

for i in url_list:
    print(i)

https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced


In [173]:
# Print the results of the second list:

for i in hemisphere_title_list:
    print(i)

Cerberus Hemisphere Enhanced
Schiaparelli Hemisphere Enhanced
Syrtis Major Hemisphere Enhanced
Valles Marineris Hemisphere Enhanced


In [174]:
# Create a new list to get only the relevant portion of the hemisphere title:

reduced_hemisphere_title_list = []

In [175]:
# Create a for loop to append to the new title list, the individual words from the items in the previous title list:

# Break every full title into its individual words (split on whitespace)
# and store each resulting word list in the new title list.
reduced_hemisphere_title_list.extend(
    title.split() for title in hemisphere_title_list
)

In [176]:
# Print the results of the new title list:

print(reduced_hemisphere_title_list)

[['Cerberus', 'Hemisphere', 'Enhanced'], ['Schiaparelli', 'Hemisphere', 'Enhanced'], ['Syrtis', 'Major', 'Hemisphere', 'Enhanced'], ['Valles', 'Marineris', 'Hemisphere', 'Enhanced']]


In [177]:
# Create a for loop to remove the unnecessary words from the new title list: 

# Drop the word "Enhanced" from each title's word list.
# The membership check keeps this cell safe to re-run: list.remove() raises
# ValueError when the word is no longer in the list.
for title_words in reduced_hemisphere_title_list:
    if "Enhanced" in title_words:
        title_words.remove("Enhanced")
    

In [178]:
# Print the new title list:

print(reduced_hemisphere_title_list)

[['Cerberus', 'Hemisphere'], ['Schiaparelli', 'Hemisphere'], ['Syrtis', 'Major', 'Hemisphere'], ['Valles', 'Marineris', 'Hemisphere']]


In [179]:
# Print the length of each item in the new title list:

for z in reduced_hemisphere_title_list:
    print(len(z))

2
2
3
3


In [180]:
# Create the final title list:

final_hemisphere_title_list = []

In [181]:
# Ensure that the final title list is empty:

# list.clear() removes every element in place, so re-running the notebook
# from this cell starts with an empty list. (Looping with `del z` only
# unbinds the loop variable and leaves the list contents untouched.)
final_hemisphere_title_list.clear()

In [182]:
# Print the final title list:

print(final_hemisphere_title_list)

[]


In [183]:
# Create the titles and append to the final title list:

# Join each title's words back into one space-separated string.
# str.join handles word lists of any length, so titles are not limited to
# two or three words and every appended entry is a string.
for title_words in reduced_hemisphere_title_list:
    final_hemisphere_title_list.append(" ".join(title_words))

In [184]:
# Print the final title list:

print(final_hemisphere_title_list)

['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']


In [185]:
# Create a new image url list to hold the full-size images of the different hemispheres:

image_url_list =[]

In [186]:
# Create a for loop to append to the new image url list:

# Visit each hemisphere's detail page, locate the <img> tag with class
# 'wide-image', and record its 'src' as an absolute url.
for page_url in url_list:
    browser.visit(page_url)
    page_soup = BeautifulSoup(browser.html, 'html.parser')
    wide_image = page_soup.find('img', class_='wide-image')
    image_url_list.append(base_url + wide_image['src'])

In [187]:
# Print the new image url list:

for i in image_url_list:
    print(i)

https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg


In [188]:
# Create the final list to hold both the hemisphere titles and the urls of the full-size hemisphere images:

hemisphere_image_urls  =[]

In [189]:
# Ensure that the final list is empty:

# list.clear() removes every element in place, so re-running the notebook
# from this cell starts with an empty list. (Looping with `del i` only
# unbinds the loop variable and leaves the list contents untouched.)
hemisphere_image_urls.clear()

In [190]:
# Print the final list:

print(hemisphere_image_urls)

[]


In [191]:
# Create a for loop to populate the final list:

# Pair every title with the image url at the same position and store the
# pair as a {'title': ..., 'img_url': ...} dictionary in the final list.
for position, title in enumerate(final_hemisphere_title_list):
    hemisphere_image_urls.append(
        {'title': title, 'img_url': image_url_list[position]}
    )



In [192]:
# Print the contents of the final list that holds both:
# the hemisphere titles, and the hemisphere image urls with the full-size images:

for x in hemisphere_image_urls:
    print(x)

{'title': 'Cerberus Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}
{'title': 'Schiaparelli Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}
{'title': 'Syrtis Major Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}
{'title': 'Valles Marineris Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}


In [None]:
# ==============================================================
# END OF WEB SCRAPING FOR MARS HEMISPHERES
# ==============================================================