In [1]:
%reload_ext black
from bs4 import BeautifulSoup
import pandas as pd
import pprint
import requests
import datetime
#import selenium

In [2]:
news_url = 'https://mars.nasa.gov/news/'
jpl_root = 'https://www.jpl.nasa.gov'
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
twiter_url = 'https://twitter.com/marswxreport?lang=en'
fact_url = 'https://space-facts.com/mars/'
hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Latest Mars Articles

In [5]:
#open and parse the page
news_soup = BeautifulSoup(requests.get(news_url).text, 'html.parser')

#zip title and description items for iterability
zipped = zip(
    news_soup.find_all('div', class_='content_title'),
    news_soup.find_all('div', class_='image_and_description_container')
            )

#loop though zip object and extract data
articles = []
for div in zipped:
    info_dict = {
    'article_title' : div[0].a.text.strip(),
    'description_text' : div[1].text.strip(),
    'article_link' : div[1].a['href'].strip(),
    }
    articles.append(info_dict)

##grab the latest article
latest_article_title = articles[0]['article_title']
latest_article_desc = articles[0]['description_text']

print(latest_article_title)
print(latest_article_desc)

Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover
NASA chose a seventh-grader from Virginia as winner of the agency's "Name the Rover" essay contest. Alexander Mather's entry for "Perseverance" was voted tops among 28,000 entries.


# Mars Images from JPL

In [4]:
#open and parse the page
img_soup = BeautifulSoup(requests.get(img_url).text, 'html.parser')

##start a list of featured images
images = [] 

##get current featured image
img_dict = {'feat_img_title' : (img_soup.find('div', class_='carousel_items').div.h1.text.strip()),
           'feat_img_url' : jpl_root + str(img_soup.find('div', class_='carousel_items').div.footer.a.attrs['data-fancybox-href'])}

##if its a new image, add to image list
if img_dict not in images:
    images.append(img_dict)

##display latest image title and URL
print(images[0]['feat_img_title'])
print(images[0]['feat_img_url'])

The Rose
https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14944_ip.jpg


# Mars Weather

In [5]:
twit_soup = BeautifulSoup(requests.get(twiter_url).text, 'html.parser')

## the first tag contains the tweet and the 
## second tag contains the date it was posted
tw_zip = zip(
    twit_soup.find_all('div', class_="js-tweet-text-container"), 
    twit_soup.find_all('span', class_="_timestamp js-short-timestamp")
            )

weather = []

##extract the data and add to weather if not already there
for tweet in tw_zip:    
    info_dict = {'tweet_text' : tweet[0].p.text,
                'date' : datetime.date.fromtimestamp(int(tweet[1].attrs['data-time']))}
    if info_dict['tweet_text'].startswith('InSight') and info_dict not in weather:
        weather.append(info_dict)

#grab latest tweet 
latest_weather_tweet = weather[0]['tweet_text']
latest_weather_date = weather[0]['date']

print(f"Here is some weather data from Mars taken approximately at {latest_weather_date} \n\n" +
      f"{latest_weather_tweet}")

Here is some weather data from Mars taken approximately at 2020-03-08 

InSight sol 457 (2020-03-10) low -95.7ºC (-140.3ºF) high -9.1ºC (15.6ºF)
winds from the SSE at 6.5 m/s (14.5 mph) gusting to 21.0 m/s (46.9 mph)
pressure at 6.30 hPapic.twitter.com/2h0LKoSQrJ


In [9]:
fact_soup = BeautifulSoup(requests.get(fact_url).text, 'html.parser')

#:0 wow, extract tables with pandas
facts_df = pd.DataFrame(pd.read_html(str(fact_soup))[0]).set_index(0)


facts_df

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


# Mars Hemispheres

In [7]:
hemi_soup = BeautifulSoup(requests.get(hemisphere_url).text, 'html.parser')

##this the root of the URL when it gets redirected to full image
img_astrogeology_root = 'https://astropedia.astrogeology.usgs.gov'

##found a way to get full img URLs without selenium because I was having
## trouble with my virtualenv
##would like to try with selenium
hemispheres = []
for hemi in hemi_soup.find_all('div', class_='item'):
    info_dict = {
        'title' : hemi.h3.text,
        'full_img_url' : str(img_astrogeology_root + 
                       hemi.a['href'] + 
                       '.tif/full.jpg').replace('search/map', 'download')
    }
    hemispheres.append(info_dict)

    
pprint.pprint(hemispheres)

[{'full_img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'full_img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'full_img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'full_img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]


In [22]:

news_soup = BeautifulSoup(requests.get(news_url).text, 'html.parser')

#zip title and description items for iterability
zipped = zip(
    news_soup.find('div', class_='content_title'),
    news_soup.find('div', class_='image_and_description_container')
        )

print(zipped)
    
info_dict = {
'article_title' : zipped[1].a.text.strip(),
'description_text' : zipped[1].text.strip(),
'article_link' : zipped[1].a['href'].strip(),
}



<zip object at 0x000002693B0E4048>


AttributeError: 'NavigableString' object has no attribute 'a'