## Import Dependencies

In [235]:
import pandas as pd
import requests
import pymongo
import os
import tweepy
import json
import numpy as np

from config import consumer_key, consumer_secret, access_token, access_token_secret
from datetime import datetime
from splinter import Browser
from bs4 import BeautifulSoup as bs
from pprint import pprint

# Setup Tweepy Authentication
# The four credentials are imported from the local `config` module above,
# so no secrets are written into this notebook.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# JSONParser makes API calls return parsed JSON; the Mars Weather cell relies
# on this when it indexes each tweet with tweet["text"].
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

## Step 1 - Scraping

In [236]:
# Start a splinter-driven Chrome session used for the browser-based scraping.
# NOTE(review): 'chromedriver.exe' is a relative, Windows-style driver name —
# presumably the driver sits next to the notebook; confirm on other platforms.
executable_path = {'executable_path': 'chromedriver.exe'}
# NOTE(review): headless=False presumably opens a visible browser window.
browser = Browser('chrome', **executable_path, headless=False)

In [237]:
# Connect to the MongoDB server listening on localhost:27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [238]:
# Handles to the `space_db` database and its `articles` collection.
# NOTE(review): nothing in the visible notebook writes to `collection` yet.
db = client.space_db
collection = db.articles

## NASA Mars News

In [239]:
# NASA Mars news listing: 40 items per page, ordered by publish date descending
# (both settings are encoded in the query string).
url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
# Open the listing in the splinter-driven browser.
browser.visit(url)

In [240]:
# Retrieve page with the requests module
# (a plain HTTP fetch of the same URL, independent of the browser session)
response = requests.get(url)

# Create BeautifulSoup object; parse with 'lxml'
# NOTE(review): a later cell rebinds `soup` to the browser-rendered HTML.
soup = bs(response.text, 'lxml')

In [241]:
# Collect every <div class="list_text"> element found in `soup`.
results = soup.find_all("div", class_="list_text")

In [245]:
# Parse the HTML currently rendered in the browser. One pass is enough:
# repeating the parse only rebuilds the same lists from the same page.
html = browser.html
soup = bs(html, 'html.parser')

# Each news item lives in a <div class="list_text">.
posts = soup.find_all("div", class_="list_text")

# Parallel lists: title[i] is the headline and para[i] the teaser of post i.
title = [post.a.text for post in posts]
para = [post.find("div", class_="article_teaser_body").text for post in posts]

# Build the summary record once, after both lists are complete, so it is
# defined even when the page yields no posts.
article = {
    "news_title": title,
    "news_paragraph": para
}

In [246]:
# Keep the first headline and its teaser paragraph.
# Indexing with [0] raises IndexError if `title` / `para` are empty lists.
news_title = title[0]
news_p = para[0]

In [10]:
# Print every article found in `results`.
# Loop-local names are used on purpose: rebinding `title` / `para` / `article`
# here would replace the list-based values built from the browser HTML with
# plain strings, so a re-run of `title[0]` would return a single character
# instead of the first headline.
for result in results:
    result_title = result.a.text
    result_para = result.find("div", class_="article_teaser_body").text

    # Per-article record (one headline, one teaser).
    result_article = {
        "news_title": result_title,
        "news_paragraph": result_para
    }

    print("---------------------------------")
    print(f'{result_title}')
    print(f'{result_para}')

## Finding the Featured Image URL

In [249]:
# JPL space-images listing filtered to the Mars category.
url2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
# Open the listing in the browser so the links on it can be clicked.
browser.visit(url2)

In [250]:
# Retrieve page with the requests module
response_image = requests.get(url2)

# Create BeautifulSoup object; parse with 'lxml'
# NOTE(review): `soup_image` is not read anywhere in the visible notebook.
soup_image = bs(response_image.text, 'lxml')

In [251]:
# Click the link whose text contains 'FULL IMAGE' on the listing page.
# NOTE(review): newer splinter releases replace click_link_by_partial_text with
# browser.links.find_by_partial_text(...).click() — confirm the installed version.
browser.click_link_by_partial_text('FULL IMAGE')

In [253]:
# Follow the link whose text contains 'more info'; presumably this lands on the
# image's details page — confirm against the live site.
browser.click_link_by_partial_text('more info')

In [254]:
# The URL varies depending on what the web browser opens, so read it from the
# browser (which the 'FULL IMAGE' -> 'more info' clicks have just navigated)
# rather than pinning a single image id that goes stale.
url_image = browser.url

In [255]:
# Retrieve page with the requests module
# (fetches the image details page addressed by `url_image`)
response_image1 = requests.get(url_image)

# Create BeautifulSoup object; parse with 'lxml'
soup_image1 = bs(response_image1.text, 'lxml')

In [256]:
# Collect every <figure class="lede"> element on the details page.
results_image = soup_image1.find_all("figure", class_="lede")

In [257]:
# Scrape the image link: the href of the first <a> inside each figure.
image_url = [figure.a["href"] for figure in results_image]

In [258]:
# Display the scraped hrefs (site-relative paths, per the saved output below).
image_url

['/spaceimages/images/largesize/PIA18185_hires.jpg']

In [259]:
# Prefix each scraped path with the JPL host to get an absolute image URL.
# The loop variable is deliberately not named `url`: that name holds the NASA
# news address used by earlier cells, and rebinding it here would break those
# cells on re-run. If several paths were scraped, the last one is kept.
for image_path in image_url:
    featured_image_url = "https://www.jpl.nasa.gov" + image_path
    print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18185_hires.jpg


## Mars Weather

In [260]:
# Twitter handle whose timeline is read for the weather report.
target_user = "@MarsWxReport"

In [261]:
# Fetch the account's timeline; `api` was built with JSONParser, and the next
# cell indexes each returned item with ["text"].
public_tweets = api.user_timeline(target_user)

In [262]:
# Text of every tweet on the timeline, in the order the API returned them.
weather_tweet = [status["text"] for status in public_tweets]

In [263]:
# Use the first tweet returned as the weather report.
# NOTE(review): presumably the timeline is newest-first and the first tweet is
# a weather report — confirm, since other kinds of tweets would be picked up too.
mars_weather = weather_tweet[0]
print(mars_weather)

Sol 2230 (2018-11-14), high -5C/23F, low -72C/-97F, pressure at 8.59 hPa, daylight 06:22-18:39


## Mars Facts

In [264]:
# Page holding the Mars facts table.
url_facts = "https://space-facts.com/mars/"

In [265]:
# read_html returns a list of DataFrames parsed from the page's tables.
tables = pd.read_html(url_facts)
# Display the list to inspect what was parsed.
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [266]:
# The first parsed table is the two-column Mars facts table (label, value).
df = tables[0]

In [267]:
# Promote column 0 (the fact labels) to the index, then clear the index name
# so no header is rendered above the labels.
df_clean = df.set_index(0).rename_axis(None)

In [268]:
# Give the remaining column (positional label 1) a readable header.
df_clean = df_clean.rename(columns={1: "Values"})
# Display the cleaned table.
df_clean

Unnamed: 0,Values
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [269]:
# Render the cleaned facts table as an HTML string.
html_table = df_clean.to_html()

In [270]:
# str.replace returns a new string, so the result must be bound back to
# `html_table`; otherwise only the displayed value loses its newlines and the
# variable itself keeps them. The operation is idempotent, so re-running is safe.
html_table = html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Values</th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [271]:
# Also write the table to 'mars_facts.html' (path is relative to the working directory).
df_clean.to_html('mars_facts.html')

## Mars Hemispheres

In [281]:
# USGS Astrogeology pages for the four enhanced Viking hemisphere mosaics.
# All four share the same prefix and the "_enhanced" suffix; only the slug differs.
url_cerberus, url_schiaparelli, url_syrtis, url_valles = (
    f"https://astrogeology.usgs.gov/search/map/Mars/Viking/{slug}_enhanced"
    for slug in ("cerberus", "schiaparelli", "syrtis_major", "valles_marineris")
)

In [324]:
# Pages to scrape, one per hemisphere.
url_hemisphere = [url_cerberus, url_schiaparelli, url_syrtis, url_valles]

# Every record built below is collected here, because `hemisphere_image_urls`
# is rebound on each iteration and therefore only holds the last record once
# the loop has finished.
hemisphere_records = []

# The loop variable is deliberately not named `url`: that name holds the NASA
# news address used by earlier cells, and rebinding it would break them on re-run.
for hemisphere_url in url_hemisphere:
    response_hemisphere = requests.get(hemisphere_url)
    soup_hemisphere = bs(response_hemisphere.text, 'lxml')

    results_hemisphere = soup_hemisphere.find_all("div", class_="container")

    for result in results_hemisphere:
        # Heading text and the site-relative path of the wide preview image.
        title = result.h2.text
        img_url = result.find("img", class_="wide-image")["src"]

        # Kept under its original name (a single dict) for later cells that
        # index it with ["img_url"].
        hemisphere_image_urls = {
            "title": title,
            "img_url": "https://astrogeology.usgs.gov" + img_url
        }
        hemisphere_records.append(hemisphere_image_urls)

        print(hemisphere_image_urls)

{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}
{'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}
{'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}
{'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}


## Step 2 - MongoDB and Flask Application

In [280]:
# Print the image URL of the most recently built hemisphere record.
# NOTE(review): the saved output below is a *_thumb.png URL that the scraping
# loop above does not produce, and this cell's execution count (280) predates
# that loop's (324) — the output looks stale; re-run to refresh it.
print(hemisphere_image_urls["img_url"])

https://astrogeology.usgs.gov/cache/images/04085d99ec3713883a9a57f42be9c725_valles_marineris_enhanced.tif_thumb.png
