# **Step 1 - Scraping**
---

## **Importing modules/dependencies**

In [1]:
# standard library
import os
import subprocess
import time

# data manipulation
import pandas as pd

# web scraping and storage
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

#### Initialize Browser Instance

In [2]:
# Keyword arguments handed to splinter; points at the ChromeDriver binary
# (resolved relative to the working directory / PATH).
executable_path = dict(executable_path="chromedriver.exe")

# Chrome instance driven through splinter for the pages visited with `browser`.
browser = Browser("chrome", **executable_path)

# Plain HTTP session reused for the pages fetched with `session.get`.
session = requests.Session()

## **Grabbing the Latest News Article from NASA**

In [3]:
# Initial scrape of the NASA Mars news listing.
url = "https://mars.nasa.gov/news"
browser.visit(url)
# Fixed pause before capturing the HTML (presumably to let the listing render).
time.sleep(5)
news_brows = browser.html
news_soup = bs(news_brows,"lxml")

# The first <li class="slide"> in the page is taken as the latest article.
latest_news = news_soup.find("li", class_="slide")
if latest_news is None:
    # Without this guard a missing slide surfaces as an opaque
    # "'NoneType' object has no attribute 'find'" a few lines further down.
    raise ValueError("No <li class='slide'> news item found at {}".format(url))

# Headline, publication date and teaser paragraph of that article.
news_t_lvl = latest_news.find("div",class_="content_title")
news_title = news_t_lvl.a.text
news_date = latest_news.find("div",class_="list_date").text
news_p = latest_news.find("div",class_="article_teaser_body").text

## **Grabbing the featured image data from** [https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars)

In [4]:
# Walk from the JPL Mars gallery to the featured image, pausing between
# navigation steps.
space_image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(space_image_url)
time.sleep(2)
browser.click_link_by_id("full_image")
time.sleep(2)
# NOTE(review): the link text is matched including its trailing spaces; keep
# the literal byte-for-byte unless the page markup is re-checked.
browser.click_link_by_text("more info     ")
time.sleep(2)
featured_brows = browser.html
images_soup = bs(featured_brows,"html.parser")
# Follow the link wrapped around the first <figure>.
browser.click_link_by_href(images_soup.figure.a["href"])

# Pause *before* reading the address so the click has time to navigate;
# reading browser.url first could capture the previous page's URL.
time.sleep(2)
featured_image_url = browser.url

## **Getting Latest Mars Weather Data from Twitter**

In [5]:
# Fetch the Mars weather report Twitter page and take the text of the first tweet container.
mars_weather_url = "https://twitter.com/marswxreport?lang=en"
# A timeout keeps the cell from hanging forever; raise_for_status stops an
# HTTP error page from being parsed as if it were the timeline.
weather_page = session.get(mars_weather_url, timeout=30)
weather_page.raise_for_status()
response = weather_page.text
weather_soup = bs(response, "lxml")

weather_container = weather_soup.find("div",class_="js-tweet-text-container")
if weather_container is None:
    # Fail with a clear message instead of "'NoneType' object has no attribute 'text'".
    raise ValueError("No tweet text container found at {}".format(mars_weather_url))
mars_weather = weather_container.text

## **Getting Mars Facts Information for DataFrame and HTML Table**

In [6]:
# Read every HTML table on the Mars facts page and keep the first one.
mars_facts_url = "https://space-facts.com/mars/"
mars_facts_tables = pd.read_html(mars_facts_url)
mars_facts_df = mars_facts_tables[0]
# Rename the two columns in place.
mars_facts_df.columns = ["Element", "Data"]

# `html_df` is another name for the same frame, not a copy.
html_df = mars_facts_df

# Render the frame as an HTML table, then remove the empty header-cell
# fragment if the rendered markup contains it.
rendered_table = html_df.to_html(justify="left", border=1)
empty_header_fragment = "\n      <th></th>\n      <th></th>\n    </tr>\n    <tr>"
mars_fact_html = rendered_table.replace(empty_header_fragment, "")

##### ***Additional facts — not used in this exercise***

## **Mars Hemisphere Data**

In [7]:
# USGS Astrogeology search results for the enhanced Mars hemisphere products.
hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
# Site root prepended to the hrefs scraped from the results page.
hemisphere_url_prefix ="https://astrogeology.usgs.gov"

# Accumulators filled by the following cells: detail-page links,
# full-resolution image URLs, and hemisphere titles.
hemisphere_links = []
hemisphere_image_url = []
hemisphere_titles = []

In [8]:
# Download the hemisphere search-results page and collect its result entries.
# The timeout prevents an indefinite hang; raise_for_status makes a failed
# request stop here instead of silently producing an empty result list.
hemisphere_page = session.get(hemisphere_url, timeout=30)
hemisphere_page.raise_for_status()
response = hemisphere_page.text
hemisphere_soup = bs(response,"lxml")
# One <div class="item"> per search result.
hemisphere_div = hemisphere_soup.find_all("div", class_="item")

In [9]:
# Build both lists from scratch so re-running this cell does not append
# duplicate entries to lists left over from a previous run.

# Absolute detail-page URL for each result: site prefix + the first link's href.
hemisphere_links = [
    hemisphere_url_prefix + str(item.find("a")["href"]) for item in hemisphere_div
]
# Result heading with the " Enhanced" suffix removed.
hemisphere_titles = [
    item.h3.text.replace(" Enhanced", "") for item in hemisphere_div
]

In [10]:
# Resolve the "Original" download link on every hemisphere detail page.
# The list is rebuilt here so re-running the cell does not duplicate entries.
hemisphere_image_url = []
for hemisphere_link in hemisphere_links:
    # Only the HTTP response is parsed below, so the page is fetched once via
    # the shared session rather than also being loaded in the browser.
    detail_page = session.get(hemisphere_link, timeout=30)
    detail_page.raise_for_status()
    response = detail_page.text
    hemi_soup = bs(response,"lxml")
    search = hemi_soup.find("a",text="Original")
    if search is None:
        # Clear failure instead of "'NoneType' object is not subscriptable".
        raise ValueError("No 'Original' link found on {}".format(hemisphere_link))
    hemisphere_image_url.append(search["href"])
    # Pause between requests.
    time.sleep(2)

## **Closing Browser Session**

In [11]:
# Shut down the splinter-driven browser.
browser.quit()
try:
    browser.windows()
except Exception:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit are not swallowed.
    # NOTE(review): the message is printed for *any* exception raised by the
    # call above, so it may not strictly prove the browser closed — confirm.
    print("All driver browsers have been closed successfully")

All driver browsers have been closed successfully


-----

#  

# **Step 2 - MongoDB Creation**
---

#### Creating "***mars***" database in MongoDB

In [12]:
# Start the MongoDB server and open the "mars" database.

# Command names of the MongoDB server and shell executables (expected on PATH).
mongod = "mongod"
mongo = "mongo"  # kept for reference only; the interactive shell is not launched here

# Launch the server in the background. os.system would block this cell until
# the server process exits, so a non-blocking Popen is used instead.
# Presumably, if a server is already listening on the default port, the new
# process exits on its own.
try:
    mongod_process = subprocess.Popen(
        [mongod], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
    )
except OSError as launch_error:
    # Best effort: the executable may be missing while a server is already running.
    print("Could not launch {} ({}); assuming a server is already running".format(mongod, launch_error))

conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)
# Creating the client does not fail when no server is reachable, so force a
# round trip here to surface a connection problem before any data is written.
client.admin.command("ping")

# Attribute access never raises, so the former try/except fallback around it
# was unreachable. Each collection is reset by its own cell further down.
db = client.mars
    

#### Creating and populating "***news***" collection

In [13]:
# Start from an empty "news" collection on every run. Dropping first (a no-op
# when the collection does not exist) replaces the former bare `except:`,
# which also hid connection errors.
db.drop_collection("news")
db.create_collection("news")

# Store the latest headline and its teaser paragraph as one document.
collection = db.get_collection("news")
collection.insert_one({"title":news_title,"description":news_p})

<pymongo.results.InsertOneResult at 0x14d764726c8>

#### Creating and populating "***image***" collection

In [14]:
# Start from an empty "image" collection on every run. Dropping first (a no-op
# when the collection does not exist) replaces the former bare `except:`,
# which also hid connection errors.
db.drop_collection("image")
db.create_collection("image")

# Store the featured image URL as one document.
collection = db.get_collection("image")
collection.insert_one({"Featured_Image": featured_image_url})

<pymongo.results.InsertOneResult at 0x14d75fb6948>

#### Creating and populating "***weather***" collection

In [15]:
# Start from an empty "weather" collection on every run. Dropping first (a
# no-op when the collection does not exist) replaces the former bare
# `except:`, which also hid connection errors.
db.drop_collection("weather")
db.create_collection("weather")

# Store the scraped weather text as one document.
collection = db.get_collection("weather")
collection.insert_one({"Mars_Weather":mars_weather})

<pymongo.results.InsertOneResult at 0x14d76237ac8>

#### Creating and populating "***facts***" collection

In [16]:
# Start from an empty "facts" collection on every run. Dropping first (a no-op
# when the collection does not exist) replaces the former bare `except:`,
# which also hid connection errors.
db.drop_collection("facts")
db.create_collection("facts")

collection = db.get_collection("facts")
# One document per table row, keyed by the fact name with colons removed.
# Iterating the two columns positionally avoids label-based lookups, which
# only work while the frame keeps its default 0..n-1 index.
for element, value in zip(mars_facts_df["Element"], mars_facts_df["Data"]):
    fact_name = element.replace(":","")
    fact = value
    fact_data = {fact_name: fact}
    collection.insert_one(fact_data)

#### Creating and populating "***hemispheres***" collection

In [17]:
# Start from an empty "hemispheres" collection on every run. Dropping first (a
# no-op when the collection does not exist) replaces the former bare
# `except:`, which also hid connection errors.
db.drop_collection("hemispheres")
db.create_collection("hemispheres")
collection = db.get_collection("hemispheres")

# One document per hemisphere, in scrape order. The previous `x - 1` index
# started at -1, so the last hemisphere was inserted first.
for title, img_url in zip(hemisphere_titles, hemisphere_image_url):
    collection.insert_one({"title": title, "img_url": img_url})