In [1]:
# Dependencies
import getpass
import os
import time

import numpy as np
import pandas as pd
import pymongo
import requests
import tweepy
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Open a PyMongo client against the MongoDB server on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [3]:
# Handles for the database and the collection used by this notebook
# (subscript access is equivalent to attribute access in PyMongo)
db = client['mars_mission_db']
collection = db['mission_records']


In [4]:
# Step 1: Information about Mars

# Address of the NASA Mars news page that will be scraped
url_mission = 'https://mars.nasa.gov/news'

In [5]:
# Retrieve page with the requests module.
# The timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() stops an HTTP error page from being parsed as news.
response_mission = requests.get(url_mission, timeout=30)
response_mission.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup_mission = BeautifulSoup(response_mission.text, 'lxml')

In [6]:
# Every <div> carrying the "slide" class, returned as an iterable result set
results = soup_mission.find_all('div', attrs={'class': 'slide'})

In [7]:
# Collect the title and description text of every news slide.

title_record=[]
text_record=[]

for result in results:

    title_tag=result.find('div', class_='content_title')
    text_tag=result.find('div', class_='rollover_description_inner')
    # find() returns None when a slide lacks one of the two elements; skip
    # such slides instead of failing with AttributeError on `.text`.
    if title_tag is None or text_tag is None:
        continue
    news_title=title_tag.text.strip('\n')
    news_text=text_tag.text.strip('\n')
    title_record.append(news_title)
    text_record.append(news_text)

# Fail with a clear message rather than a bare IndexError when nothing was scraped.
if not title_record:
    raise ValueError("No news slides with a title and description were found")

# Keep the first slide's title and text for later use.
news_title_mars=title_record[0]
news_text_mars=text_record[0]

In [8]:
# Show the title taken from the first scraped news slide.
news_title_mars

"Martian Ridge Brings Out Rover's Color Talents"

In [9]:
# Show the description text taken from the first scraped news slide.
news_text_mars

'On a part of "Vera Rubin Ridge" where rover-team researchers sought to determine whether dust coatings are hiding rocks\' hematite content, the Mast Camera (Mastcam) on NASA\'s Curiosity Mars rover took this image of a rock surface that had been brushed with the rover\'s Dust Removal Tool.'

In [10]:
# Step 2: scrape the featured image

# NOTE(review): the browser is left open after this cell; call browser.quit()
# once no later cell needs it.
browser = Browser('chrome', headless=False)

# URL of page to be scraped
url_images="https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

browser.visit(url_images)

# Click the full image button
full_image = browser.find_link_by_partial_text('FULL IMAGE')
full_image.click()

# Click the more info button once it has become visible
more_info = browser.find_link_by_partial_text('more info').first

# Poll in 5-second steps, but give up after a bounded wait so the notebook
# cannot spin forever if the link never becomes visible.
max_wait_sec = 60
waited_sec = 0
while not more_info.visible:
    if waited_sec >= max_wait_sec:
        raise TimeoutError("'more info' link did not become visible within "
                           + str(max_wait_sec) + " seconds")
    print('sleeping for 5 sec')
    time.sleep(5)
    waited_sec += 5

more_info.click()

# Grab the markup of the image inside the article element
image_elem = browser.find_by_tag('article').find_by_tag('img')
content=image_elem.outer_html

# Parse that markup to get the src of the main image
soup_main_image = BeautifulSoup(content, 'lxml')
results_image=soup_main_image.find('img',class_="main_image",attrs={'src':True})

# find() returns None when no matching <img> exists; report that explicitly
# instead of failing with a TypeError on the subscript below.
if results_image is None:
    raise ValueError("No <img class=\"main_image\"> with a src attribute was found")

# Image url (the src is appended to the site root)
featured_image_url="https://www.jpl.nasa.gov"+results_image['src']

sleeping for 5 sec


In [11]:
# Twitter API credentials
#
# The four values are read from apikey.csv when that file exists; otherwise
# they are prompted for and cached in apikey.csv for the next run.
# NOTE: the cache is plaintext -- keep apikey.csv out of version control.

if os.path.isfile("apikey.csv"):
    # dtype=str keeps a key made only of digits from being parsed as a number
    key_df=pd.read_csv("apikey.csv", dtype=str)
    consumer_key=key_df.loc[0,'consumer_key']
    consumer_secret=key_df.loc[0,'consumer_secret']
    access_token=key_df.loc[0,'access_token']
    access_token_secret=key_df.loc[0,'access_token_secret']

else:
    consumer_key=input("Enter the consumer_key: ")
    # getpass keeps the secret values out of the saved notebook output
    consumer_secret=getpass.getpass("Enter the consumer_secret: ")
    access_token=input("Enter the access_token: ")
    access_token_secret=getpass.getpass("Enter the access_token_secret: ")
    data = [{'consumer_key': consumer_key,'consumer_secret':consumer_secret,'access_token':access_token,'access_token_secret':access_token_secret}]
    df = pd.DataFrame(data)
    df.to_csv('apikey.csv', index=False)
    # Restrict the cache to the current user (limited effect on Windows)
    os.chmod('apikey.csv', 0o600)
    print("file do not exist,creating..")


# Use Tweepy to authenticate our access; responses are returned as parsed JSON
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())



In [12]:
# Step 3: read the Mars weather from the @MarsWxReport timeline

weather_tweets = api.user_timeline("@MarsWxReport")

# The first entry of the returned timeline supplies the weather text
latest_tweet = weather_tweets[0]
mars_weather = latest_tweet['text']
mars_weather

'Sol 1862 (Nov 01, 2017), Sunny, high -30C/-22F, low -81C/-113F, pressure at 8.54 hPa, daylight 05:56-17:39'

In [13]:
# Step 4: Mars Facts

# Start from an empty frame that has only the two column labels
fact_columns = ["fact", "value"]
mars_df = pd.DataFrame(columns=fact_columns)
mars_df

Unnamed: 0,fact,value


In [14]:
# Scrape the website for the Mars facts

# URL of page to be scraped
url_fact ="https://space-facts.com/mars/"

# Retrieve page with the requests module.
# The timeout prevents an indefinite hang; raise_for_status() surfaces HTTP
# errors here instead of letting an error page be parsed.
response_facts = requests.get(url_fact, timeout=30)
response_facts.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup_fact = BeautifulSoup(response_facts.text, 'lxml')

# Locate the facts table by its element id
table=soup_fact.find('table', id='tablepress-mars')

# find() returns None when the table is absent; fail here with a clear message
# rather than with an AttributeError in a later cell.
if table is None:
    raise ValueError("Table with id 'tablepress-mars' was not found at " + url_fact)

In [15]:
# All <tr> row elements of the facts table
tr_tag = table.find_all(name='tr')


In [16]:
# Build the facts frame from the table rows.
#
# Rows are gathered in a list and the frame is created once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
# Rebuilding the frame here also makes the cell safe to re-run without
# duplicating rows.
fact_rows=[]

for item in tr_tag:

    col1_cell=item.find('td',class_='column-1')
    col2_cell=item.find('td',class_='column-2')
    # Skip rows that lack either cell; find() returns None for those.
    if col1_cell is None or col2_cell is None:
        continue
    # strip() drops the trailing newline some cells carry
    col1_value=col1_cell.text.strip()
    col2_value=col2_cell.text.strip()
    # The key must be "fact" to match the frame's column; "Description"
    # left "fact" empty and added a third column.
    fact_rows.append({"fact":col1_value,
                      "value":col2_value})

mars_df = pd.DataFrame(fact_rows, columns=["fact","value"])

In [17]:
# Display the Mars facts DataFrame.
mars_df

Unnamed: 0,fact,value
0,Equatorial Diameter:,"6,792 km\n"
1,Polar Diameter:,"6,752 km\n"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)\n
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [18]:
# Render the facts DataFrame as an HTML table string, leaving out the index column.
fact_table=mars_df.to_html(index=False)

In [19]:
# Print the generated HTML markup of the facts table.
print(fact_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>fact</th>
      <th>value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km\n</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km\n</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)\n</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


In [20]:
# Scrape the website for the Mars hemispheres

# NOTE(review): `list` shadows the built-in of the same name. It is kept
# because the cell that visits each hemisphere page iterates over this name.
list=[]
hemisphere_urls={}
hemisphere_image_urls=[]

# URL of page to be scraped

url_hemis ="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Retrieve page with the requests module.
# The timeout prevents an indefinite hang; raise_for_status() surfaces HTTP
# errors here instead of letting an error page be parsed.
response_hemis = requests.get(url_hemis, timeout=30)
response_hemis.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup_hemis = BeautifulSoup(response_hemis.text, 'lxml')

# Product links that carry an href attribute
links=soup_hemis.find_all('a', class_='item product-item',attrs={'href':True})

# Each href is appended to the site root to form the detail-page URL
for link in links:

    list.append("https://astrogeology.usgs.gov"+link['href'])


In [21]:
# Show the hemisphere detail-page URLs collected in `list`.
list

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [22]:
# Visit every hemisphere detail page and record its title and full-image URL.

# Start from an empty list so that re-running this cell does not append
# duplicate entries to the results of an earlier run.
hemisphere_image_urls=[]

for item in list:

    # Retrieve page with the requests module (bounded wait, fail on HTTP errors)
    response_list = requests.get(item, timeout=30)
    response_list.raise_for_status()

    # Create BeautifulSoup object; parse with 'lxml'
    soup_list = BeautifulSoup(response_list.text, 'lxml')

    # Locate the wide image and the page title
    image_hemis_url=soup_list.find('img', class_='wide-image',attrs={'src':True})
    title_tag=soup_list.find('h2', class_='title')

    # find() returns None when an element is missing; name the offending page
    # instead of failing with a TypeError/AttributeError.
    if image_hemis_url is None or title_tag is None:
        raise ValueError("Missing wide image or title on " + item)

    hemis_url="https://astrogeology.usgs.gov"+image_hemis_url['src']
    title_of_url=title_tag.text
    hemisphere_urls={"title":title_of_url,"img_url":hemis_url}
    hemisphere_image_urls.append(hemisphere_urls)

In [23]:
# Show the list of {"title", "img_url"} dictionaries built for the hemisphere pages.
hemisphere_image_urls

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]