In [49]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [50]:
url = 'https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html'
browser.visit(url)
html = browser.html
soup = soup(html, 'html.parser')

In [51]:
rows = soup.find_all("tr", class_="data-row")

In [52]:
row_list = []
for row in rows:
    data = row.find_all("td")
    row=[col.text for col in data]
    row_list.append(row)

In [53]:
df = pd.DataFrame(row_list, columns = ["id", "terrestrial_date", "sol", "ls", "month", "min_temp", "pressure"])
df.head()

Unnamed: 0,id,terrestrial_date,sol,ls,month,min_temp,pressure
0,2,2012-08-16,10,155,6,-75.0,739.0
1,13,2012-08-17,11,156,6,-76.0,740.0
2,24,2012-08-18,12,156,6,-76.0,741.0
3,35,2012-08-19,13,157,6,-74.0,732.0
4,46,2012-08-20,14,157,6,-74.0,740.0


In [54]:
df.dtypes

id                  object
terrestrial_date    object
sol                 object
ls                  object
month               object
min_temp            object
pressure            object
dtype: object

In [55]:
df['terrestrial_date'] = pd.to_datetime(df['terrestrial_date'])

In [56]:
convert_dtypes = {'id': int,
                 'sol': int,
                 'ls': int,
                 'month':int,
                 'min_temp': float,
                 'pressure': float}
df = df.astype(convert_dtypes)
print(df.dtypes)

id                           int64
terrestrial_date    datetime64[ns]
sol                          int64
ls                           int64
month                        int64
min_temp                   float64
pressure                   float64
dtype: object


In [58]:
# How many months exist on Mars?
df.month.unique

<bound method Series.unique of 0       6
1       6
2       6
3       6
4       6
       ..
1862    5
1863    5
1864    5
1865    5
1866    5
Name: month, Length: 1867, dtype: int64>

In [59]:
# How many Martian (and not Earth) days worth of data exist in the scraped dataset?
df.terrestrial_date.unique

<bound method Series.unique of 0      2012-08-16
1      2012-08-17
2      2012-08-18
3      2012-08-19
4      2012-08-20
          ...    
1862   2018-02-23
1863   2018-02-24
1864   2018-02-25
1865   2018-02-26
1866   2018-02-27
Name: terrestrial_date, Length: 1867, dtype: datetime64[ns]>

In [60]:
# What are the coldest and the warmest months on Mars (at the location of Curiosity)? Get the answer by averaging the minimum daily temperature of all the months. 
# Plot the results as a bar chart
df.min_temp.mean()

-76.12104981253347

In [61]:
df.min_temp.max()

-62.0

In [62]:
df.min_temp.min()

-90.0

In [66]:
# Which months have the lowest and the highest atmospheric pressure on Mars? Get the answer by averaging the daily atmospheric pressure of all the months. 
# Plot the results as a bar chart
df.pressure.mean()

841.0664167113016

In [65]:
df.pressure.max()

925.0

In [64]:
df.pressure.min()

727.0

In [None]:
# About how many terrestrial (Earth) days exist in a Martian year? That is, in the time that Mars circles the Sun once, 
# how many days elapse on Earth? Visually estimate the result by plotting the daily minimum temperature.

In [31]:
df.sol.unique

<bound method Series.unique of 0         10
1         11
2         12
3         13
4         14
        ... 
1862    1973
1863    1974
1864    1975
1865    1976
1866    1977
Name: sol, Length: 1867, dtype: object>

In [None]:
df.to_csv('mars_temp.csv')