In [None]:
#import dependencies
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import datetime

In [None]:
#Simple approach: let pandas parse the HTML table directly instead of scraping it by hand.
url = "https://data-class-mars-challenge.s3.amazonaws.com/Mars/index.html"
#read_html returns one DataFrame per table found on the page; keep the first one
tables = pd.read_html(url)
df = tables[0]
#preview the first few rows
df.head()

In [None]:
#inspect the dtype pandas inferred for each column of the scraped table
#(terrestrial_date is converted to datetime in a later cell)
df.dtypes

In [None]:
#parse the terrestrial (Earth) dates into real datetimes;
#later cells subtract these dates and plot them on a time axis
terrestrial_dates = pd.to_datetime(df["terrestrial_date"])
df["terrestrial_date"] = terrestrial_dates

In [None]:
#count the distinct values in the Martian month column
#dropna=False: a missing value, if present, is counted as its own distinct value
num_months = df["month"].nunique(dropna=False)
print(f"There are {num_months} distinct months on Mars.")

In [None]:
#find how many Martian days (sols) of data we have
#each distinct value of the sol column is one Martian day on which data was recorded
num_days = len(df["sol"].unique())
print(f"Data was taken on {num_days} distinct Martian days.")

In [None]:
#group data by Martian month and take the average minimum temperature
#select the column BEFORE aggregating so only min_temp is averaged
#(averaging the whole frame first can fail on columns that cannot be averaged)
avg_temps = df.groupby("month")["min_temp"].mean()
print(avg_temps)
#report coldest and least cold months, looked up from the data instead of hard-coded
coldest_month = avg_temps.idxmin()
warmest_month = avg_temps.idxmax()
print(f"The lowest average minimum temperature occurred in month {coldest_month} at {avg_temps.loc[coldest_month]} degrees Celsius.")
print(f"The highest average minimum temperature occurred in month {warmest_month} at {avg_temps.loc[warmest_month]} degrees Celsius.")
#plot the temperature data
#use the months actually present as x positions so the bars always line up with the values
plt.bar(avg_temps.index, avg_temps)
plt.xlabel("Month")
plt.ylabel("Average Minimum Temperature (degrees C)")
plt.title("Average Minimum Temperatures on Mars by Month")
plt.show()

In [None]:
#group by Martian month and find the average pressure
#select the column BEFORE aggregating so only pressure is averaged
avg_p = df.groupby("month")["pressure"].mean()
print(avg_p)
#report highest and lowest pressure months, looked up from the data instead of hard-coded
lowest_p_month = avg_p.idxmin()
highest_p_month = avg_p.idxmax()
#the messages describe pressure (they previously said "minimum temperature" by mistake)
print(f"The lowest average pressure occurred in month {lowest_p_month} at {avg_p.loc[lowest_p_month]}.")
print(f"The highest average pressure occurred in month {highest_p_month} at {avg_p.loc[highest_p_month]}.")
#plot the data
#use the months actually present as x positions so the bars always line up with the values
plt.bar(avg_p.index, avg_p)
plt.xlabel("Month")
plt.ylabel("Average Pressure")
plt.title("Average Pressure on Mars by Month")
plt.show()

In [None]:
#find the number of Earth days in a Martian year
#the first measurement of a Martian year is where the month column drops by 11 (month 12 -> month 1)
#NOTE(review): assumes the rows are in chronological order - confirm against the source table
#make a list of first Earth day of each Martian year
firsts = []
prev = 0  #no real month is 0, so the very first row can never look like a year start
for (day, month) in zip(df["terrestrial_date"], df["month"]):
    if prev-month == 11:
        firsts.append(day)
    prev = month
#report the length of Martian year based on Martian months
#average over every full year found: span from the first to the LAST year start,
#divided by the number of years in between (this needs at least two year starts)
#the subtraction relies on terrestrial_date having been converted to datetime earlier
if len(firsts) >= 2:
    year_length = (firsts[-1]-firsts[0])/(len(firsts)-1)
    print("The martian year is about " + str(year_length) + " long based on the Martian months in the dataset.")
else:
    print("Not enough Martian year starts in the dataset to estimate the length of a Martian year.")
#plot all temperature measurements
plt.plot(df["terrestrial_date"],df["min_temp"])
plt.xlabel("Terrestrial Year")
plt.ylabel("Min Temperature (Degrees C)")
plt.title("Martian Minimum Temperature over Time")
plt.show()
#report length of Martian year based on temperature cycles
#dates were extracted from the graph using the WebPlotDigitizer (https://apps.automeris.io/wpd/)
#the first and last peaks are two cycles apart, hence the division by 2
print(f"According to the temperature graph, temperature peaks occur around 2013/01/02, 2014/10/30, and 2016/10/05. this means the Martian year is around {str((datetime.date(2016,10,5)-datetime.date(2013,1,2))/2)} based on temperature cycles.")

In [None]:
#write the table to a CSV file in the working directory
#index=False leaves the DataFrame's row index out of the file
output_path = "marsData.csv"
df.to_csv(output_path, index=False)