In [1]:
# coding: utf-8



# # Assignment 2

# 

# Before working on this assignment please read these instructions fully. In the submission area, you will notice that you can click the link to **Preview the Grading** for each step of the assignment. This is the criteria that will be used for peer grading. Please familiarize yourself with the criteria before beginning the assignment.

# 

# An NOAA dataset has been stored in the file `data/C2A2_data/BinnedCsvs_d400/fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv`. The data for this assignment comes from a subset of The National Centers for Environmental Information (NCEI) [Daily Global Historical Climatology Network](https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt) (GHCN-Daily). The GHCN-Daily is comprised of daily climate records from thousands of land surface stations across the globe.

# 

# Each row in the assignment datafile corresponds to a single observation.

# 

# The following variables are provided to you:

# 

# * **id** : station identification code

# * **date** : date in YYYY-MM-DD format (e.g. 2012-01-24 = January 24, 2012)

# * **element** : indicator of element type

#     * TMAX : Maximum temperature (tenths of degrees C)

#     * TMIN : Minimum temperature (tenths of degrees C)

# * **value** : data value for element (tenths of degrees C)

# 

# For this assignment, you must:

# 

# 1. Read the documentation and familiarize yourself with the dataset, then write some python code which returns a line graph of the record high and record low temperatures by day of the year over the period 2005-2014. The area between the record high and record low temperatures for each day should be shaded.

# 2. Overlay a scatter of the 2015 data for any points (highs and lows) for which the ten-year (2005-2014) record high or record low was broken in 2015.

# 3. Watch out for leap days (i.e. February 29th), it is reasonable to remove these points from the dataset for the purpose of this visualization.

# 4. Make the visual nice! Leverage principles from the first module in this course when developing your solution. Consider issues such as legends, labels, and chart junk.

# 

# The data you have been given is near **Ann Arbor, Michigan, United States**, and the stations the data comes from are shown on the map below.

In [None]:
import matplotlib.pyplot as plt
import mplleaflet
import pandas as pd

def leaflet_plot_stations(binsize, hashid):
    """Scatter the stations belonging to one region hash and hand the figure to mplleaflet.

    Parameters
    ----------
    binsize
        Value substituted into the station file name
        ``data/C2A2_data/BinSize_d{binsize}.csv``.
    hashid
        Only rows whose ``hash`` column equals this value are plotted.

    Returns
    -------
    Whatever ``mplleaflet.display()`` returns for the current figure
    (presumably an embeddable interactive map -- confirm against mplleaflet).
    """
    stations = pd.read_csv('data/C2A2_data/BinSize_d{}.csv'.format(binsize))

    # Keep only the stations that fall in the requested region.
    in_region = stations['hash'] == hashid
    longitudes = stations.loc[in_region, 'LONGITUDE'].tolist()
    latitudes = stations.loc[in_region, 'LATITUDE'].tolist()

    # Longitude on x, latitude on y; large translucent red markers.
    plt.figure(figsize=(8, 8))
    plt.scatter(longitudes, latitudes, c='r', alpha=0.7, s=200)

    return mplleaflet.display()

# Alternative region (bin size 400), kept for reference but not executed:
#leaflet_plot_stations(400,'d14fab4d2e3813b981d42a951ea78707030b06592f1ef999f0c81f77')
# Map the stations of the bin-size-50 region; the same hash names the
# temperature CSV that the analysis cell reads.
leaflet_plot_stations(50,'7745f1d592b5a0db2837c402ce46dd28f37f7619a88e3e156f32df13')

In [None]:
import numpy as np
# --- Configuration -----------------------------------------------------------
# Daily TMAX/TMIN observations for the region mapped above; values are stored
# in tenths of degrees C.
DATA_CSV = 'data/C2A2_data/BinnedCsvs_d50/7745f1d592b5a0db2837c402ce46dd28f37f7619a88e3e156f32df13.csv'
# Saved relative to the working directory so the notebook runs on any machine
# (the previous absolute path only existed on the author's computer).
OUTPUT_PNG = 'Temperature pattern of Suomi, Finland.png'


def _daily_extreme(frame, element, how):
    """Aggregate one element type per calendar day.

    Parameters
    ----------
    frame : DataFrame with 'Element', 'monthdays' and 'Data_Value' columns.
    element : value of 'Element' to keep ('TMAX' or 'TMIN').
    how : aggregation name understood by pandas, e.g. 'max' or 'min'.

    Returns
    -------
    One-column DataFrame ('Data_Value') indexed by 'monthdays' (MM-DD).
    """
    # String aggregation names avoid the deprecation warning pandas emits
    # when NumPy callables such as np.max are passed to groupby().agg().
    return frame[frame['Element'] == element].groupby('monthdays').agg({'Data_Value': how})


# --- Load and clean ----------------------------------------------------------
df = pd.read_csv(DATA_CSV).sort_values(by=['ID', 'Date', 'Element'])
# 'Date' is a YYYY-MM-DD string: the first four characters are the year and
# everything after the first dash is the month-day key. Dividing by 10 converts
# tenths of a degree to degrees C (exact where `*= 1/10` introduced float noise).
df = df.assign(
    year4digits=df['Date'].str[:4],
    monthdays=df['Date'].str[5:],
    Data_Value=df['Data_Value'] / 10,
)
# Drop leap days so every year contributes the same set of calendar days.
df = df[df['monthdays'] != '02-29'].drop(columns=['ID', 'Date'])

# Split the comparison year (2015) from the remaining years.
is_2015 = df['year4digits'] == '2015'
df_2015_alone = df[is_2015]
df = df[~is_2015]

# --- Daily extremes ----------------------------------------------------------
highs_of_2015 = _daily_extreme(df_2015_alone, 'TMAX', 'max')
lows_of_2015 = _daily_extreme(df_2015_alone, 'TMIN', 'min')
highs_of_except_2015 = _daily_extreme(df, 'TMAX', 'max')
lows_of_except_2015 = _daily_extreme(df, 'TMIN', 'min')

# Put all four series on one shared, sorted day axis. Comparing the frames
# directly raises when 2015 lacks a day that the other years have (or vice
# versa), and positional indexing would otherwise point at the wrong day.
day_axis = highs_of_except_2015.index.union(lows_of_except_2015.index)
record_high = highs_of_except_2015['Data_Value'].reindex(day_axis)
record_low = lows_of_except_2015['Data_Value'].reindex(day_axis)
high_2015 = highs_of_2015['Data_Value'].reindex(day_axis)
low_2015 = lows_of_2015['Data_Value'].reindex(day_axis)

# Positions on the day axis where 2015 beat the earlier record. Days missing
# on either side are NaN after reindexing and therefore compare as False.
min_points = np.flatnonzero((low_2015 < record_low).to_numpy())
max_points = np.flatnonzero((high_2015 > record_high).to_numpy())

# --- Plot --------------------------------------------------------------------
fig, ax = plt.subplots()
x_positions = np.arange(len(day_axis))

ax.plot(x_positions, record_low.to_numpy(), 'b', lw=0.7, label='Record Low Temperatures')
ax.plot(x_positions, record_high.to_numpy(), 'c', lw=0.7, label='Record High Temperatures')

ax.scatter(min_points, low_2015.iloc[min_points], s=25, marker='o', c='m', label='Broken Low')
ax.scatter(max_points, high_2015.iloc[max_points], s=25, marker='o', c='g', label='Broken High')

# Shade the band between the record low and the record high.
ax.fill_between(x_positions, record_low, record_high, facecolor='lightslategrey', alpha=0.25)

ax.set_xlabel('Month & Day')
# Label every 20th day; rotation must be numeric (the string '45' is rejected
# by current matplotlib).
ax.set_xticks(x_positions[::20])
ax.set_xticklabels(day_axis[::20], rotation=45)

ax.set_ylabel('Temperature in degrees Celsius')
ax.set_title('Temperature pattern of Suomi, Finland')

ax.legend(loc='best')
fig.subplots_adjust(bottom=0.20)

# Remove the top/right frame lines to reduce chart junk.
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

fig.savefig(OUTPUT_PNG)