In [3]:
%matplotlib widget
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

#weather_data.to_csv('weather_data.csv')

# Load the raw observations. 'Date' is handled as 'YYYY-MM-DD' text throughout,
# so plain string comparisons order the rows chronologically.
weather_raw = pd.read_csv('weather_data.csv')

# Keep 2005-2015 only (the file is expected to cover exactly that range, but
# filter anyway) and drop leap days so every year has the same 365 days.
in_study_period = weather_raw['Date'].between('2005-01-01', '2015-12-31')
is_leap_day = weather_raw['Date'].str.contains('-02-29', regex=False, na=False)

# Data_Value is in tenths of Celsius degrees, so convert it to Celsius.
# .assign() builds a new frame instead of writing into a filtered slice, which
# avoids pandas' SettingWithCopyWarning.
weather_data = (
    weather_raw.loc[in_study_period & ~is_leap_day]
    .assign(Data_Value=lambda frame: frame['Data_Value'] / 10)
)

def _daily_extremes(observations, suffix, index_name):
    """Return the max and min Data_Value per calendar day and Element.

    Every date is re-based onto the year 2015 so that observations from
    different years that share a month-day land in the same row.

    Parameters
    ----------
    observations : DataFrame
        Needs the columns 'Date' ('YYYY-MM-DD' strings, no Feb 29 -- 2015 has
        no such day, so parsing would fail), 'Element' and 'Data_Value'.
    suffix : str
        Text appended to every resulting column name, e.g. '_05_14'.
    index_name : str
        Name given to the re-based datetime index.

    Returns
    -------
    DataFrame
        Indexed by the re-based date, with flat columns named
        '<max|min>_<Element><suffix>'.
    """
    day_in_2015 = pd.to_datetime('2015-' + observations['Date'].str[5:])
    extremes = (
        observations
        .drop(columns='Date')
        .assign(**{index_name: day_in_2015})
        # string aggregator names give the same 'max'/'min' column labels as
        # the builtins, without the FutureWarning newer pandas emits for them
        .pivot_table(values='Data_Value', index=index_name, columns='Element',
                     aggfunc=['max', 'min'])
    )
    # flatten the (aggregate, Element) MultiIndex into '<aggregate>_<Element><suffix>'
    extremes.columns = ['_'.join(pair) + suffix for pair in extremes.columns]
    return extremes


# separate 05_14 data from 15 data, then get the per-day max and min of each element
weather_05_14 = _daily_extremes(weather_data[weather_data['Date'] < '2015-01-01'], '_05_14', 'Date_New')
weather_15 = _daily_extremes(weather_data[weather_data['Date'] >= '2015-01-01'], '_15', 'Date')

# merge useful columns into one dataframe; the inner join keeps only the days
# present in both periods
columns_used = ['max_TMAX_05_14', 'min_TMIN_05_14', 'max_TMAX_15', 'min_TMIN_15']
data_for_plot = weather_05_14.merge(weather_15, how='inner', left_index=True, right_index=True)[columns_used]

# create plot: 2005-2014 record highs and lows as thin lines on the Celsius axis
fig, axis1 = plt.subplots()
axis1.plot(data_for_plot.index, data_for_plot['max_TMAX_05_14'], 'salmon', linewidth=0.6, label='Highest Temp (05-14)')
axis1.plot(data_for_plot.index, data_for_plot['min_TMIN_05_14'], 'c', linewidth=0.6, label='Lowest Temp (05-14)')
axis2 = axis1.twinx()

axis1.set_title('Daily Record High & Low Temperature for 2005-2014\nWith Record-breakers of 2015 Highlighted')
axis1.yaxis.set_major_formatter(mticker.EngFormatter(unit=u"°C"))
axis2.yaxis.set_major_formatter(mticker.EngFormatter(unit=u"°F"))
axis1.margins(x=0)

# shade the area between the record high and record low temperatures;
# both bounds come from data_for_plot so they always match the x values
axis1.fill_between(data_for_plot.index.values, data_for_plot['max_TMAX_05_14'],
                   data_for_plot['min_TMIN_05_14'], facecolor='lightgrey', alpha=0.5)

# plot record-breakers of 2015
record_breaker_high = data_for_plot[data_for_plot['max_TMAX_15'] > data_for_plot['max_TMAX_05_14']]['max_TMAX_15']
record_breaker_low = data_for_plot[data_for_plot['min_TMIN_15'] < data_for_plot['min_TMIN_05_14']]['min_TMIN_15']
axis1.scatter(record_breaker_high.index, record_breaker_high, c='r', s=10, label='15 Record_breaker for Highest')
axis1.scatter(record_breaker_low.index, record_breaker_low, c='b', s=10, label='15 Record_breaker for Lowest')

# set second y axis: mirror the Celsius limits in Fahrenheit. This has to run
# after everything is drawn on axis1 -- the record-breakers lie outside the
# 2005-2014 envelope and enlarge the autoscaled Celsius range, so limits read
# any earlier would leave the two scales out of step.
celsius_limits = np.array(axis1.get_ylim())
axis2.set_ylim(celsius_limits * 9 / 5 + 32)

# adjust ticks and labels of x axis to look better: unlabeled major ticks at
# month starts, month names on (invisible) minor ticks at mid-month
axis1.xaxis.set_major_locator(mdates.MonthLocator())
axis1.xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
axis1.xaxis.set_major_formatter(mticker.NullFormatter())
axis1.xaxis.set_minor_formatter(mdates.DateFormatter('%b'))
axis1.tick_params(axis="x", which="minor", length=0)

# add back the last tick which was removed by setting 0 margins
major_ticks = list(axis1.get_xticks())
major_ticks.append(axis1.get_xlim()[1])
axis1.set_xticks(major_ticks)

# add and format legend
axis1.legend(loc='lower right', fontsize='x-small', frameon=False)
plt.show()

# Sanity check: list the February days on which 2015 broke the 2005-2014 record.
# Rows are selected through .loc because selecting rows by a partial date string
# with plain [] was deprecated and later removed from pandas.
df_temp = data_for_plot.loc['2015-02']
print(df_temp[df_temp['max_TMAX_05_14'] < df_temp['max_TMAX_15']])
print('-' * 100)
print(df_temp[df_temp['min_TMIN_05_14'] > df_temp['min_TMIN_15']])

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

            max_TMAX_05_14  min_TMIN_05_14  max_TMAX_15  min_TMIN_15
Date_New                                                            
2015-02-09             7.8           -21.0          8.3        -11.6
----------------------------------------------------------------------------------------------------
            max_TMAX_05_14  min_TMIN_05_14  max_TMAX_15  min_TMIN_15
Date_New                                                            
2015-02-03             8.9           -23.2         -2.1        -23.8
2015-02-14            10.6           -21.7         -2.7        -23.9
2015-02-15            11.7           -25.6         -3.9        -26.0
2015-02-16            14.4           -22.2        -12.1        -29.4
2015-02-17            15.6           -24.3         -5.5        -27.2
2015-02-19            12.8           -18.8         -5.6        -26.0
2015-02-20            10.6           -17.8         -8.8        -34.3
2015-02-21            10.0           -17.8         -1.0        -32.2
20