In [54]:
%matplotlib inline
import numpy as np                   # math
import pandas as pd                  # manipulating data
import matplotlib.pyplot as plt      # graphing
import os                            # useful for handling filenames etc.

# calculates the Pearson correlation coefficient, p-value, and does linear regression
from scipy.stats import pearsonr, linregress

#import seaborn as sns                # makes matplotlib beautiful
#sns.set_style('darkgrid')

import matplotlib as mpl             # control formatting
mpl.rcParams['axes.titlesize'] = 16
mpl.rcParams['axes.titleweight'] = 'semibold'

# interactive graphs
from bokeh.io import output_notebook, show, push_notebook
from bokeh.plotting import figure
from bokeh.layouts import row, column
from bokeh.models import DatetimeTickFormatter, HoverTool
output_notebook()

# manage date and time
from datetime import datetime, timedelta, date

In [51]:
# Raw station query for a single day, shown before any clean-up is applied.
# The URL is assembled from adjacent string literals, so no line-continuation
# backslash has to live inside the string itself.
CSV_URL = (
    'https://www.wunderground.com/weatherstation/WXDailyHistory.asp?'
    'ID=KCABERKE169&day=01&month=11&year=2018&graphspan=day&format=1'
)
df = pd.read_csv(CSV_URL, index_col=False)
df

Unnamed: 0,Time,TemperatureF,DewpointF,PressureIn,WindDirection,WindDirectionDegrees,WindSpeedMPH,WindSpeedGustMPH,Humidity,HourlyPrecipIn,Conditions,Clouds,dailyrainin,SolarRadiationWatts/m^2,SoftwareType,DateUTC<br>
0,2018-11-01 12:49:32,72.3,48.0,-100.0,SSW,202.0,0.0,0.0,42.0,0.00,,,0.00,44.49,Weather logger V3.0.,2018-11-01 19:49:32
1,<br>,,,,,,,,,,,,,,,
2,2018-11-01 12:54:36,72.5,48.2,-100.0,SE,133.0,0.0,0.0,42.0,0.21,,,0.04,277.27,Weather logger V3.0.,2018-11-01 19:54:36
3,<br>,,,,,,,,,,,,,,,
4,2018-11-01 12:59:56,73.4,45.7,-100.0,South,187.0,0.0,0.0,37.0,0.21,,,0.04,292.76,Weather logger V3.0.,2018-11-01 19:59:56
5,<br>,,,,,,,,,,,,,,,
6,2018-11-01 13:04:44,74.3,44.2,-100.0,ESE,114.0,1.8,4.9,34.0,0.00,,,0.04,288.55,Weather logger V3.0.,2018-11-01 20:04:44
7,<br>,,,,,,,,,,,,,,,
8,2018-11-01 13:09:48,75.4,44.4,-100.0,SE,133.0,0.4,2.5,33.0,0.00,,,0.04,295.80,Weather logger V3.0.,2018-11-01 20:09:48
9,<br>,,,,,,,,,,,,,,,


In [52]:
# The raw frame carries a `<br>`-only row after each reading, so dropping the
# odd row labels (1, 3, 5, ...) leaves just the readings.
dg = df.drop(index=list(range(1, len(df), 2)))
dg

Unnamed: 0,Time,TemperatureF,DewpointF,PressureIn,WindDirection,WindDirectionDegrees,WindSpeedMPH,WindSpeedGustMPH,Humidity,HourlyPrecipIn,Conditions,Clouds,dailyrainin,SolarRadiationWatts/m^2,SoftwareType,DateUTC<br>
0,2018-11-01 12:49:32,72.3,48.0,-100.0,SSW,202.0,0.0,0.0,42.0,0.00,,,0.00,44.49,Weather logger V3.0.,2018-11-01 19:49:32
2,2018-11-01 12:54:36,72.5,48.2,-100.0,SE,133.0,0.0,0.0,42.0,0.21,,,0.04,277.27,Weather logger V3.0.,2018-11-01 19:54:36
4,2018-11-01 12:59:56,73.4,45.7,-100.0,South,187.0,0.0,0.0,37.0,0.21,,,0.04,292.76,Weather logger V3.0.,2018-11-01 19:59:56
6,2018-11-01 13:04:44,74.3,44.2,-100.0,ESE,114.0,1.8,4.9,34.0,0.00,,,0.04,288.55,Weather logger V3.0.,2018-11-01 20:04:44
8,2018-11-01 13:09:48,75.4,44.4,-100.0,SE,133.0,0.4,2.5,33.0,0.00,,,0.04,295.80,Weather logger V3.0.,2018-11-01 20:09:48
10,2018-11-01 13:14:52,76.3,44.2,-100.0,SE,139.0,0.0,0.0,32.0,0.00,,,0.04,274.66,Weather logger V3.0.,2018-11-01 20:14:52
12,2018-11-01 13:19:56,77.5,43.7,-100.0,SSE,150.0,0.0,0.0,30.0,0.00,,,0.04,274.23,Weather logger V3.0.,2018-11-01 20:19:56
14,2018-11-01 13:24:44,78.8,45.7,-100.0,South,186.0,0.0,0.0,31.0,0.00,,,0.04,287.99,Weather logger V3.0.,2018-11-01 20:24:44
16,2018-11-01 13:29:32,79.7,45.5,-100.0,East,97.0,0.0,0.0,30.0,0.00,,,0.04,279.70,Weather logger V3.0.,2018-11-01 20:29:32
18,2018-11-01 13:34:36,80.6,45.5,-100.0,NE,39.0,0.0,0.0,29.0,0.00,,,0.04,290.42,Weather logger V3.0.,2018-11-01 20:34:36


In [None]:
# Method for grabbing data for a specific location and date and cleaning up the website query output
def get_clean_df(location_id, date):
    """Download one day of weather-station data and strip the `<br>` filler rows.

    Parameters
    ----------
    location_id : str
        Station identifier placed in the `ID` query parameter,
        e.g. 'KCABERKE169'.
    date : list or tuple of str
        Three strings in the order [MM, DD, YYYY].

    Returns
    -------
    pandas.DataFrame
        The parsed CSV without the rows whose `Time` cell is `<br>`.
        Surviving rows keep their original index labels.

    The request URL is printed so the query can be checked by hand.
    """
    month, day, year = date[0], date[1], date[2]
    url = ("https://www.wunderground.com/weatherstation/WXDailyHistory.asp?"
           "ID={}&day={}&month={}&year={}&graphspan=day&format=1"
           ).format(location_id, day, month, year)
    print(url)
    data = pd.read_csv(url, index_col=False)
    # Identify filler rows by their content instead of assuming they sit on
    # every odd position: if the response ever stops alternating strictly, a
    # positional drop would silently discard real readings and keep filler.
    is_filler = data['Time'].astype(str).str.strip() == '<br>'
    return data.drop(index=data.index[is_filler])

In [49]:
# Method to process weather data DataFrame
#   - converts temperature/pressure to sensible units and drops unneeded columns
def process_data(data_df):
    """Return a metric-unit copy of a cleaned weather-station frame.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with at least the columns `Time` (strings formatted as
        '%Y-%m-%d %H:%M:%S'), `TemperatureF`, `DewpointF`, `PressureIn`,
        `Conditions`, `Clouds`, `SoftwareType` and `DateUTC<br>`.

    Returns
    -------
    pandas.DataFrame
        A new frame in which `Time` is parsed to datetimes, the columns
        `Temperature`, `Dewpoint` (Fahrenheit converted to Celsius) and
        `Pressure` (inHg converted to mbar) are appended, and the source-unit
        and unused columns are removed.  Index labels are kept as they were.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    ValueError
        If a `Time` entry does not match the expected format.

    The input frame is not modified.
    """
    def deg_f_to_c(deg_f):
        return (5. / 9.) * (deg_f - 32)

    def inhg_to_mbar(inhg):
        return 33.863753 * inhg

    # Work on a copy so the caller's frame (possibly already displayed in an
    # earlier cell) is left untouched, and convert whole columns at once
    # rather than writing cell by cell.
    converted = data_df.copy()
    converted['Time'] = pd.to_datetime(data_df['Time'], format='%Y-%m-%d %H:%M:%S')
    converted['Temperature'] = deg_f_to_c(data_df['TemperatureF'])
    converted['Dewpoint'] = deg_f_to_c(data_df['DewpointF'])
    # NOTE(review): the sample station reports PressureIn = -100.0, which
    # becomes -3386.3753 mbar here; presumably a missing-value sentinel --
    # confirm before relying on the Pressure column.
    converted['Pressure'] = inhg_to_mbar(data_df['PressureIn'])

    return converted.drop(['TemperatureF', 'DewpointF', 'PressureIn', 'Conditions', 'Clouds',
                           'SoftwareType', 'DateUTC<br>'], axis=1)

# Fetch station KCABERKE169 for 1 Nov 2018 and convert it in one step.
ws_data = process_data(
    get_clean_df(location_id='KCABERKE169', date=['11', '01', '2018'])
)

https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=KCABERKE169&day=01&month=11&year=2018&graphspan=day&format=1


In [50]:
# Display the frame returned by process_data as this cell's output.
ws_data

Unnamed: 0,Time,WindDirection,WindDirectionDegrees,WindSpeedMPH,WindSpeedGustMPH,Humidity,HourlyPrecipIn,dailyrainin,SolarRadiationWatts/m^2,Temperature,Dewpoint,Pressure
0,2018-11-01 12:49:32,SSW,202.0,0.0,0.0,42.0,0.00,0.00,44.49,22.388889,8.888889,-3386.3753
2,2018-11-01 12:54:36,SE,133.0,0.0,0.0,42.0,0.21,0.04,277.27,22.500000,9.000000,-3386.3753
4,2018-11-01 12:59:56,South,187.0,0.0,0.0,37.0,0.21,0.04,292.76,23.000000,7.611111,-3386.3753
6,2018-11-01 13:04:44,ESE,114.0,1.8,4.9,34.0,0.00,0.04,288.55,23.500000,6.777778,-3386.3753
8,2018-11-01 13:09:48,SE,133.0,0.4,2.5,33.0,0.00,0.04,295.80,24.111111,6.888889,-3386.3753
10,2018-11-01 13:14:52,SE,139.0,0.0,0.0,32.0,0.00,0.04,274.66,24.611111,6.777778,-3386.3753
12,2018-11-01 13:19:56,SSE,150.0,0.0,0.0,30.0,0.00,0.04,274.23,25.277778,6.500000,-3386.3753
14,2018-11-01 13:24:44,South,186.0,0.0,0.0,31.0,0.00,0.04,287.99,26.000000,7.611111,-3386.3753
16,2018-11-01 13:29:32,East,97.0,0.0,0.0,30.0,0.00,0.04,279.70,26.500000,7.500000,-3386.3753
18,2018-11-01 13:34:36,NE,39.0,0.0,0.0,29.0,0.00,0.04,290.42,27.000000,7.500000,-3386.3753


In [55]:
# Interactive Bokeh line plot of the converted temperature against time.
# `width`/`height` replace `plot_width`/`plot_height`, which Bokeh 3.0 removed;
# with the old keywords this cell fails on a current Bokeh install.
p = figure(width=960, height=480, title='Etcheverry Rooftop Temperature', x_axis_type='datetime')

p.line(ws_data['Time'], ws_data['Temperature'], line_width=2)
# Hover read-out of the data-space coordinates under the cursor.
p.add_tools(HoverTool(tooltips=[('Time', '$x'), ('Temp', '$y')]))

p.xaxis.axis_label = 'Time'
p.yaxis.axis_label = 'Temperature'

show(p)