In [None]:
from IPython.display import Image
# Show the notebook's opening picture; the relative path is resolved against
# the kernel's working directory.
Image("panda_cub_in_snow.jpg")

First, watch this:

http://vimeo.com/59324550

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

# Loading data

In [None]:
# Peek at the raw file through the shell before parsing it
# (`head` is a Unix utility, so this cell needs a Unix-like shell).
!head rdu-daily-2014.csv

In [None]:
# Parse the raw CSV into a DataFrame, then preview its first rows.
rdu_weather = pd.read_csv("rdu-daily-2014.csv")
rdu_weather.head()

In [None]:
# Summary statistics per numeric column. The data has not been cleaned yet,
# so any -9999 placeholder values still distort these numbers.
rdu_weather.describe()

# Indexing by date

In [None]:
# Inspect the raw DATE column before converting it to real timestamps.
rdu_weather['DATE'].head()

In [None]:
# Move DATE out of the columns and use it, parsed as YYYYMMDD, as the index.
# `pop` removes the column, so this cell cannot be re-run without reloading.
date_column = rdu_weather.pop('DATE')
rdu_weather.index = pd.to_datetime(date_column, format="%Y%m%d")

In [None]:
# Confirm that the rows are now labelled by date.
rdu_weather.head()

# Cleaning up data

In [None]:
# The dataset marks missing/invalid readings with the sentinel -9999.
# Preview (without assigning) what swapping it for NaN looks like on the
# first four rows.
rdu_weather.replace(to_replace=-9999, value=np.nan).iloc[:4]

In [None]:
# Make the replacement permanent: every -9999 sentinel becomes NaN.
rdu_weather = rdu_weather.replace(to_replace=-9999, value=np.nan)

In [None]:
# Check that the -9999 placeholders now show up as NaN.
rdu_weather.head()

In [None]:
# Only keep these five columns
# - TMAX: temperature max
# - TMIN: temperature min
# - AWND: wind speed
# - PRCP: precipitation
# - SNOW: snowfall
#
# `.copy()` gives us an independent frame. Without it, pandas remembers that
# this is a slice of the original and the column assignments in later cells
# raise SettingWithCopyWarning.
rdu_weather = rdu_weather[['TMAX', 'TMIN', 'AWND', 'PRCP', 'SNOW']].copy()
rdu_weather.columns

In [None]:
# Only the five selected measurement columns should remain.
rdu_weather.head()

In [None]:
# Temperature is in 10ths of Celsius degrees.

def to_fahrenheit(temps):
    """Convert temperatures from tenths of a degree Celsius to Fahrenheit.

    Parameters
    ----------
    temps : pandas.Series or numpy.ndarray
        Readings in tenths of a degree Celsius. A whole column can be passed
        at once because pandas/NumPy apply arithmetic with a scalar to every
        element (vectorised operations). NaN entries stay NaN.

    Returns
    -------
    Same container type as ``temps``
        Degrees Fahrenheit, rounded to one decimal place.
    """
    # Float literals keep the 9/5 ratio at 1.8 even on interpreters where
    # `/` between two ints floors (Python 2 without true division), which
    # would otherwise silently turn the factor into 1.
    celsius = temps / 10.0
    return (celsius * (9.0 / 5.0) + 32.0).round(1)

# Swap each raw temperature column for a Fahrenheit column with a friendlier
# name. `pop` removes the raw column as it is read, so this cell is one-shot.
for raw_name, friendly_name in (('TMAX', 'temp_max'), ('TMIN', 'temp_min')):
    rdu_weather[friendly_name] = to_fahrenheit(rdu_weather.pop(raw_name))

In [None]:
# Raw units:
#   AWND - wind speed, tenths of metres per second
#   PRCP - precipitation, tenths of millimetres
#   SNOW - snowfall, millimetres (already whole units, so only renamed)

rdu_weather["wind_speed"] = rdu_weather.pop("AWND").div(10)
rdu_weather["precipitation"] = rdu_weather.pop("PRCP").div(10)
rdu_weather = rdu_weather.rename(columns=dict(SNOW="snowfall"))

In [None]:
# Look at the last rows to check the renamed, rescaled columns.
rdu_weather.tail()

## Remove NaNs

In [None]:
# Per-column non-null counts and dtypes; columns with fewer non-null entries
# than rows contain NaNs.
rdu_weather.info()

Snowfall has 5 NaN values.

In [None]:
# State of the frame before the NaNs are filled.
rdu_weather.head()

In [None]:
# Treat every missing reading as zero. Note this applies to all columns,
# not only snowfall.
rdu_weather = rdu_weather.fillna(value=0)

In [None]:
# Same preview after filling: no NaNs should remain.
rdu_weather.head()

# Querying data

In [None]:
# What are the snowiest days?
# Keep only days with snowfall, ordered from most to least snow.
# (`DataFrame.sort` no longer exists in pandas; `sort_values` replaces it.)
rdu_weather[rdu_weather.snowfall > 0].sort_values("snowfall", ascending=False)

In [None]:
# Explanation: the comparison is evaluated for every row and yields a boolean
# Series. Indexing the frame with that Series keeps only the True rows.
rdu_weather.snowfall > 0

In [None]:
# Days whose high exceeded 90 degrees Fahrenheit.
rdu_weather.loc[rdu_weather["temp_max"] > 90]

In [None]:
# Hottest days: every row that ties the highest maximum temperature.
highest_max = rdu_weather.temp_max.max()
rdu_weather[rdu_weather.temp_max == highest_max]

In [None]:
# Coldest days: every row that ties the lowest minimum temperature.
lowest_min = rdu_weather.temp_min.min()
rdu_weather[rdu_weather.temp_min == lowest_min]

In [None]:
# Wettest days: every row that ties the largest precipitation total.
most_precipitation = rdu_weather.precipitation.max()
rdu_weather[rdu_weather.precipitation == most_precipitation]

# Time series manipulation

In [None]:
# Monthly precipitation: total and daily average per calendar month.
# `resample` only groups the rows; the aggregation is a separate step
# (the old `how=` keyword was removed from pandas).
# NOTE: pandas >= 2.2 spells the month-end alias 'ME' instead of 'M'.
rdu_weather.precipitation.resample('M').agg(["sum", "mean"])

In [None]:
# Monthly statistics of the daily high: mean, median and standard deviation.
# Aggregations go through `.agg` because `resample(how=...)` was removed.
# NOTE: pandas >= 2.2 spells the month-end alias 'ME' instead of 'M'.
rdu_weather.temp_max.resample('M').agg(["mean", "median", "std"])

In [None]:
# Monthly mean of every column. A bare `resample("M")` only returns a lazy
# Resampler object, so the aggregation has to be requested explicitly.
# NOTE: pandas >= 2.2 spells the month-end alias 'ME' instead of 'M'.
rdu_weather.resample("M").mean()

In [None]:
# Weekly statistics of the daily high: mean, median and standard deviation.
# Aggregations go through `.agg` because `resample(how=...)` was removed.
rdu_weather.temp_max.resample('W').agg(["mean", "median", "std"])

In [None]:
# Weekly mean/median/std for both the daily high and the daily low. The
# result has two-level columns: (source column, statistic).
# Aggregations go through `.agg` because `resample(how=...)` was removed.
rdu_weather[["temp_max", "temp_min"]].resample('W').agg(["mean", "median", "std"])

In [None]:
# Median and standard deviation of the daily high, first per month
# ('MS' = month start) and then per week; each call draws its own figure.
# Aggregations go through `.agg` because `resample(how=...)` was removed.
rdu_weather.temp_max.resample('MS').agg(["median", "std"]).plot(
    title="Daily high by month: median and std (F)")
rdu_weather.temp_max.resample('W').agg(["median", "std"]).plot(
    title="Daily high by week: median and std (F)")
plt.show()

In [None]:
# Mean daily high and low, per month ('MS' = month start) and per week; each
# call draws its own figure. The explicit `.mean()` is required because
# `resample` alone no longer aggregates.
rdu_weather[["temp_max", "temp_min"]].resample('MS').mean().plot(
    title="Mean daily high/low by month (F)")
rdu_weather[["temp_max", "temp_min"]].resample('W').mean().plot(
    title="Mean daily high/low by week (F)")
plt.show()