# Temperature Visualization for Portland, OR: 1940 - 2020

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import numpy as np
import calendar
from math import floor

In [None]:
# Notebook-wide pandas display limits, applied one option at a time.
display_options = {
    'display.max_columns': 700,
    'display.max_rows': 100,
    'display.min_rows': 10,
    'display.expand_frame_repr': True,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [None]:
# Default size for every figure created in this notebook.
plt.rcParams.update({'figure.figsize': (16.0, 10.0)})
# Optional alternative styles, currently disabled:
# plt.style.use('ggplot')
# sns.set_style("white")

# Load and Clean Data

## Load a local copy

In [None]:
# Local use: read the spreadsheet copy of the data.
# Rows 0-5 of the sheet are skipped before pandas looks for the header row.
p = Path('data') / 'Portland_dailyclimatedata1940-2019.xlsx'
pdx_19 = pd.read_excel(
    p,
    sheet_name='Portland_dailyclimatedata1940-2',
    skiprows=[0, 1, 2, 3, 4, 5],
)

## Load from website

- Data from [NOAA National Weather Service Forecast Office: Portland, OR][1]

  [1]: https://w2.weather.gov/climate/local_data.php?wfo=pqr

In [None]:
# Remote use: read the CSV export directly from the web.
# Lines 0-5 of the file are skipped before pandas looks for the header row.
url = 'http://www.weather.gov/source/pqr/climate/webdata/Portland_dailyclimatedata.csv'
leading_lines = [0, 1, 2, 3, 4, 5]
pdx_19 = pd.read_csv(url, skiprows=leading_lines)

## View initial dataframe

In [None]:
# Show the frame exactly as loaded, before any cleaning or reshaping.
pdx_19

## Transform and clean data

In [None]:
# 'AVG or Total' is not used anywhere in this analysis, so remove it.
pdx_19 = pdx_19.drop(columns=['AVG or Total'])

In [None]:
# pd.wide_to_long needs a shared stub on the value columns, so every column
# after the first three (the per-day columns) is renamed to 'v_<original name>'.
id_columns = list(pdx_19.columns[:3])
day_columns = [f'v_{day}' for day in pdx_19.columns[3:]]
pdx_19.columns = id_columns + day_columns

In [None]:
# The measurement-type column is loaded as 'Unnamed: 2'; call it TYPE and
# keep only the TX (max temp) and TN (min temp) rows.
pdx_19 = pdx_19.rename(columns={'Unnamed: 2': 'TYPE'})
is_temperature_row = pdx_19['TYPE'].isin(['TX', 'TN'])
pdx_19 = pdx_19[is_temperature_row]

### Convert to Tidy format

In [None]:
# Tidy reshape: the v_<day> columns become rows, giving one row per
# (YR, MO, TYPE, day) with the reading in column 'v'.
pdx = pd.wide_to_long(
    pdx_19,
    stubnames='v',
    i=['YR', 'MO', 'TYPE'],
    j='day',
    sep='_',
)
pdx = pdx.reset_index()
pdx

In [None]:
# Replace the TX / TN codes with more descriptive labels.
type_labels = {'TX': 'MAX', 'TN': 'MIN'}
pdx['TYPE'] = pdx['TYPE'].map(type_labels)

In [None]:
# Use the column names 'year' and 'month' so that, together with 'day',
# they can be handed to pd.to_datetime to build a date column.
pdx = pdx.rename(columns={'YR': 'year', 'MO': 'month'})

In [None]:
# A '-' reading marks a day that does not exist in that month; keep only the
# remaining rows, as an independent copy so later column assignments are safe.
is_real_day = pdx['v'].ne('-')
pdx = pdx.loc[is_real_day].copy()

In [None]:
# Assemble a datetime column from the year / month / day parts.
date_parts = pdx[['year', 'month', 'day']]
pdx['date'] = pd.to_datetime(date_parts)

In [None]:
# Decade label for each row, e.g. 1947 -> "1940's".
pdx['dec'] = [f"{floor(yr / 10) * 10}'s" for yr in pdx['year']]

In [None]:
# 'M' and 'T' are non-numeric markers in the readings (presumably "missing"
# and "trace" -- confirm against the NOAA file legend); turn them into NaN so
# the column can be cast to float afterwards.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on `pdx.v`: that form is a chained in-place
# update on an intermediate Series, which pandas warns about and which does
# not write through to `pdx` under Copy-on-Write.
pdx['v'] = pdx['v'].replace({'M': np.nan, 'T': np.nan})

In [None]:
# Readings were loaded as strings; make the column numeric.
pdx['v'] = pdx['v'].astype(float)

In [None]:
# Bucket every reading into a labelled temperature band.
# pd.cut intervals are right-closed, so these edges produce
# (-inf, 64], (64, 74], (74, 84], (84, 94], (94, inf].
# The outer edges are open-ended so that readings at or below 0 land in
# '< 65'; with a lower edge of 0 they would fall outside every bin and
# come back as NaN.
temperature_edges = [-np.inf, 64, 74, 84, 94, np.inf]
temperature_labels = ['< 65', '65 - 74', '75 - 84', '85 - 94', '>= 95']
pdx['range'] = pd.cut(pdx['v'], bins=temperature_edges, labels=temperature_labels)

In [None]:
# Preview the first and last rows of the cleaned frame.
for preview in (pdx.head(), pdx.tail()):
    display(preview)

# Create max temperature dataframe

In [None]:
# Keep only the daily-maximum rows and renumber them from 0.
is_daily_max = pdx['TYPE'].eq('MAX')
pdx_max = pdx.loc[is_daily_max].reset_index(drop=True)
pdx_max

# Create January to May (inclusive) dataframe

In [None]:
# Rows whose month is January (1) through May (5), both inclusive.
month_number = pdx_max['date'].dt.month
pdx_max_jan_may = pdx_max[month_number.between(1, 5)]

## Groupby `year` and `range`

In [None]:
# Count the January-May days in each temperature band for every year.
# `range` is a categorical column (it comes from pd.cut), so observed=False
# is spelled out: every year keeps a row for every band, including bands
# with a count of 0, independent of the pandas default for `observed`.
year_of_reading = pdx_max_jan_may['date'].dt.year
pdx_max_g = (
    pdx_max_jan_may
    .groupby([year_of_reading, 'range'], observed=False)['v']
    .count()
    .reset_index(level=0)  # the year moves to a 'date' column; 'range' stays as the index
)

In [None]:
# Preview the first and last rows of the per-year band counts.
for preview in (pdx_max_g.head(), pdx_max_g.tail()):
    display(preview)

## Plot the `groupby` dataframe

In [None]:
# One bar chart per decade: number of Jan-May days in each temperature band,
# with one bar per year. Each entry in `years` is the exclusive end of a
# ten-year window, so 1950 selects 1940-1949.
years = list(range(1950, 2021, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        in_decade = pdx_max_g.date.between(year - 10, year - 1)
        data = pdx_max_g[in_decade]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)

        ax.annotate('Prepared By: Trenton McKinney', xy=(3, 140.1), xytext=(3, 140.1), fontsize=8)

        # Write the day count 10 points above every bar that has a positive height.
        for bar in ax.patches:
            bar_height = bar.get_height()
            if not bar_height > 0:
                continue
            bar_center = bar.get_x() + bar.get_width() / 2.
            ax.annotate(f'{bar_height:.0f}',
                        (bar_center, bar_height),
                        ha='center', va='center', fontsize=8,
                        xytext=(0, 10), textcoords='offset points')

        ax.set_ylim(0, 150)
        ax.set_ylabel('Days')
        ax.set_xlabel('High Temperatures °F')
        ax.set_title(f"Portland, OR\nJan - May High Temperature Days: {year-10}'s")

# Create June dataframe

In [None]:
# June-only daily maxima, then the number of days in each temperature band
# for every year.
# `range` is a categorical column (it comes from pd.cut), so observed=False
# is spelled out: every year keeps a row for every band, including bands
# with a count of 0, independent of the pandas default for `observed`.
pdx_max_june = pdx_max[pdx_max.date.dt.month == 6]
pdx_max_june = (
    pdx_max_june
    .groupby([pdx_max_june.date.dt.year, 'range'], observed=False)['v']
    .count()
    .reset_index(level=0)  # the year moves to a 'date' column; 'range' stays as the index
)
pdx_max_june

## Plot June

In [None]:
# One bar chart per decade: number of June days in each temperature band,
# with one bar per year. Each entry in `years` is the exclusive end of a
# ten-year window, so 1950 selects 1940-1949.
years = list(range(1950, 2021, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        plt.figure()
        in_decade = pdx_max_june.date.between(year - 10, year - 1)
        data = pdx_max_june[in_decade]
        ax = sns.barplot(x=data.index, y=data.v, hue=data.date)

        ax.annotate('Prepared By: Trenton McKinney', xy=(0, 24.3), xytext=(0, 24.3), fontsize=8)

        # Write the day count 10 points above every bar that has a positive height.
        for bar in ax.patches:
            bar_height = bar.get_height()
            if not bar_height > 0:
                continue
            bar_center = bar.get_x() + bar.get_width() / 2.
            ax.annotate(f'{bar_height:.0f}',
                        (bar_center, bar_height),
                        ha='center', va='center', fontsize=9,
                        xytext=(0, 10), textcoords='offset points')

        ax.set_ylim(0, 25)
        ax.set_ylabel('Days')
        ax.set_xlabel('High Temperatures °F')
        ax.set_title(f"Portland, OR\nJune High Temperature Days: {year-10}'s")

# Resample max monthly mean

In [None]:
# Month-end resample of the daily maximum readings, averaged per month.
# - Only 'v' is selected before aggregating: pdx_max also carries text and
#   categorical columns (TYPE, dec, range), and taking the mean over those
#   raises on pandas >= 2.0. The plot below only reads `v`.
# - The frequency is given as an offset object instead of the 'm' string
#   alias, whose spelling has been deprecated/renamed across pandas releases.
pdx_m_mean = pdx_max.set_index('date')[['v']].resample(pd.offsets.MonthEnd()).mean()

In [None]:
# Draw the monthly mean high temperature as one line per decade, all on the
# same axes (no new figure is created inside the loop). Each entry in `years`
# is the exclusive end of a ten-year window, so 1950 selects 1940-1949.
years = list(range(1950, 2021, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        data = pdx_m_mean[(pdx_m_mean.index.year >= year - 10) & (pdx_m_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)

    # Anchor the credit at 2000-01-01 using a real timestamp. The previous
    # x value, 730120, is that same date as a proleptic-Gregorian ordinal,
    # which only matches matplotlib's date numbering before 3.3 (the epoch
    # moved to 1970), so on newer versions the text ended up far off-axis.
    credit_xy = (pd.Timestamp('2000-01-01'), 32)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=10)
    plt.xlabel('Year')
    plt.ylabel('Monthly Mean Max Temperature °F')
    plt.title("Portland, OR\nResampled Monthly Mean High Temperature")

# Mean: Resample max yearly

In [None]:
# Year-end resample of the daily maximum readings, averaged per year.
# - Only 'v' is selected before aggregating: pdx_max also carries text and
#   categorical columns (TYPE, dec, range), and taking the mean over those
#   raises on pandas >= 2.0. The plot below only reads `v`.
# - The frequency is given as an offset object instead of the 'y' string
#   alias, whose spelling has been deprecated/renamed across pandas releases.
pdx_y_mean = pdx_max.set_index('date')[['v']].resample(pd.offsets.YearEnd()).mean()

In [None]:
# Draw the yearly mean high temperature as one line per decade, all on the
# same axes (no new figure is created inside the loop). Each entry in `years`
# is the exclusive end of a ten-year window, so 1950 selects 1940-1949.
years = list(range(1950, 2021, 10))

with sns.axes_style("darkgrid"):
    for year in years:
        data = pdx_y_mean[(pdx_y_mean.index.year >= year - 10) & (pdx_y_mean.index.year < year)]
        ax = sns.lineplot(x=data.index, y=data.v)

    # Anchor the credit at 2000-01-01 using a real timestamp. The previous
    # x value, 730120, is that same date as a proleptic-Gregorian ordinal,
    # which only matches matplotlib's date numbering before 3.3 (the epoch
    # moved to 1970), so on newer versions the text ended up far off-axis.
    credit_xy = (pd.Timestamp('2000-01-01'), 52.1)
    plt.annotate('Prepared By: Trenton McKinney', xy=credit_xy, xytext=credit_xy, fontsize=10)
    plt.xlabel('Year')
    plt.ylabel('Yearly Mean Max Temperature °F')
    plt.title("Portland, OR\nResampled Yearly Mean High Temperature")

# Mean: Groupby year & month

In [None]:
# Mean daily maximum per (year, month), pivoted so that rows are months and
# columns are years.
mym = pdx_max.groupby([pdx_max.date.dt.year, pdx_max.date.dt.month])['v'].mean().unstack(level=0)
# Keep only the years before 2020 (there is no usable 2020 data). Selecting
# by label instead of dropping the last column by position means a source
# that already ends in 2019 does not lose its final year.
mym = mym.loc[:, mym.columns < 2020]
mym.iloc[:, :5]

In [None]:
# One figure per block of ten year-columns in `mym`, with one line per year.
ix = list(range(0, len(mym.columns), 10))
month_ticks = np.arange(1, 13)
month_labels = calendar.month_name[1:13]

with sns.axes_style("darkgrid"):
    for i in ix:
        plt.figure()
        data = mym.iloc[:, i:i + 10]
        ax = sns.lineplot(data=data, markers=None, dashes=False)
        ax.annotate('Prepared By: Trenton McKinney', xy=(10, 31), xytext=(10, 31), fontsize=10)

        # Label the x axis with month names instead of 1-12.
        plt.xticks(month_ticks, month_labels)
        ax.set_ylim(30, 100)
        ax.set_xlabel('Month')
        ax.set_ylabel('Mean Max Temperature °F')
        ax.set_title(f"Portland, OR\nMonthly Mean per Year High Temperature")
        plt.show()

# Mean: Groupby decade & month

In [None]:
# Mean daily maximum per (decade, month), pivoted so that rows are months and
# columns are decade labels such as "1940's".
mymd = pdx_max.groupby([pdx_max.dec, pdx_max.date.dt.month])['v'].mean().unstack(level=0)
# There is no usable data for the 2020's, so drop that column by name.
# Dropping by label (and ignoring its absence) avoids removing a valid
# decade when the source data stops before 2020.
mymd = mymd.drop(columns="2020's", errors='ignore')
mymd

In [None]:
# Single figure: mean monthly high temperature, one line per decade column
# of `mymd`. (An unused `ix` list derived from `mym` was removed; this cell
# only needs `mymd`.)
with sns.axes_style("darkgrid"):

    plt.figure()
    sns.lineplot(data=mymd, markers=None, dashes=False)
    plt.annotate('Prepared By: Trenton McKinney', xy=(10, 40.5), xytext=(10, 40.5), fontsize=10)
    # Label the x axis with month names instead of 1-12.
    plt.xticks(np.arange(1, 13), calendar.month_name[1:13])
    plt.ylim(40, 85)
    plt.xlabel('Month')
    plt.ylabel('Mean Max Temperature °F')
    plt.title("Portland, OR\nMonthly Mean Per Decade High Temperature")
    plt.show()