In [0]:
import requests
import numpy as np
import pandas as pd
import datetime

import matplotlib as mlp
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates

%matplotlib inline

In [0]:
def make_request(endpoint, payload=None):
    """
    Make a request to a specific endpoint on the weather API
    passing headers and optional payload.
    
    Parameters:
        - endpoint: The endpoint of the API you want to 
                    make a GET request to.
        - payload: A dictionary of data to pass along 
                   with the request.
    
    Returns:
        Response object.
    ---
    S. Moline, 'Hands on Data Analysis with Pandas'
    https://github.com/stefmolin/Hands-On-Data-Analysis-with-Pandas/blob/master/ch_04/0-weather_data_collection.ipynb
    """
    return requests.get(
        f'https://www.ncdc.noaa.gov/cdo-web/api/v2/{endpoint}',
        headers={
            'token': 'HfZphTTGbXckpmEuqevAXQZFRCTEJgkJ' # obtained by X. Li, 2/4/2020
        },
        params=payload
    )

# Set date range
daterange = np.arange(1990,1999)

results = []
for i in range(len(daterange)):
    # Update the cell with status information
    display.clear_output(wait=True)
    display.display(f'Gathering data for {str(daterange[i])}')
    
    response = make_request(
        'data', 
        {
            'datasetid' : 'GHCND', # Global Historical Climatology Network - Daily (GHCND) dataset
            'datatypeid' : ['TMAX','TMIN'],
            'locationid' : 'CITY:US360019', # NYC
            'stationid' : 'GHCND:USC00280907',
            'startdate' : datetime.date(daterange[i], 1, 1),
            'enddate' : datetime.date(daterange[i], 12, 31),
            'units' : 'metric',
            'limit' : 1000 # max allowed
        })

    if response.ok:
        # We extend the list instead of appending to avoid getting a nested list
        results.extend(response.json()['results'])

def clean_up(datain,key):
    '''
    TBD
    credit S. Nesbitt
    '''
    datain[key][(datain[key] == 9999)] = None
    datain[key]= datain[key].astype('float')
    return datain
    

In [0]:
# Create a dataframe to store the results
# Contains date, TMAX, TMIN, station, and value
df = pd.DataFrame(results)

# Clean up data for missing values (as represented by '9999' according to 
# GHCND documentation: https://www1.ncdc.noaa.gov/pub/data/cdo/documentation/GHCND_documentation.pdf)
df = clean_up(df, 'value')
df['date'] = pd.to_datetime(df['date'], infer_datetime_format=True)
df.index = df['date']
df

In [0]:
df = fetch_data(locationid="CITY:US360019", startdate=datetime.date(2017, 1, 1), enddate=datetime.date(2018, 1, 10), stationid="GHCND:USC00280907")
df = clean_up(df, "value")
display(df.describe(), df.head(), df.tail())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,value
count,748.0
mean,10.900535
std,11.44345
min,-17.2
25%,2.2
50%,11.1
75%,18.9
max,34.4


Unnamed: 0_level_0,date,datatype,station,attributes,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01,2017-01-01,TMAX,GHCND:USC00280907,",,7,0700",5.6
2017-01-01,2017-01-01,TMIN,GHCND:USC00280907,",,7,0700",-2.8
2017-01-02,2017-01-02,TMAX,GHCND:USC00280907,",,7,0700",7.2
2017-01-02,2017-01-02,TMIN,GHCND:USC00280907,",,7,0700",-2.8
2017-01-03,2017-01-03,TMAX,GHCND:USC00280907,",,7,0700",2.8


Unnamed: 0_level_0,date,datatype,station,attributes,value
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-01-08,2018-01-08,TMIN,GHCND:USC00280907,",,7,0700",-16.7
2018-01-09,2018-01-09,TMAX,GHCND:USC00280907,",,7,0700",-1.7
2018-01-09,2018-01-09,TMIN,GHCND:USC00280907,",,7,0700",-8.3
2018-01-10,2018-01-10,TMAX,GHCND:USC00280907,",,7,0700",5.0
2018-01-10,2018-01-10,TMIN,GHCND:USC00280907,",,7,0700",-7.8


In [0]:
df_mean = df.resample("7D").mean()
display(df_mean, df_mean.describe())
# freq="W" starts with Sunday
df["W_num"] = (df["date"].dt.dayofyear - 1) // 7
df.groupby("W_num").mean()

Unnamed: 0_level_0,value,W_num
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-01,0.914286,0.0
2017-01-08,-1.378571,1.0
2017-01-15,0.957143,2.0
2017-01-22,5.007143,3.0
2017-01-29,-0.628571,4.0
2017-02-05,-0.021429,5.0
2017-02-12,0.4,6.0
2017-02-19,7.864286,7.0
2017-02-26,5.4,8.0
2017-03-05,1.614286,9.0


Unnamed: 0,value,W_num
count,54.0,54.0
mean,10.71056,24.707011
std,9.629381,15.429806
min,-10.557143,0.0
25%,2.225,11.25
50%,13.05,24.5
75%,18.55,37.75
max,25.764286,51.0


Unnamed: 0_level_0,value
W_num,Unnamed: 1_level_1
0,-5.239286
1,-2.83
2,0.957143
3,5.007143
4,-0.628571
5,-0.021429
6,0.4
7,7.864286
8,5.4
9,1.614286


In [0]:
def cal_anomaly(df, freq="W"):
    df_mean = df.resample(7).mean()
    if freq == "W":
        
        

SyntaxError: unexpected EOF while parsing (<ipython-input-5-1137edb23fd0>, line 5)

In [0]:
def stripe_plot(df, freq="W"):
    df_mean = df.resample(freq).mean()
    if freq == "W":
        date_interval = 7
    
    temp_min = df_mean.loc[:, "value"].min()
    temp_max = df_mean.loc[:, "value"].max()
    buffer = 1.
    
    cmap = ListedColormap([
        '#08306b', '#08519c', '#2171b5', '#4292c6',
        '#6baed6', '#9ecae1', '#c6dbef', '#deebf7',
        '#fee0d2', '#fcbba1', '#fc9272', '#fb6a4a',
        '#ef3b2c', '#cb181d', '#a50f15', '#67000d',
    ])
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_axes([0.1, 0.12, 0.9, 0.88])
    date_str_list = ['{}'.format(d) for d in df['date'][0:730:2]]
    date_corrected = mdates.datestr2num(date_str_list)

    col = PatchCollection([
        Rectangle((y, min_temp-buffer), 7, temp_delta+2*buffer) # need to change 7 to other values when resampled at other frequencies
        for y in date_corrected.astype(int)[::7]
    ])

    

In [0]:
[0, 1, 2, 3, 4, 5][::3]

In [0]:
def stripe_plot(freq, flag=True):
    '''
    the plotting function.
    ------
    Inputs-
    freq: 'W', 'M', or 'Y'
        frequency for calculating and ploting the anomaly
    flag: Boolean
        whether to plot the data points over the strips or not. Default is True.
    ------
    Output-
    warming stripes figure named 'warming-stripe-[freq].png'.
    '''
    if freq == 'W':
        df = ... # call in the func that calculates weekly anomaly
        interval = 7
    elif freq == 'M':
        df = ...
        interval = 31
    elif freq == 'Y':
        df = ...
        interval = 366
    else:
        print('Please enter a valid frequency (W, M, or Y)')
        break

#    df_mean = df.resample(freq).mean()
#    if freq == "W":
#        date_interval = 7
    
    temp_min = df.loc[:, "value"].min()
    temp_max = df.loc[:, "value"].max()
    temp_delta = max_temp - min_temp # deg C
    buffer = 0.2 # deg C
    
    cmap = ListedColormap([
        '#08306b', '#08519c', '#2171b5', '#4292c6',
        '#6baed6', '#9ecae1', '#c6dbef', '#deebf7',
        '#fee0d2', '#fcbba1', '#fc9272', '#fb6a4a',
        '#ef3b2c', '#cb181d', '#a50f15', '#67000d',
    ])
    fig = plt.figure(figsize=(12, 5))
    ax = fig.add_axes([0.1, 0.12, 0.9, 0.88])
    date_str_list = ['{}'.format(d) for d in df['date'][0:-1]]
    date_corrected = mdates.datestr2num(date_str_list)
    date_int = date_corrected.astype(int)

    col = PatchCollection([
        Rectangle((y, min_temp-buffer), interval, temp_delta+2*buffer) 
        for y in date_int
    ]) 

    # set data, colormap and color limits
    col.set_array(df)
    col.set_cmap(cmap)
    col.set_clim(min_temp, max_temp)
    ax.add_collection(col)

    ax.set_ylim(min_temp-buffer, max_temp+buffer)
    ax.set_xlim(date_corrected[0], date_corrected[-1])
    formatter = mdates.DateFormatter('%Y-%m-%d')
    ax.xaxis.set_major_formatter(formatter)
    fig.autofmt_xdate()
    ax.set_xlabel('date')

    # flag
    if flag == True:
        ax.plot(df2['value'],':o',color='gold',linewidth=3, markersize=10)
        ax.set_ylabel('temperature [deg C]')
    else:
        ax.axes.get_yaxis().set_visible(False)

    fig.savefig('warming-stripes-'+freq+'.png')
    fig.show()
