### Get latest NOAA GHCN dataset for Custom Location

In [2]:
%matplotlib inline

import numpy as np
import pandas as pd

import requests
import gzip

import time

import warnings
warnings.filterwarnings("ignore")

In [3]:
# https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt
# USC00046624  34.5883 -118.0939  796.1 CA PALMDALE  
# USW00023182  34.6294 -118.0831  764.7 CA PALMDALE AP 72382
# https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/by_station/USW00023182.csv.gz
# https://www.tutorialspoint.com/downloading-files-from-web-using-python

# https://www.ncei.noaa.gov/pub/data/ghcn/daily/readme.txt
# https://github.com/gojiplus/get-weather-data/blob/master/noaaweb/noaaweb.py

In [4]:
def get_noaa_ghcn_data(ghcn_data_file):
    '''
    Download one NOAA GHCN-Daily "by_station" file and load it as a long-format DataFrame.

    The gzipped CSV is fetched from the NOAA by_station directory, saved to the
    current working directory under the same name as ``ghcn_data_file``, and then
    parsed with pandas (requires pandas >= 1.3 for ``on_bad_lines``).

    Parameters
    ----------
    ghcn_data_file : str
        File name inside the by_station directory, e.g. ``'USW00023182.csv.gz'``.

    Returns
    -------
    pandas.DataFrame
        One row per observation, sorted by date with a fresh 0..n-1 index, with
        columns ``station``, ``date`` (datetime64), ``quantity`` (element code such
        as TMAX), ``value`` and ``detail1``..``detail4``.
        NOTE(review): detail1..detail4 are presumably the measurement flag, quality
        flag, source flag and observation time described in the GHCN readme --
        confirm before relying on them.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. unknown station file).
    '''
    print('getting ' + ghcn_data_file)

    url = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/by_station/' + ghcn_data_file
    # The timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly here instead of saving an HTML error page as a ".gz" file.
    r.raise_for_status()

    # Store the download under the requested name so that different stations do
    # not overwrite (or get read back from) one hard-coded file.
    local_path = './' + ghcn_data_file
    with open(local_path, 'wb') as gz_file:
        gz_file.write(r.content)

    df_noaa = pd.read_csv(local_path,
                          compression='gzip',
                          header=None,
                          # Replacement for the removed error_bad_lines=False:
                          # malformed rows are dropped with a warning.
                          on_bad_lines='warn'
                         )
    df_noaa.columns = ['station', 'date', 'quantity', 'value',
                       'detail1', 'detail2', 'detail3', 'detail4']

    # Dates arrive as YYYYMMDD; convert them so sorting is chronological.
    df_noaa['date'] = pd.to_datetime(df_noaa['date'], format='%Y%m%d')
    df_noaa = df_noaa.sort_values(by='date')
    df_noaa = df_noaa.reset_index(drop=True)

    print("returning data: " + str(df_noaa.shape))

    return df_noaa

In [5]:
# Fetch the daily records for station USW00023182 (PALMDALE AP, CA).
df_noaa = get_noaa_ghcn_data(ghcn_data_file='USW00023182.csv.gz')

getting USW00023182.csv.gz
returning data: (145949, 8)


In [6]:
# Count how many observations the station has for each element code.
df_noaa['quantity'].value_counts()

TMIN    21374
TMAX    21373
PRCP    18488
SNWD    12660
SNOW    12390
AWND     9056
WSF2     9038
WDF2     9038
WSF5     8944
WDF5     8942
PGTM     4811
FMTM     2810
TAVG     2643
TSUN     1171
WT08     1155
WT16      966
WT01      631
WT03      233
WT18      131
WT02       38
WT07       31
WT05       17
WT09        4
WSFG        3
WT04        1
WT06        1
Name: quantity, dtype: int64

TMIN    21374 TMIN = Minimum temperature (tenths of degrees C)
TMAX    21373 TMAX = Maximum temperature (tenths of degrees C)
PRCP    18488 PRCP = Precipitation (tenths of mm)
SNWD    12660 SNWD = Snow depth (mm)
SNOW    12390 SNOW = Snowfall (mm)
AWND     9056 AWND = Average daily wind speed (tenths of meters per second)
WSF2     9038 WSF2 = Fastest 2-minute wind speed (tenths of meters per second)
WDF2     9038 WDF2 = Direction of fastest 2-minute wind (degrees)
WSF5     8944 WSF5 = Fastest 5-second wind speed (tenths of meters per second)
WDF5     8942 WDF5 = Direction of fastest 5-second wind (degrees)
PGTM     4811 PGTM = Peak gust time (hours and minutes, i.e., HHMM)
FMTM     2810 FMTM = Time of fastest mile or fastest 1-minute wind (hours and minutes, i.e., HHMM)
TAVG     2643 TAVG = Average temperature (tenths of degrees C) [Note that TAVG from source 'S' corresponds
		             to an average for the period ending at 2400 UTC rather than local midnight]
TSUN     1171 TSUN = Daily total sunshine (minutes)
WT08     1155 WT08 - Smoke or haze
WT16      966 WT16 - Rain (may include freezing rain, drizzle, and freezing drizzle)
WT01      631 WT01 - Fog, ice fog, or freezing fog (may include heavy fog)
WT03      233 WT03 - Thunder
WT18      131 WT18 - Snow, snow pellets, snow grains, or ice crystals
WT02       38 WT02 - Heavy fog or heavy freezing fog (not always distinguished from fog)
WT07       31 WT07 - Dust, volcanic ash, blowing dust, blowing sand, or blowing obstruction
WT05       17 WT05 - Hail (may include small hail)
WT09        4 WT09 - Blowing or drifting snow
WSFG        3 WSFG - Peak gust wind speed (tenths of meters per second)
WT04        1 WT04 - Ice pellets, sleet, snow pellets, or small hail
WT06        1 WT06 - Glaze or rime


Want to have: ADPT = Average dew point temperature for the day (tenths of degrees C); combine it with temperature to calculate relative humidity.
                        (TODO: a Python package such as MetPy can calculate this with a single call -- add this.)
                        https://pypi.org/project/MetPy/
                        https://ui.adsabs.harvard.edu/abs/2019AGUFMNS21A..16M/abstract
                        https://github.com/gojiplus/get-weather-data
                        
Sources for Forecast data:

    https://pypi.org/project/noaa-sdk/

    https://forecast.weather.gov/MapClick.php?CityName=Palmdale&state=CA&site=LOX&textField1=34.5794&textField2=-118.116&e=1#.ZDvy7brMJPY
    https://forecast.weather.gov/MapClick.php?lat=34.5794&lon=-118.116&unit=0&lg=english&FcstType=graphical
    https://forecast.weather.gov/MapClick.php?lat=34.5794&lon=-118.116&lg=english&&FcstType=digital

### Pivot NOAA GHCN data

In [None]:
def pivot_noaa_ghcn(df_noaa):
    '''
    Reshape long-format GHCN records into one row per date.

    Each distinct entry of the ``quantity`` column becomes its own column holding
    the matching ``value``; dates with no observation for an element get NaN.
    The result is sorted by ``date`` and carries a fresh 0..n-1 index. The shapes
    of the input and of the pivoted frame are printed as a quick sanity check.

    Parameters
    ----------
    df_noaa : pandas.DataFrame
        Long-format frame with at least ``date``, ``quantity`` and ``value`` columns.

    Returns
    -------
    pandas.DataFrame
        Wide frame with a ``date`` column followed by one column per element code.
    '''
    # Move 'date' from the pivot index back to a regular column right away.
    wide = df_noaa.pivot(index='date', columns='quantity', values='value').reset_index()
    wide['date'] = pd.to_datetime(wide['date'])

    # Chronological order with a clean positional index.
    wide = wide.sort_values(by='date').reset_index(drop=True)

    print(df_noaa.shape, wide.shape)

    return wide

In [None]:
# Reshape the long records into one row per date, one column per element code.
df_noaa_daily = pivot_noaa_ghcn(df_noaa)

### NOAA Feature Engineering

In [None]:
def noaa_dhcn_feature_engineer(df_noaa_daily):
    '''
    Add unit-converted convenience columns to the pivoted daily GHCN frame.

    Derived columns, added in this order:
    ``TMAX_C``/``TMAX_F`` and ``TMIN_C``/``TMIN_F`` (raw values are tenths of
    degrees C), ``AWND_ms``/``AWND_mph`` (raw values are tenths of m/s),
    ``PRCP_mm``/``PRCP_in`` (raw values are tenths of mm) and ``SNOW_in``
    (raw values are mm).

    Not every station reports every element. When a raw column is absent, its
    derived columns are filled with NaN instead of raising ``KeyError``, so the
    output schema is the same for any station.

    Parameters
    ----------
    df_noaa_daily : pandas.DataFrame
        Wide frame with one column per GHCN element code. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the derived columns added.
    '''
    TENTHS = 10            # GHCN stores temperature, wind and precipitation in tenths of a unit
    MS_TO_MPH = 2.23694    # meters per second -> miles per hour
    MM_PER_INCH = 25.4

    def _element(code):
        # Fall back to an all-NaN series when the station never reported this element.
        if code in df_noaa_daily.columns:
            return df_noaa_daily[code]
        return pd.Series(float('nan'), index=df_noaa_daily.index, dtype='float64')

    for code in ('TMAX', 'TMIN'):
        df_noaa_daily[code + '_C'] = _element(code) / TENTHS
        df_noaa_daily[code + '_F'] = (df_noaa_daily[code + '_C'] * (9/5)) + 32

    df_noaa_daily['AWND_ms'] = _element('AWND') / TENTHS
    df_noaa_daily['AWND_mph'] = df_noaa_daily['AWND_ms'] * MS_TO_MPH

    df_noaa_daily['PRCP_mm'] = _element('PRCP') / TENTHS
    df_noaa_daily['PRCP_in'] = df_noaa_daily['PRCP_mm'] / MM_PER_INCH

    # Snowfall is already reported in whole millimeters.
    df_noaa_daily['SNOW_in'] = _element('SNOW') / MM_PER_INCH

    return df_noaa_daily

In [None]:
# Print the shape before and after so the number of added columns is visible.
print(df_noaa_daily.shape)
df_noaa_daily = noaa_dhcn_feature_engineer(df_noaa_daily)
print(df_noaa_daily.shape)

### Save data

In [None]:
# The file name embeds the current Unix time (rounded to seconds), so each run writes a new file.
# NOTE(review): to_csv is called without index=False, so the row index is written as an
# unnamed first column -- confirm that downstream readers expect it.
df_noaa_daily.to_csv('data_noaaX_pivoted_customlocation_' + str(round(time.time())) + '.csv')