In [2]:
import os
import requests
from ftplib import FTP
import gzip
import io
import pandas as pd
import numpy as np

In [3]:
raw_path = "../data/raw/"

In [4]:
# noaa_ftp = FTP('ftp.ncei.noaa.gov')
# noaa_ftp.login()
# # noaa_ftp.retrlines('LIST')

In [None]:
    assert type(year) == int, "Year must be entered as an integer"
    assert (
        type(station_number) == str
    ), "Station number must be entered as a string"
    assert re.match(
        "^[0-9]{6}[-][0-9]{5}$", station_number
    ), 'Station number must be entered in form "911650-22536".'

    # Generate filename based on selected station number and year and download
    # data from NOAA FTP site.
    filename = station_number + "-" + str(year) + ".gz"

    compressed_data = io.BytesIO()

    try:
        noaa_ftp = FTP("ftp.ncei.noaa.gov")
        noaa_ftp.login()  # Log in (no user name or password required)
        noaa_ftp.cwd("pub/data/noaa/" + str(year) + "/")
        noaa_ftp.retrbinary("RETR " + filename, compressed_data.write)
    except error_perm as e_mess:
        print("Error generated from NOAA FTP site: \n", e_mess)
        noaa_ftp.quit()
        return 'FTP Error'

    noaa_ftp.quit()

    # Unzip and process data line by line and extract variables of interest
    # The raw data file format is described here:
    # ftp://ftp.ncei.noaa.gov/pub/data/noaa/isd-format-document.pdf
    compressed_data.seek(0)
    stn_year_df = pd.DataFrame(
        columns=[
            "stn",
            "datetime",
            "air_temp",
            "atm_press",
            "wind_spd",
            "wind_dir",
        ]
    )
    with gzip.open(compressed_data, mode="rt") as stn_data:
        for i, line in enumerate(stn_data):
            stn_year_df.loc[i, "datetime"] = pd.to_datetime(line[15:27])
            stn_year_df.loc[i, "air_temp"] = float(line[87:92]) / 10
            stn_year_df.loc[i, "atm_press"] = float(line[99:104]) / 10
            stn_year_df.loc[i, "wind_spd"] = float(line[65:69]) / 10
            stn_year_df.loc[i, "wind_dir"] = float(line[60:63])

    # Replace missing value indicators with NaNs
    stn_year_df = stn_year_df.replace(
        [999, 999.9, 9999.9], [np.nan, np.nan, np.nan]
    )

    stn_year_df.loc[:, "stn"] = station_number
    return stn_year_df

In [7]:
location = pd.read_csv('../data/raw/stinson2019/ACERnet_LatLon.csv')
location = location.rename(columns = {'Site':'site', 'Loc':'state_province'})
location = location.set_index('site')

# Fix inconsistency in acronym for Quebec site between data tables
if 'QB' in location.index:
    location = location.rename({'QB': 'QC'}, axis='index')
    location.loc['QC',:] = location.loc['QC',:].replace(regex =[r'^QB$'], value = 'QC')
    
location = location[['lat', 'lon', 'short_name', 'long_name', 'state_province']]
location

Unnamed: 0_level_0,lat,lon,short_name,long_name,state_province
site,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
DOF,43.7338,-72.249,Dartmouth,Dartmouth Organic Farm,NH
QC,48.430919,-70.688456,Boris,Boris,QC
HF,42.5315,-72.1899,Harvard,Harvard Forest,MA
INDU,41.6254,-87.0811,Indiana Dunes,Indiana Dunes National Lakeshore,IN
SMM,38.235181,-79.657058,Southernmost,Southernmost Maple,VA
DR,37.0108,-82.6764,Divide Ridge,Divide Ridge,VA


In [12]:
stn_ids = [['INDU', '726358-00384', 'Michigan City Airport'],
           ['SOUTHERNMOST MAPLE', '724115-93757', 'Ingalls Field Airport'],
           ['DIVIDE RIDGE', '724117-63802', 'Lonesome Pine Airport'],
           ['NORTHERN RANGE - QUEBEC', '716170-99999', 'Bagotville (CAN-MIL)'],
           ['HARVARD FOREST', '725085-54756', 'Orange Municipal Airport'],
           ['DARTMOUTH ORGANIC FARM', '726116-94765', 'Lebanon Municipal Airport']]

closest_weather_station = pd.DataFrame(stn_ids, columns = ['site', 'stn_id','stn_name'])
closest_weather_station

Unnamed: 0,site,stn_id,stn_name
0,INDU,726358-00384,Michigan City Airport
1,SOUTHERNMOST MAPLE,724115-93757,Ingalls Field Airport
2,DIVIDE RIDGE,724117-63802,Lonesome Pine Airport
3,NORTHERN RANGE - QUEBEC,716170-99999,Bagotville (CAN-MIL)
4,HARVARD FOREST,725085-54756,Orange Municipal Airport
5,DARTMOUTH ORGANIC FARM,726116-94765,Lebanon Municipal Airport


In [5]:
# Get station info
noaa_ftp = FTP('ftp.ncei.noaa.gov')
noaa_ftp.login()


if not os.path.exists(raw_path + '/NOAA'):
    os.makedirs(raw_path + '/NOAA')
    
noaa_ftp.cwd("pub/data/noaa/")

stn_history_file = 'isd-history.txt'

with open(raw_path + '/NOAA/' + stn_history_file, 'wb+') as stn_hist:
        noaa_ftp.retrbinary('RETR ' + stn_history_file, stn_hist.write)

noaa_ftp.quit()

'221 Goodbye.'

In [57]:
float('+0200.3 	')

200.3

In [54]:
weather_stn

Unnamed: 0_level_0,stn_name,lat,lon,elevation,country,state,start,end
stn_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
726358-00384,MICHIGAN CITY MUNICIPAL AIRPO,41.703,-86.821,200.3,US,IN,2012-01-10,2021-02-23
724115-93757,INGALLS FIELD AIRPORT,37.95,-79.817,1156.1,US,VA,2006-01-01,2021-02-22
724117-63802,LONESOME PINE AIRPORT,36.988,-82.53,818.1,US,VA,2006-01-01,2021-02-22
716170-99999,JONQUIERE QUE,48.417,-71.133,128.0,CA,,1992-01-06,2021-02-19
725085-54756,ORANGE MUNICIPAL AIRPORT,42.57,-72.291,169.2,US,MA,1996-07-02,2021-02-22
726116-94765,LEBANON MUNICIPAL AIRPORT,43.626,-72.305,182.3,US,NH,1993-01-01,2021-02-22


In [55]:
weather_stn = pd.DataFrame(
    columns=["stn_name", "lat", "lon", "elevation_m", "country", "state", "start", "end"],
    index=closest_weather_station["stn_id"],
)

with open(raw_path + "/NOAA/" + stn_history_file, mode="rt") as stn_hist:
    for i, line in enumerate(stn_hist):
        stn = line[0:6] + "-" + line[7:12]
        if stn in weather_stn.index.tolist():
            weather_stn.loc[stn] = [
                line[13:43].strip(),
                line[57:65].strip(),
                line[65:74].strip(),
                line[74:82].strip(),
                line[43:48].strip(),
                line[48:51].strip(),
                line[82:91].strip(),
                line[91:].strip(),
            ]
weather_stn.start = pd.to_datetime(weather_stn.start)
weather_stn.end =  pd.to_datetime(weather_stn.end)
weather

In [11]:
stn_info.iloc[0,1]

'716170 99999 JONQUIERE  QUE                CA      CWJO  +48.417 -071.133 +0128.0 19920106 20210219\n'

In [71]:
stn_info = pd.DataFrame(columns = ['row','text'])
stn_info = stn_info.append(pd.DataFrame({'row': [1], 'text': ['dkl']}))
stn_info

Unnamed: 0,row,text
0,1,dkl


In [3]:
if not os.path.exists(raw_path + '/LEB'):
    os.makedirs(raw_path + '/LEB')

year = "2010"
filename = "726116-94765-2010.gz"
noaa_ftp.cwd("pub/data/noaa/" + year)

In [40]:
os.getcwd()

'/home/steffen/UBC/personal/sapflow/src'

In [42]:
with open(raw_path + '/LEB/' + filename, 'wb+') as stn_gz:
        noaa_ftp.retrbinary('RETR ' + filename, stn_gz.write)

In [45]:
with gzip.open(raw_path + '/LEB/' + filename, 'rb') as stn_data:
    file_content = stn_data.read()


In [48]:
with gzip.open(raw_path + '/LEB/' + filename, mode='rt') as stn_data:
    for i, line in enumerate(stn_data):
        year_df.loc[i, 'datetime'] = pd.to_datetime(line[15:27])
        year_df.loc[i, 'airt'] = float(line[87:92])/10

NameError: name 'year_df' is not defined

Indiana Dunes National Lakeshore - INDU
726358-00384 -- Michigan City Airport

Southernmost Maple
724115-93757 -- Ingalls Field Airport

Divide Ridge
724117-63802 -- Lonesome Pine Airport

Boris
716170-99999 -- Bagotville (CAN-MIL)

Harvard Forest
725085-54756 -- Orange Municipal Airport

Dartmouth Organic Farm
726116-94765 -- Lebanon Municipal Airport 
