## Chihuahuan desert USHCN station data

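Example station: JORNADA EXPERIMENTAL RANGE: USH00294426. Note that the run shown below searches for 'EL PASO' and therefore processes EL PASO AP (USH00412797).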



In [1]:
# Importing part of our climate tools module
# If you don't have it see here: https://github.com/gremau/climtools
import climtools.get_ushcn as ushcn

# Import standard python modules for data and file handling
import pandas as pd
import numpy as np
import os

# Path to our USHCN data store we downloaded
# Later versions of the data have had issues...
# Set the USHCN_PATH environment variable to point at a different local copy;
# when it is unset, the original location below is used unchanged.
ushcn_path = os.environ.get(
    'USHCN_PATH',
    '/home/greg/data/rawdata/NCDC/ushcn_v2.5/ushcn.v2.5.5.20220609')

In [2]:
# Load the USHCN station inventory. It is looked up in the directory that
# contains the data release folder, i.e. the parent of ushcn_path.
stations_dir = os.path.dirname(ushcn_path)
inventory = ushcn.get_stationsfile(stations_dir)
# Preview the first rows of the inventory table
inventory.head()

Retrieving /home/greg/data/rawdata/NCDC/ushcn_v2.5/ushcn-v2.5-stations.txt


Unnamed: 0,id,lat,lon,elev,state,name,comp1,comp2,comp3,utcoffset
0,USH00011084,31.0581,-87.0547,25.9,AL,BREWTON 3 SSE,------,------,------,6
1,USH00012813,30.5467,-87.8808,7.0,AL,FAIRHOPE 2 NE,------,------,------,6
2,USH00013160,32.8347,-88.1342,38.1,AL,GAINESVILLE LOCK,011694,------,------,6
3,USH00013511,32.7017,-87.5808,67.1,AL,GREENSBORO,------,------,------,6
4,USH00013816,31.87,-86.2542,132.0,AL,HIGHLAND HOME,------,------,------,6


In [3]:
# Choose a search term ('JORNADA', 'EL PASO', etc) and get the matching
# station id, name and latitude from the inventory
search_term = 'EL PASO'
search = inventory[inventory['name'].str.contains(search_term)]
print(search)

# Fail with a readable message instead of a bare IndexError when the search
# term matches no station name.
if search.empty:
    raise ValueError('No USHCN station name contains ' + repr(search_term))

# Only the FIRST matching row is used; check the table printed above when the
# term matches more than one station.
studystn = search['id'].tolist()[0]
studystnnames = search['name'].tolist()[0]
studystnlat = search['lat'].tolist()[0]

               id      lat       lon    elev state        name   comp1  \
1012  USH00412797  31.8111 -106.3758  1194.2    TX  EL PASO AP  ------   

       comp2   comp3  utcoffset  
1012  ------  ------          7  


In [4]:
# See functions, this will fetch precip and avg T, subset to site
# drop flags, and convert to correct units
tavg = ushcn.get_monthly_var('tavg', stationids=studystn, dpath=ushcn_path)
prcp = ushcn.get_monthly_var('prcp', stationids=studystn, dpath=ushcn_path)
# Then subset to years before 2022. An explicit copy is taken because new
# columns are assigned to these frames afterwards; writing to a plain subset
# can trigger pandas' SettingWithCopyWarning.
tavg = tavg.loc[tavg.year < 2022, :].copy()
prcp = prcp.loc[prcp.year < 2022, :].copy()

Opening /home/greg/data/rawdata/NCDC/ushcn_v2.5/ushcn.v2.5.5.20220609/USH00412797.FLs.52j.tavg
Opening /home/greg/data/rawdata/NCDC/ushcn_v2.5/ushcn.v2.5.5.20220609/USH00412797.FLs.52j.prcp


In [5]:
# Add station_name and latitude columns to both the temperature and the
# precipitation frames, then stack them into one long table.

# Report which station is being labelled (the leading "0" is the position of
# the station; only one station is handled here).
print('0 ' + studystn + ' ' + studystnnames + ' ' + str(studystnlat))

for frame in (tavg, prcp):
    # Start from placeholder values so rows of any other station stay
    # unlabelled ('' / NaN)
    frame['station_name'] = ''
    frame['latitude'] = np.nan
    # Fill in the metadata only on rows belonging to the study station
    is_study_stn = frame.stationid == studystn
    frame.loc[is_study_stn, 'station_name'] = studystnnames
    frame.loc[is_study_stn, 'latitude'] = studystnlat

# Put together the T and PRCP dataframes into one
out = pd.concat([tavg, prcp])

0 USH00412797 EL PASO AP 31.8111


In [6]:
# Save the combined monthly climate table as CSV (row index not written).
# The file name is fixed to the El Paso station; rename it if the search term
# used to pick the station is changed.
out_csv = '../data/ELPASO_USHCN_monthlyclimate.csv'
out.to_csv(out_csv, index=False)
# Show the last rows as a quick check of what was written
out.tail()

Unnamed: 0,stationid,date,year,month,day,variable,value,station_name,latitude
1166,USH00412797,2021-08-31,2021,aug,31,prcp,62.7,EL PASO AP,31.8111
1312,USH00412797,2021-09-30,2021,sep,30,prcp,12.0,EL PASO AP,31.8111
1458,USH00412797,2021-10-31,2021,oct,31,prcp,0.0,EL PASO AP,31.8111
1604,USH00412797,2021-11-30,2021,nov,30,prcp,8.6,EL PASO AP,31.8111
1750,USH00412797,2021-12-31,2021,dec,31,prcp,15.0,EL PASO AP,31.8111
