In [1]:
from osgeo import ogr
import gdal
from gdalconst import GDT_Float32
from gdalconst import GA_ReadOnly

import numpy as np
import pandas as pd

import glob

In [2]:
# Collect the HUC8 basin rasters. glob (already imported above) replaces the
# IPython-only `!ls` shell escape so the cell also runs as plain Python and on
# platforms without `ls`; sorting keeps the alphabetical order `ls` produced.
hucs = sorted(glob.glob('./data/WSC_basin_tiffs/HUC8*.tif'))

In [3]:
# Display the collected HUC8 raster paths.
hucs

['./data/WSC_basin_tiffs/HUC8_10180001.tif',
 './data/WSC_basin_tiffs/HUC8_10180010.tif',
 './data/WSC_basin_tiffs/HUC8_10190001.tif',
 './data/WSC_basin_tiffs/HUC8_10190002.tif',
 './data/WSC_basin_tiffs/HUC8_10190004.tif',
 './data/WSC_basin_tiffs/HUC8_10190005.tif',
 './data/WSC_basin_tiffs/HUC8_10190006.tif',
 './data/WSC_basin_tiffs/HUC8_10190007.tif',
 './data/WSC_basin_tiffs/HUC8_11020001.tif',
 './data/WSC_basin_tiffs/HUC8_14010001.tif',
 './data/WSC_basin_tiffs/HUC8_14010002.tif',
 './data/WSC_basin_tiffs/HUC8_14010003.tif',
 './data/WSC_basin_tiffs/HUC8_14010004.tif',
 './data/WSC_basin_tiffs/HUC8_14020003.tif']

In [4]:
# Number of raster paths in the list.
len(hucs)

14

In [9]:
# Take the first raster path as a sample for trying out the file-name parsing.
tmp = hucs[0]

In [16]:
# Reduce the path to its file name, cut the extension, and keep the text after
# the last underscore (the numeric code in 'HUC8_<code>.tif').
hc = tmp.rsplit('/', 1)[-1].split('.', 1)[0].rsplit('_', 1)[-1]

In [21]:
# First six characters of the code (the slice later used for the HUC6 value).
hc[0:6]

'101800'

In [16]:
# Build four parallel lists: every distinct cell index found in each HUC8
# raster, paired with that basin's HUC8 / HUC6 / HUC4 codes.
cells = []
huc4s = []
huc6s = []
huc8s = []

for huc in hucs:

    hc8 = huc.split('/')[-1].split('.')[0].split('_')[-1] # grab the HUC8 code from the file name
    hc4 = int(hc8[0:4]) # leading 4 digits -> HUC4
    hc6 = int(hc8[0:6]) # leading 6 digits -> HUC6

    raster = gdal.Open(huc, GA_ReadOnly) # open the raster
    if raster is None:
        # gdal.Open can return None instead of raising; fail with a clear message
        raise IOError('could not open raster: %s' % huc)

    # load band 1 into memory
    # NOTE(review): float32 represents integers exactly only up to 2**24;
    # confirm the cell indices stay below that.
    r = np.array(raster.GetRasterBand(1).ReadAsArray(), dtype=np.float32)
    raster = None # drop the reference so GDAL releases the dataset

    r = r.ravel() # flatten to 1-D
    r = r[(r != -9999) & ~np.isnan(r)] # discard -9999 fill cells and NaNs

    ids = np.unique(r) # distinct cell indices in this basin
    cells.extend(ids)

    # Repeat the codes once per *unique* id. Counting the un-deduplicated
    # array here would leave the code lists longer than `cells` whenever a
    # raster contains a repeated value.
    n = len(ids)

    huc8s.extend(np.repeat(hc8,n))
    huc6s.extend(np.repeat(hc6,n))
    huc4s.extend(np.repeat(hc4,n))

In [17]:
# Sanity check: the code list and the cell list must be equally long before
# they are combined into one DataFrame. The parenthesised single-argument
# print works on both Python 2 and Python 3.
print(len(huc6s))
print(len(cells))

1507
1507


In [19]:
# Combine the parallel lists into one lookup table. Both `df` and `cells` are
# bound to the new DataFrame, so `cells` no longer refers to the plain list.
df = cells = pd.DataFrame(dict(idx=cells, huc8=huc8s, huc6=huc6s, huc4=huc4s))

In [20]:
# Write the cell-to-basin lookup table to disk as a pickle.
cells.to_pickle('./data/WSC_HUC8_basin_index.df')

In [21]:
# Load the working table stored under key 'df' (this rebinds `df`), then keep
# an independent copy of its `idx` column as `idx2`.
df = pd.read_hdf('./budyko_working.hdf', 'df')
df['idx2'] = df['idx'].copy()

In [22]:
# Attach the HUC codes to the working table by joining on the shared `idx`
# column. `on` already names the join key for both frames, so the former
# `left_index=True` (join on the left frame's index) contradicted it; current
# pandas rejects that combination with a MergeError. The join is an explicit
# inner join and the result carries a fresh integer index.
df = pd.merge(df, cells, on='idx', how='inner')

In [23]:
# Remove rows whose `idx` is missing; inplace=True mutates `df` directly.
df.dropna(subset=['idx'],inplace=True)

In [24]:
# Row count after the merge and the missing-idx filter.
len(df)

1507

In [25]:
# Persist the merged table under key 'df': fixed format, bzip2 compression at
# level 5, with Fletcher32 checksums enabled.
df.to_hdf(
    './budyko_working_WSC_HUC8.hdf',
    'df',
    format='fixed',
    complevel=5,
    complib='bzip2',
    fletcher32=True,
)

In [2]:
# Read the saved table back from disk. Note that `tmp` is reused here: earlier
# in the notebook it held a raster path string, now it holds a DataFrame.
tmp = pd.read_hdf('./budyko_working_WSC_HUC8.hdf','df')

In [4]:
# List the column labels of the reloaded table. The parenthesised form of
# print runs on both Python 2 and Python 3.
print(tmp.columns)

Index([u'frlon', u'PETpenmanvic', u'P', u'ET', u'frlat', u'Sf', u'meanablseason', u'meandosnowfree', u'meanpeakswe', u'meanpeaket', u'meandopeaket', u'meanablslope', u'meandopeakswe', u'infilt', u'fs_active', u'l1_quartz', u'l3_Wpwp_FRACT', u'l3_bulkD', u'l3_init_moist', u'annual_prec', u'l2_soil_density', u'l2_Ksat', u'rough', u'l2_Wcr_FRACT', u'l2_depth', u'l2_quartz', u'l1_phi_s', u'l3_bubble', u'l1_expt', u'Ws', u'l1_Ksat', u'l1_soil_density', u'l1_resid_moisture', u'l2_phi_s', u'l2_init_moist', u'l2_expt', u'l1_init_moist', u'l1_depth', u'off_gmt', u'l3_expt', u'l3_depth', u'Dsmax', u'l1_Wpwp_FRACT', u'l3_resid_moisture', u'avg_T', u'l3_soil_density', u'l3_phi_s', u'dp', u'c', u'l2_bulkD', u'l2_bubble', u'l2_Wpwp_FRACT', u'l3_Ksat', u'l3_quartz', u'Ds', u'l1_bulkD', u'l1_bubble', u'l2_resid_moisture', u'elev', u'snow_rough', u'l1_Wcr_FRACT', u'l3_Wcr_FRACT', u'slope', u'rsquared_adj', u'intercept', u'rsquared', u'pvalue', u'w_flux', u'w_flux2', u'w_flux3', u'snowmelt', u'Sf2', u'S