## Download packages

In [1]:
import pandas as pd
import numpy as np
import os
import re


from datetime import datetime
import math
import time

import scipy.stats as stats
from sklearn.neighbors import KNeighborsRegressor
import glob

import nltk
from string import punctuation
from nltk.corpus import stopwords  
from nltk.stem import WordNetLemmatizer
import nltk
#nltk.download('punkt')
  
# Create WordNetLemmatizer object 
word_lemm_obj = WordNetLemmatizer() 
stop_words = set(stopwords.words('english'))  

import warnings
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

[nltk_data] Downloading package punkt to /Users/aparajita/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Load data
**Note:** when loading files, make sure your files are located in a directory that is correctly referenced within the "read_csv" pandas function, depending on how your local computer/directory is set up.

In [2]:
# Site data
site = pd.read_csv('Input Files/site.csv',
                  dtype={'siteiid': 'str'})

# Siteobs data
siteobs = pd.read_csv('Input Files/siteobs.csv', 
                     dtype={'siteiidref': 'str', 'siteobsiid': 'str'})

# Pedon data
pedon = pd.read_csv('Input Files/pedon.csv',
                   dtype={'siteobsiid': 'str'})

# -------- PART 1 --


# Site parent material data
sitepm = pd.read_csv('Input Files/sitepm.csv',
                         dtype = {'siteiidref': 'str'})
# Geomorphic data
geomorfeat = pd.read_csv('Input Files/geomorfeat.csv', 
                         dtype = {'geomfiidref': 'str', 'siteiidref': 'str', 'sitegeomdiid':'str'})
# NCSS Pedon Lab data
ncsspedonlabdata = pd.read_csv('Input Files/ncsspedonlabdata.csv', 
                              dtype = {'peiidref': 'str', 'ncsspedonlabdataiid': 'str', 'grpiidref':'str'})
# NCSS Layer Lab data
ncsslayerlabdata = pd.read_csv('Input Files/ncsslayerlabdata.csv',
                              dtype = {'ncsspedonlabdataiidref': 'str', 'ncsslayerlabdataiid': 'str'})
# Climate data
site_prism = pd.read_csv('Input Files/SITE_PRISM.csv', dtype = {'siteiid': 'str'})

# Landsat satellite data
satellite = pd.read_csv('Input Files/pedon_satellite.csv',
                              dtype = {'siteiid': 'str', 'siteobsiid': 'str', 'peiid': 'str'})

#phorizon
phorizon_df_org = pd.read_csv('Input Files/phorizon.csv'
                          ,index_col=0)

# Topographical data
EcoReg_Key_df = pd.read_csv('Input Files/EcoReg_Key.csv')
geology_legend_df = pd.read_csv('Input Files/geology_legend.csv')
pedons_topo_df = pd.read_csv('Input Files/pedons_topo.csv')
surfacegeo = pd.read_csv('Input Files/surfacegeology_legend_final.csv')
topo_df_rest = pd.read_csv('Input Files/siteiid_lat_long_topo_20210319.csv')

# Vegetation data
windbreakrowdata = pd.read_csv('Input Files/windbreakrowdata.txt', delimiter="|")
plottreeinventory = pd.read_csv('Input Files/plottreeinventory.txt', delimiter="|")
plotplantinventory = pd.read_csv('Input Files/plotplantinventory.txt', delimiter="|")
plottreesiteindexsummary = pd.read_csv('Input Files/plottreesiteindexsummary.txt', delimiter="|")
vegplot = pd.read_csv('Input Files/vegplot.txt', delimiter="|")
plant = pd.read_csv('Input Files/plant.txt', delimiter="|", encoding='latin-1')

In [3]:
print(pedon.shape)
pedon.head()

(560930, 68)


Unnamed: 0.1,Unnamed: 0,X,siteobsiid,upedonid,pedrecorig,taxonname,taxonname_,localphase,taxclname,taxclname_,taxonkind,taxonkind_,pedontype,pedonpurpo,pedonunit,labdatades,relexpsize,relexpuom,earthcovki,earthcov_1,erocl,labsourcei,pedlabsamp,tsectiidre,tsectstopn,tsectinter,rcapointnu,soilreplic,azimuthfro,distancefr,rectangula,distance_1,pedodermcl,pedodermco,biolcrustt,biolcrus_1,physcrusts,crustdevcl,rangevegca,rangeveg_1,forestover,forestunde,forestgrou,forestgr_1,agronomicf,otherfeatu,currentcro,littercove,residuedes,pedonhydri,pecertstat,peqcstatus,peqastatus,saspipelen,saspipel_1,saspipel_2,sascoreset,sascorelen,sascoresto,sasexposur,sasexpos_1,pedbiidref,grpiidref,objwlupdat,objuseriid,recwlupdat,recuseriid,peiid
0,8,8,14517,93KS121030,Converted from PDP 3.x,Summit,3,,"Fine, smectitic, thermic Vertic Argiudolls",1,Series,3,Correlates to named soil,Full pedon description,0,No,0,,Crop cover,,,,,7571,10,6.0,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14549
1,9,9,14518,93KS121031,Converted from PDP 3.x,Eram,3,,"Fine, smectitic, thermic Aquic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,4182,1,27.4,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14550
2,18,18,14527,93KS121040,Converted from PDP 3.x,Lula,3,,"Fine, smectitic, thermic Vertic Paleudolls",1,Series,3,Taxadjunct to the series,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,4182,10,27.4,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14559
3,19,19,14528,93KS121041,Converted from PDP 3.x,Bates,3,,"Fine-loamy, sesquic Typic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,5209,1,7.5,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14560
4,28,28,14537,93KS121050,Converted from PDP 3.x,Summit,3,,"Fine, smectitic, thermic Vertic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,5209,10,7.5,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14569


## Cleaning Site + Siteobs + Pedon

site and siteobs are joined by **siteiid** on the left and siteiidref on the right

siteobs and pedon are joined by **siteobsiid** on the left and **siteobsiid** on the right

In [4]:
# define missing values function to check datasets for how many missing values there are
def missing_values(df):
    percent_missing = df.isnull().sum() * 100 / len(df)
    missing_value_df = pd.DataFrame({'total # of rows': len(df),
                                      'total # of NaN': df.isnull().sum(),
                                      'percent missing': percent_missing})
    missing_value_df.sort_values('percent missing', inplace=True, ascending=False)
    return missing_value_df

### Site

In [5]:
# preview of the raw data
site.head()

Unnamed: 0.1,Unnamed: 0,X,usiteid,latdegrees,latminutes,latseconds,latdir,longdegree,longminute,longsecond,longdir,horizdatnm,plsssectio,plsstownsh,plssrange,plssmeridi,utmzone,utmnorthin,utmeasting,geocoordso,elev,geomposhil,geomposmnt,geompostrc,geomposfla,hillslopep,geomslopes,slope,aspect,slopelenus,slopelenup,shapeacros,shapedown,slopecompl,locphysnm,siteksatcl,siteksat_1,drainagecl,runoff,drainagepa,pmgroupnam,pmgroupn_1,climstaid,climstanm,climstatyp,ffd,map,reannualpr,airtempa,soiltempa,airtemps,soiltemps,airtempw,soiltempw,benchmarks,flodfreqcl,floddurcl,flodmonthb,pondfreqcl,ponddurcl,pondmonthb,wtabledur,latstddeci,longstddec,gpspositio,gpspdop,elevcorrec,sdbiidref,siteiid
0,8,8,93KS121030,38,41,0.0,North,94,54,0.0,West,North American Datum of 1983,6,15 S,23 E,,0,0.0,0.0,Estimated from other source,290.0,,,,,Backslope,,4.0,225,0,0.0,Linear,Convex,,,,,Moderately well drained,,,clayey residuum weathered from clayey shale,1,,,,0,19,0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,,,,,0,38.683333,-94.9,0.0,0,0.0,139,14572
1,9,9,93KS121031,38,42,30.27,North,94,55,39.08,West,North American Datum of 1983,35,15 S,23 E,,0,0.0,0.0,,320.0,,,,,Shoulder,,2.0,45,0,0.0,Linear,Convex,,,,,Moderately well drained,,,clayey residuum weathered from clayey shale,1,,,,0,19,0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,,,,,0,38.708408,-94.927522,0.0,0,0.0,139,14573
2,18,18,93KS121040,38,42,16.52,North,94,55,49.95,West,North American Datum of 1983,35,15 S,23 E,,0,0.0,0.0,,320.0,,,,,Shoulder,,3.0,225,0,0.0,Linear,Convex,,,,,Moderately well drained,,,silty and clayey residuum weathered from limes...,1,,,,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,,,,,0,38.704589,-94.930542,0.0,0,0.0,139,14582
3,19,19,93KS121041,38,42,53.59,North,94,57,45.98,West,North American Datum of 1983,27,15 S,22 E,,0,0.0,0.0,,290.0,,,,,Footslope,,4.0,0,0,0.0,Linear,Linear,,,,,Well drained,,,sandstone and shale,1,,,,0,19,0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,,,,,0,38.714886,-94.962772,0.0,0,0.0,139,14583
4,28,28,93KS121050,38,42,53.48,North,94,57,52.53,West,North American Datum of 1983,27,15 S,22 E,,0,0.0,0.0,,290.0,,,,,Footslope,,3.0,0,0,0.0,Linear,Linear,,,,,Moderately well drained,,,silty and clayey residuum weathered from claye...,1,,,,0,19,0,0.0,0.0,0.0,0.0,0.0,0.0,0,,,,,,,0,38.714856,-94.964592,0.0,0,0.0,139,14592


In [6]:
# displays missing values
missing_values(site)

Unnamed: 0,total # of rows,total # of NaN,percent missing
siteksat_1,560930,558811,99.622235
pondmonthb,560930,558495,99.565899
siteksatcl,560930,557379,99.366944
flodmonthb,560930,556686,99.243399
ponddurcl,560930,553842,98.736384
drainagepa,560930,552609,98.516571
climstatyp,560930,544832,97.130123
geomposmnt,560930,544599,97.088585
floddurcl,560930,541494,96.53504
climstaid,560930,539850,96.241955


In [7]:
# Drop the columns Unnamed: 0 and X
site = site.drop(columns=['Unnamed: 0', 'X'])

# drop duplicate rows except first occurence based on all columns
site = site.drop_duplicates()

# drop columns that have more than 70% missing values - 19 columns in total
site = site.drop(columns=["slopecompl","pondfreqcl","geomposhil","runoff","geomslopes","locphysnm","geompostrc",
                             "geomposfla","climstanm","climstaid","floddurcl","geomposmnt","climstatyp",
                          "drainagepa","ponddurcl","flodmonthb","siteksatcl","pondmonthb","siteksat_1",])

In [8]:
site.shape

(546262, 48)

### Siteobs

In [9]:
# preview of the raw data
siteobs.head()

Unnamed: 0.1,Unnamed: 0,X,seqnum,obsdate,obsdatekin,datacollec,photoid,swaterkind,swaterdept,hydrologys,geomicrore,geommicele,geommicpat,ecostateid,ecostatena,commphasei,commphasen,plantassoc,earthcovki,earthcov_1,resourcere,bareareama,pedodermcl,pedodermco,biolcrustt,biolcrus_1,physcrusts,crustdevcl,soilredist,exposedsoi,localdistu,localdis_1,drainedfla,beddingfla,plantation,forestrota,yldstudyid,currweathc,currairtem,tidalperio,bottomtype,saswaterte,saswater_1,saswaterph,saswater_2,phdetermet,sasdissolv,sasdisso_1,saswatersa,saswater_3,siteiidref,siteobsiid
0,8,8,0,1993-06-30,Actual Site Observation Date,,8,,0,,,0,,0,,0,,,,,,0,,0,,,,,,0,0,,0,0,0,,,,0,,,0,0,0.0,0.0,,0.0,0.0,0.0,0.0,14572,14517
1,9,9,0,1993-07-15,Actual Site Observation Date,,3,,0,,,0,,0,,0,,,,,,0,,0,,,,,,0,0,,0,0,0,,,,0,,,0,0,0.0,0.0,,0.0,0.0,0.0,0.0,14573,14518
2,18,18,0,1993-07-15,Actual Site Observation Date,,3,,0,,,0,,0,,0,,,,,,0,,0,,,,,,0,0,,0,0,0,,,,0,,,0,0,0.0,0.0,,0.0,0.0,0.0,0.0,14582,14527
3,19,19,0,1993-07-13,Actual Site Observation Date,,2,,0,,,0,,0,,0,,,,,,0,,0,,,,,,0,0,,0,0,0,,,,0,,,0,0,0.0,0.0,,0.0,0.0,0.0,0.0,14583,14528
4,28,28,0,1993-07-13,Actual Site Observation Date,,2,,0,,,0,,0,,0,,,,,,0,,0,,,,,,0,0,,0,0,0,,,,0,,,0,0,0.0,0.0,,0.0,0.0,0.0,0.0,14592,14537


In [10]:
# displays missing values
missing_values(siteobs)

Unnamed: 0,total # of rows,total # of NaN,percent missing
forestrota,560930,560717,99.962027
commphasen,560930,560658,99.951509
phdetermet,560930,560620,99.944735
bottomtype,560930,560415,99.908188
tidalperio,560930,560310,99.889469
localdis_1,560930,560294,99.886617
biolcrus_1,560930,560179,99.866115
ecostatena,560930,559999,99.834026
crustdevcl,560930,559979,99.83046
soilredist,560930,559966,99.828143


In [11]:
# Drop the columns Unnamed: 0 and X
siteobs = siteobs.drop(columns=['Unnamed: 0', 'X'])

# drop duplicate rows
siteobs = siteobs.drop_duplicates()

# drop columns that have more than 70% missing values - 25 columns in total
siteobs = siteobs.drop(columns=['forestrota','localdis_1','phdetermet','commphasen','bottomtype',
                                'tidalperio','biolcrus_1','ecostatena','crustdevcl','soilredist',
                                'physcrusts','resourcere','biolcrustt','geommicpat','pedodermcl',
                                'yldstudyid','hydrologys','geomicrore','swaterkind','currweathc',
                                'plantassoc','earthcov_1','earthcovki','datacollec','photoid'])

In [12]:
siteobs.shape

(546761, 25)

### Pedon

In [13]:
# preview of the raw data
pedon.head()

Unnamed: 0.1,Unnamed: 0,X,siteobsiid,upedonid,pedrecorig,taxonname,taxonname_,localphase,taxclname,taxclname_,taxonkind,taxonkind_,pedontype,pedonpurpo,pedonunit,labdatades,relexpsize,relexpuom,earthcovki,earthcov_1,erocl,labsourcei,pedlabsamp,tsectiidre,tsectstopn,tsectinter,rcapointnu,soilreplic,azimuthfro,distancefr,rectangula,distance_1,pedodermcl,pedodermco,biolcrustt,biolcrus_1,physcrusts,crustdevcl,rangevegca,rangeveg_1,forestover,forestunde,forestgrou,forestgr_1,agronomicf,otherfeatu,currentcro,littercove,residuedes,pedonhydri,pecertstat,peqcstatus,peqastatus,saspipelen,saspipel_1,saspipel_2,sascoreset,sascorelen,sascoresto,sasexposur,sasexpos_1,pedbiidref,grpiidref,objwlupdat,objuseriid,recwlupdat,recuseriid,peiid
0,8,8,14517,93KS121030,Converted from PDP 3.x,Summit,3,,"Fine, smectitic, thermic Vertic Argiudolls",1,Series,3,Correlates to named soil,Full pedon description,0,No,0,,Crop cover,,,,,7571,10,6.0,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14549
1,9,9,14518,93KS121031,Converted from PDP 3.x,Eram,3,,"Fine, smectitic, thermic Aquic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,4182,1,27.4,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14550
2,18,18,14527,93KS121040,Converted from PDP 3.x,Lula,3,,"Fine, smectitic, thermic Vertic Paleudolls",1,Series,3,Taxadjunct to the series,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,4182,10,27.4,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14559
3,19,19,14528,93KS121041,Converted from PDP 3.x,Bates,3,,"Fine-loamy, sesquic Typic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,5209,1,7.5,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14560
4,28,28,14537,93KS121050,Converted from PDP 3.x,Summit,3,,"Fine, smectitic, thermic Vertic Argiudolls",1,Series,3,Map unit inclusion,Full pedon description,0,No,0,,Grass/herbaceous cover,,,,,5209,10,7.5,0,0,0,0.0,0,0.0,,0,,,,,,,,,,,,,,0,,No,,,,0.0,0.0,0.0,0.0,0.0,,,,139,19808,2013-04-02,2542,2013-04-02,2542,14569


In [14]:
# displays missing values missing_values(pedon)
missing_values(pedon)

Unnamed: 0,total # of rows,total # of NaN,percent missing
otherfeatu,560930,560887,99.992334
agronomicf,560930,560606,99.942239
sascoresto,560930,560552,99.932612
residuedes,560930,560548,99.931899
sasexpos_1,560930,560516,99.926194
crustdevcl,560930,560375,99.901057
sasexposur,560930,560362,99.89874
biolcrus_1,560930,560306,99.888756
biolcrustt,560930,560140,99.859162
rangeveg_1,560930,560068,99.846327


In [15]:
# Drop the columns Unnamed: 0 and X
pedon = pedon.drop(columns=['Unnamed: 0', 'X'])

# drop duplicate rows
pedon = pedon.drop_duplicates()

# drop columns that have more than 70% missing values - 25 columns in total
pedon = pedon.drop(columns=['otherfeatu','agronomicf','sascoresto','residuedes','sasexpos_1',
                               'crustdevcl','sasexposur','biolcrus_1','biolcrustt','rangeveg_1',
                               'physcrusts','forestgr_1','rangevegca','forestgrou','forestunde',
                               'pedodermcl','forestover','currentcro','localphase','relexpuom',
                               'peqastatus','labsourcei','pedlabsamp','pecertstat','peqcstatus',
                               'erocl'])

In [16]:
pedon.shape

(560930, 40)

## Join site + siteobs + pedon

In [17]:
# left join site and siteobs
site_siteobs = pd.merge(site, siteobs, how='left', left_on=['siteiid'], right_on=['siteiidref'], suffixes=('_site','_siteobs'))

# left join site_siteobs and pedon
site_siteobs_ped = pd.merge(site_siteobs, pedon, how='left', left_on=['siteobsiid'], right_on=['siteobsiid'], suffixes=('_site_siteobs','_pedon'))


In [18]:
# Additional variables to drop because they are either unnecessary in our analysis
# or they may have recorded missing values as 0 instead of NaN, leading to misinformation

drop_var_list = ['usiteid','latdegrees','latminutes','latseconds','latdir','longdegree','longminute','longsecond',
                 'longdir','utmzone','geocoordso','slopelenus','slopelenup','pmgroupn_1','ffd','map','reannualpr',
                 'airtempa','soiltempa','airtemps','soiltemps','airtempw','soiltempw','wtabledur','gpspositio',
                 'gpspdop','elevcorrec','seqnum','swaterdept','geommicele','bareareama','exposedsoi','localdistu',
                 'currairtem','saswaterte','saswater_1','saswaterph','saswater_2','sasdissolv','sasdisso_1',
                 'saswatersa','saswater_3','taxonname_','taxclname_','taxonkind_','pedonunit','labdatades',
                 'relexpsize','tsectstopn','tsectinter','rcapointnu','azimuthfro','distancefr','rectangula',
                 'distance_1','littercove','saspipelen','saspipel_1','saspipel_2','sascoreset','sascorelen',
                 'objwlupdat','recwlupdat']

ssp_final = site_siteobs_ped.drop(columns = drop_var_list)

In [19]:
ssp_final.shape

(560930, 49)

## Checkpoint - download data

In [20]:
# preview of the joined data
ssp_final.head()

Unnamed: 0,horizdatnm,plsssectio,plsstownsh,plssrange,plssmeridi,utmnorthin,utmeasting,elev,hillslopep,slope,aspect,shapeacros,shapedown,drainagecl,pmgroupnam,benchmarks,flodfreqcl,latstddeci,longstddec,sdbiidref,siteiid,obsdate,obsdatekin,ecostateid,commphasei,pedodermco_site_siteobs,drainedfla,beddingfla,plantation,siteiidref,siteobsiid,upedonid,pedrecorig,taxonname,taxclname,taxonkind,pedontype,pedonpurpo,earthcovki,earthcov_1,tsectiidre,soilreplic,pedodermco_pedon,pedonhydri,pedbiidref,grpiidref,objuseriid,recuseriid,peiid
0,North American Datum of 1983,6,15 S,23 E,,0.0,0.0,290.0,Backslope,4.0,225,Linear,Convex,Moderately well drained,clayey residuum weathered from clayey shale,0,,38.683333,-94.9,139,14572,1993-06-30,Actual Site Observation Date,0,0,0,0,0,0,14572,14517,93KS121030,Converted from PDP 3.x,Summit,"Fine, smectitic, thermic Vertic Argiudolls",Series,Correlates to named soil,Full pedon description,Crop cover,,7571,0,0,No,139,19808,2542,2542,14549
1,North American Datum of 1983,35,15 S,23 E,,0.0,0.0,320.0,Shoulder,2.0,45,Linear,Convex,Moderately well drained,clayey residuum weathered from clayey shale,0,,38.708408,-94.927522,139,14573,1993-07-15,Actual Site Observation Date,0,0,0,0,0,0,14573,14518,93KS121031,Converted from PDP 3.x,Eram,"Fine, smectitic, thermic Aquic Argiudolls",Series,Map unit inclusion,Full pedon description,Grass/herbaceous cover,,4182,0,0,No,139,19808,2542,2542,14550
2,North American Datum of 1983,35,15 S,23 E,,0.0,0.0,320.0,Shoulder,3.0,225,Linear,Convex,Moderately well drained,silty and clayey residuum weathered from limes...,0,,38.704589,-94.930542,139,14582,1993-07-15,Actual Site Observation Date,0,0,0,0,0,0,14582,14527,93KS121040,Converted from PDP 3.x,Lula,"Fine, smectitic, thermic Vertic Paleudolls",Series,Taxadjunct to the series,Full pedon description,Grass/herbaceous cover,,4182,0,0,No,139,19808,2542,2542,14559
3,North American Datum of 1983,27,15 S,22 E,,0.0,0.0,290.0,Footslope,4.0,0,Linear,Linear,Well drained,sandstone and shale,0,,38.714886,-94.962772,139,14583,1993-07-13,Actual Site Observation Date,0,0,0,0,0,0,14583,14528,93KS121041,Converted from PDP 3.x,Bates,"Fine-loamy, sesquic Typic Argiudolls",Series,Map unit inclusion,Full pedon description,Grass/herbaceous cover,,5209,0,0,No,139,19808,2542,2542,14560
4,North American Datum of 1983,27,15 S,22 E,,0.0,0.0,290.0,Footslope,3.0,0,Linear,Linear,Moderately well drained,silty and clayey residuum weathered from claye...,0,,38.714856,-94.964592,139,14592,1993-07-13,Actual Site Observation Date,0,0,0,0,0,0,14592,14537,93KS121050,Converted from PDP 3.x,Summit,"Fine, smectitic, thermic Vertic Argiudolls",Series,Map unit inclusion,Full pedon description,Grass/herbaceous cover,,5209,0,0,No,139,19808,2542,2542,14569


In [21]:
ssp_final.to_csv('Saved Datasets/ssp_final.csv', index=False)

## **** End of Part 1 - Saving merged site, site observations and pedon data ****

In [23]:
ssp_final= pd.read_csv('Saved Datasets/ssp_final.csv')
#create a mapping for pedon (peiid, peiidref,siteiid,siteiidref,siteobsiid)
site_var_list = ['siteobsiid','peiidref','peiid','siteiid','siteiidref']

site_pedon_df = ssp_final
site_pedon_df['peiidref'] = site_pedon_df['peiid'].copy()
site_map_ids = site_pedon_df[site_var_list]
site_map_ids.head()

Unnamed: 0,siteobsiid,peiidref,peiid,siteiid,siteiidref
0,14517,14549,14549,14572,14572
1,14518,14550,14550,14573,14573
2,14527,14559,14559,14582,14582
3,14528,14560,14560,14583,14583
4,14537,14569,14569,14592,14592


In [25]:
siteobs_from_siteobs_data = site_pedon_df.siteobsiid.unique().tolist()
pedons_from_phorizon_data = site_pedon_df['peiidref'].unique().tolist()

In [26]:
site_map_ids.shape
len(pedons_from_phorizon_data)

560930

## Cleaning sitepm + geomorph + ncss + climate + satellite

### sitepm

In [27]:
# preview of the raw data
sitepm.head()

Unnamed: 0,OID,seqnum,pmorder,pmdept,pmdepb,pmmodifier,pmgenmod,pmkind,pmorigin,pmweathering,siteiidref,sitepmiid
0,-1,1.0,,,,Clayey,,Residuum,Clayey shale,,14565,134714
1,-1,1.0,,,,Clayey,,Residuum,Clayey shale,,14566,134715
2,-1,1.0,,,,Clayey,,Residuum,Clayey shale,,14567,134716
3,-1,1.0,,,,Clayey,,Residuum,Clayey shale,,14568,134717
4,-1,1.0,,,,Clayey,,Residuum,Clayey shale,,14569,134718


In [28]:
# displays missing values
missing_values(sitepm)

Unnamed: 0,total # of rows,total # of NaN,percent missing
pmweathering,680326,628408,92.368659
pmgenmod,680326,608760,89.480631
pmdept,680326,583577,85.779024
pmdepb,680326,581159,85.423606
seqnum,680326,505161,74.252785
pmmodifier,680326,481913,70.835599
pmorder,680326,443626,65.207856
pmorigin,680326,428626,63.003031
pmkind,680326,26035,3.826842
OID,680326,0,0.0


In [29]:
# drop unnecessary variables
sitepm = sitepm.drop(columns=['OID', 'seqnum', 'sitepmiid'])

# drop duplicate rows
sitepm = sitepm.drop_duplicates()

# drop columns that has more than 70% missing values
sitepm_final = sitepm.drop(columns = ['pmweathering', 'pmgenmod', 'pmdept', 'pmdepb'])

# drop any duplicated rows
sitepm_final = sitepm_final.drop_duplicates()

# drop any rows where pmorder,pmmodifier,pmkind, pmorigin are all NaN
sitepm_final = sitepm_final[~sitepm_final[['pmorder','pmmodifier','pmkind','pmorigin']].isnull().all(axis=1)].reset_index(drop=True)

# read in siteiidref as string
sitepm_final['siteiidref'] = sitepm_final['siteiidref'].astype('str')


In [30]:
sitepm_final.shape

(633187, 5)

### geomorph

In [31]:
# preview of the raw data
geomorfeat.head()

Unnamed: 0.1,Unnamed: 0,geomfiidref,siteiidref,sitegeomdiid,Feature.Type,Feature.Name..singular.,Feature.Name..plural.,Obsolete.,Field.Code
0,2,335,14565,19259,Landscape,upland,uplands,Unchecked,UP
1,4,335,14566,19261,Landscape,upland,uplands,Unchecked,UP
2,6,335,14567,19263,Landscape,upland,uplands,Unchecked,UP
3,8,335,14568,19265,Landscape,upland,uplands,Unchecked,UP
4,10,335,14569,19267,Landscape,upland,uplands,Unchecked,UP


In [32]:
# drop unnecessary variables
geomorfeat = geomorfeat.drop(columns=['Unnamed: 0', 'Obsolete.', 'Feature.Name..plural.', 'Field.Code'])

# drop duplicate rows
geomorfeat = geomorfeat.drop_duplicates()

# drop rows where siteiidref is NaN
geomorfeat = geomorfeat[geomorfeat['siteiidref'].notna()]

# one hot encoding -- create new columns called "Features" where any feature that shows up more than 8000 times
# will be listed, otherwise it will be listed as "Other"
# create 27 feature variables + 1 "Other" variable
features = geomorfeat['Feature.Name..singular.'].value_counts().to_frame()
features = features[features['Feature.Name..singular.'] > 8000].reset_index()
features = features.drop(columns = 'Feature.Name..singular.')
features = features.rename(columns={"index": "Features"})

geomorfeat_v2 = pd.merge(geomorfeat, features, how='left', left_on=['Feature.Name..singular.'], 
                         right_on=['Features'],suffixes=('_geom','_feature'))

# fill in NaN in column Features with "Other"
values = {'Features': 'other'}
geomorfeat_v2 = geomorfeat_v2.fillna(value=values)

geomorfeat_v2_1 = geomorfeat_v2[['siteiidref']].join(pd.get_dummies(geomorfeat_v2['Features']).add_prefix('Feature_')).groupby('siteiidref').max().reset_index()
geomorfeat_v2_2 = geomorfeat_v2[['siteiidref']].join(pd.get_dummies(geomorfeat_v2['Feature.Type']).add_prefix('Feature_Type_')).groupby('siteiidref').max().reset_index()

geomorfeat_final = pd.merge(geomorfeat_v2_2, geomorfeat_v2_1, how='inner', 
                            left_on=['siteiidref'], right_on=['siteiidref'])


In [33]:
geomorfeat_final.shape

(446982, 33)

In [34]:
geomorfeat_final.head()

Unnamed: 0,siteiidref,Feature_Type_Anthropogenic Feature,Feature_Type_Landform,Feature_Type_Landscape,Feature_Type_Microfeature,Feature_alluvial fan,Feature_coastal plain,Feature_drainageway,Feature_flood plain,Feature_foothills,Feature_ground moraine,Feature_hill,Feature_hills,Feature_hillslope,Feature_interfluve,Feature_intermontane basin,Feature_lake plain,Feature_mountain,Feature_mountain slope,Feature_mountains,Feature_other,Feature_outwash plain,Feature_piedmont,Feature_plain,Feature_plains,Feature_plateau,Feature_ridge,Feature_river valley,Feature_stream terrace,Feature_terrace,Feature_till plain,Feature_upland,Feature_valley
0,10000,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0
1,100004,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,100005,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
3,100006,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,100007,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### NCSS data

#### ncss pedon

In [35]:
# preview of the raw data
ncsspedonlabdata.head()

Unnamed: 0,OID,pedlabsampnum,peiidref,psctopdepth,psctopdepth_s,pscbotdepth,pscbotdepth_s,noncarbclaywtavg,claytotwtavg,le0to100,wf0175wtavgpsc,volfractgt2wtavg,cec7clayratiowtavg,labdatasheeturl,ncsspedbiidref,grpiidref,objwlupdated,objuseriidref,recwlupdated,recuseriidref,ncsspedonlabdataiid
0,-1,93KS121474,14593,43.0,3,93.0,3,47.0,47.0,,0.0,,,http://ncsslabdatamart.sc.egov.usda.gov/rptExe...,105,19021,10/25/2018 20:02:06,1237.0,10/25/2018 20:02:06,1237.0,41020
1,-1,93KS121474,14593,43.0,3,93.0,3,47.0,47.0,,0.0,,,http://ncsslabdatamart.sc.egov.usda.gov/rptExe...,105,19021,10/25/2018 20:02:06,1237.0,10/25/2018 20:02:06,1237.0,41020
2,-1,93KS121474,14593,43.0,3,93.0,3,47.0,47.0,,0.0,,,http://ncsslabdatamart.sc.egov.usda.gov/rptExe...,105,19021,10/25/2018 20:02:06,1237.0,10/25/2018 20:02:06,1237.0,41020
3,-1,93KS121474,14593,43.0,3,93.0,3,47.0,47.0,,0.0,,,http://ncsslabdatamart.sc.egov.usda.gov/rptExe...,105,19021,10/25/2018 20:02:06,1237.0,10/25/2018 20:02:06,1237.0,41020
4,-1,93KS121474,14593,43.0,3,93.0,3,47.0,47.0,,0.0,,,http://ncsslabdatamart.sc.egov.usda.gov/rptExe...,105,19021,10/25/2018 20:02:06,1237.0,10/25/2018 20:02:06,1237.0,41020


In [36]:
# displays missing values
missing_values(ncsspedonlabdata)

Unnamed: 0,total # of rows,total # of NaN,percent missing
le0to100,350138,312605,89.280512
volfractgt2wtavg,350138,237377,67.795269
cec7clayratiowtavg,350138,200635,57.301692
wf0175wtavgpsc,350138,160519,45.844496
noncarbclaywtavg,350138,154460,44.114035
claytotwtavg,350138,154448,44.110608
pscbotdepth,350138,137328,39.221107
psctopdepth,350138,137197,39.183693
recuseriidref,350138,11778,3.363817
objuseriidref,350138,11764,3.359818


In [37]:
# drop unnecessary variables
ncsspedonlabdata = ncsspedonlabdata.drop(columns = ['OID', 'psctopdepth_s', 'pscbotdepth_s', 
                                                    'labdatasheeturl', 'objwlupdated', 'objuseriidref', 
                                                    'recwlupdated','recuseriidref'])
# drop duplicate rows 
ncsspedonlabdata = ncsspedonlabdata.drop_duplicates()

# finalize data by resetting index
ncsspedonlabdata_final = ncsspedonlabdata.reset_index(drop=True)

In [38]:
ncsspedonlabdata_final.shape

(57585, 13)

#### ncss layer

In [39]:
# preview of the raw data
ncsslayerlabdata.head()

Unnamed: 0,OID,ncsspedonlabdataiidref,layerseqnum,labsampnum,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claycarbmeasured,clayfinemeasured,claytotmeasured,carbonorganicpctmeasured,carbontotalpctmeasured,ompctest,fiberrubbedpct,fiberunrubbedpct,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,phnaf,phoxidized,resistivity,ecmeasured,esp,sar,cecsumcations,cec7,ecec,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,caco3lt20measured,gypsumequivlt2measured,gypsumequivmeasured,feoxalatemeasured,feextractable,fetotal,sioxalatemeasured,extracid,extral,aloxalatemeasured,altotal,pmehlich3,ph2osolublemeasured,poxalatemeasured,polsenmeasured,ptotalmeasured,nzpretention,dbthirdbar,dbovendry,aggstabpct,wtenthbarclod,wtenthbarsieve,wthirdbarclod,wthirdbarsieve,wfifteenbarmeasured,wretentiondiffws,wfifteenbartoclay,adod,lep,cole,liquidlimitmeasured,pi,recwlupdated,recuseriidref,ncsslayerlabdataiid
0,-1,41020,3.0,KSUSS0664,43,84,Reporting layer,Bt1,Bt1,NO,Air-dry,Silty clay,0.2,0.4,0.4,0.6,1.0,2.9,15.8,8.1,50.200001,,,46.900002,,,,,,,,,,,,5.9,4.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10/25/2018 20:02:13,1237.0,235238
1,-1,41020,4.0,KSUSS0665,84,99,Reporting layer,Bt2,Bt2,NO,Air-dry,Silty clay,0.1,0.4,0.4,0.6,1.0,2.6,14.3,7.4,49.799999,,,47.599998,,,,,,,,,,,,5.8,4.9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10/25/2018 20:02:13,1237.0,235239
2,-1,41020,5.0,KSUSS0666,99,122,Reporting layer,Bt3,Bt3,NO,Air-dry,Silty clay,0.2,0.1,0.2,0.5,1.3,2.3,11.1,5.9,40.200001,,,57.5,,,,,,,,,,,,6.6,5.7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10/25/2018 20:02:13,1237.0,235240
3,-1,41020,1.0,KSUSS0667,0,25,Reporting layer,A,A,NO,Air-dry,Silt loam,0.2,0.7,0.5,0.4,0.9,2.8,23.4,11.2,71.400002,,,25.799999,,,,,,,,,,,,5.6,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10/25/2018 20:02:13,1237.0,235241
4,-1,41020,2.0,KSUSS0663,25,43,Reporting layer,BA,BA,NO,Air-dry,Silty clay loam,0.0,0.5,0.5,0.4,1.0,2.6,18.6,8.5,60.700001,,,36.700001,,,,,,,,,,,,5.7,4.7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10/25/2018 20:02:13,1237.0,235237


In [40]:
# displays missing values
missing_values(ncsslayerlabdata)

Unnamed: 0,total # of rows,total # of NaN,percent missing
caco3lt20measured,350138,350138,100.0
gypsumequivmeasured,350138,350060,99.977723
phoxidized,350138,349425,99.796366
fiberunrubbedpct,350138,348820,99.623577
fiberrubbedpct,350138,348807,99.619864
ph2osolublemeasured,350138,348487,99.528472
resistivity,350138,347965,99.379388
ompctest,350138,347030,99.11235
wtenthbarsieve,350138,345228,98.597696
polsenmeasured,350138,344599,98.418052


In [41]:
# drop unnecessary variables
# OID is an index
# layerseqnum, 'layerseqnum', 'labsampnum','recwlupdated', 'recuseriidref' not needed
# the rest are over 70% NaN values

ncsslayerlabdata_v2 = ncsslayerlabdata.drop(
    columns = ['OID', 'layerseqnum', 'labsampnum','recwlupdated', 'recuseriidref', 'caco3lt20measured',
              'gypsumequivmeasured','phoxidized','fiberunrubbedpct','fiberrubbedpct',
               'ph2osolublemeasured','resistivity','ompctest','wtenthbarsieve','polsenmeasured','wthirdbarsieve',
               'pmehlich3','aggstabpct','ptotalmeasured','fetotal','altotal','liquidlimitmeasured','pi',
               'wtenthbarclod','lep','gypsumequivlt2measured','nzpretention','poxalatemeasured','phnaf',
               'claycarbmeasured','sar','sioxalatemeasured','aloxalatemeasured','feoxalatemeasured','ecmeasured',
               'ecec','cole','wretentiondiffws','dbthirdbar','wthirdbarclod','dbovendry','clayfinemeasured',
               'carbontotalpctmeasured','extral','feextractable','ncsslayerlabdataiid'])

# drop duplicate rows 
ncsslayerlabdata_v2 = ncsslayerlabdata_v2.drop_duplicates()

# filling missing top/bottom depth values
# no top depth NaN values
ncsslayerlabdata_v2.loc[ncsslayerlabdata_v2['hzdept'].isnull()]

# no bottom depth NaN values
ncsslayerlabdata_v2.loc[ncsslayerlabdata_v2['hzdepb'].isnull()]

# cases where top depth > bottom depth - 5 cases
df1 = ncsslayerlabdata_v2[ncsslayerlabdata_v2['hzdept'] > ncsslayerlabdata_v2['hzdepb']]
    # flip variable names for hzdept and hzdepb for the rows there bottom depth < top depth
df1.rename(columns={'hzdept': 'hzdepb', 'hzdepb': 'hzdept'}, inplace=True)

# drop those 5 cases from df2
df2 = ncsslayerlabdata_v2.drop([58926, 308310, 314267, 317622, 321162]) # these are index values for the 5 cases where top depth > bottom depth above
df2.shape

# append df1 and df2
ncsslayerlabdata_v3 = df2.append([df1]).sort_values(by=['ncsspedonlabdataiidref', 'hzdept']).reset_index(drop=True)
ncsslayerlabdata_v3 = ncsslayerlabdata_v3.reset_index(drop=True)

In [42]:
ncsslayerlabdata_v3.shape

(347457, 39)

In [43]:
ncsslayerlabdata_v3.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod
0,10001,0,30,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007
1,10001,30,91,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009
2,10001,91,122,Horizon,Bw2,Bw2,NO,Air-dry,Sandy clay loam,5.3,3.1,4.5,24.299999,20.1,57.299999,10.4,11.4,21.799999,20.9,0.19,9.0,23.0,7.0,39.0,62.0,39.0,6.5,5.8,2.0,27.1,23.1,22.6,83.0,98.0,,4.5,13.4,0.64,1.028
3,10002,0,46,Horizon,A,A,NO,Air-dry,Loam,0.4,0.5,1.4,13.6,27.9,43.799999,23.299999,20.9,44.200001,12.0,1.14,1.0,1.0,0.0,2.0,18.0,2.0,5.8,5.2,3.0,19.700001,15.1,13.7,70.0,91.0,,6.0,10.2,0.85,1.016
4,10002,46,91,Horizon,Bw1,Bw1,NO,Air-dry,Loam,0.0,0.1,0.6,13.3,37.599998,51.599998,21.9,15.6,37.5,10.9,0.46,2.0,29.0,2.0,33.0,43.0,33.0,6.4,5.8,1.0,25.799999,22.700001,21.9,85.0,96.0,,3.9,13.2,1.21,1.026


##### Binning

In [44]:
# Create labels and bins
labels = ['0cm_9cm', '10cm_19cm', '20cm_29cm', '30cm_39cm', '40cm_49cm',
         '50cm_59cm', '60cm_69cm', '70cm_79cm', '80cm_89cm', '90cm_99cm',
         '100cm_109cm', '110cm_119cm', '120+cm']
cut_bins = [-11, 9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 109, 119, 10000]

def binning_func(layer_df,cut_bins,labels):
    layer_df_in = layer_df.copy()
    ### binning
    layer_df_in['hzdept_bin'] = pd.cut(layer_df_in['hzdept'], cut_bins, labels=labels)
    
    return (layer_df_in)

# new column which designates each row into its proper bin 
ncsslayerlabdata_v4 = binning_func(ncsslayerlabdata_v3, cut_bins, labels)

In [45]:
ncsslayerlabdata_v4.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin
0,10001,0,30,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm
1,10001,30,91,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm
2,10001,91,122,Horizon,Bw2,Bw2,NO,Air-dry,Sandy clay loam,5.3,3.1,4.5,24.299999,20.1,57.299999,10.4,11.4,21.799999,20.9,0.19,9.0,23.0,7.0,39.0,62.0,39.0,6.5,5.8,2.0,27.1,23.1,22.6,83.0,98.0,,4.5,13.4,0.64,1.028,90cm_99cm
3,10002,0,46,Horizon,A,A,NO,Air-dry,Loam,0.4,0.5,1.4,13.6,27.9,43.799999,23.299999,20.9,44.200001,12.0,1.14,1.0,1.0,0.0,2.0,18.0,2.0,5.8,5.2,3.0,19.700001,15.1,13.7,70.0,91.0,,6.0,10.2,0.85,1.016,0cm_9cm
4,10002,46,91,Horizon,Bw1,Bw1,NO,Air-dry,Loam,0.0,0.1,0.6,13.3,37.599998,51.599998,21.9,15.6,37.5,10.9,0.46,2.0,29.0,2.0,33.0,43.0,33.0,6.4,5.8,1.0,25.799999,22.700001,21.9,85.0,96.0,,3.9,13.2,1.21,1.026,40cm_49cm


In [46]:
ncsslayerlabdata_v4.shape

(347457, 40)

##### Insert dummy "max" depth based on bottom depth

In [47]:
# dummy max_depth_bin based off of the bottom depth, hzdepb

dummy_bottom_bin = pd.read_csv('Input Files/dummy_bottom_bin.csv')

dummy_bottom_bin.head()

Unnamed: 0,hzdepb,max_depth_bin
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [48]:
# merge dummy_bottom_bin into main dataframe

ncsslayerlabdata_v5 = pd.merge(ncsslayerlabdata_v4, 
                                         dummy_bottom_bin,
                                         left_on=['hzdepb'],
                                         right_on=['hzdepb'],
                                         how='left')

ncsslayerlabdata_v5.head(5)


Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin
0,10001,0,30,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm,30
1,10001,30,91,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm,90
2,10001,91,122,Horizon,Bw2,Bw2,NO,Air-dry,Sandy clay loam,5.3,3.1,4.5,24.299999,20.1,57.299999,10.4,11.4,21.799999,20.9,0.19,9.0,23.0,7.0,39.0,62.0,39.0,6.5,5.8,2.0,27.1,23.1,22.6,83.0,98.0,,4.5,13.4,0.64,1.028,90cm_99cm,120
3,10002,0,46,Horizon,A,A,NO,Air-dry,Loam,0.4,0.5,1.4,13.6,27.9,43.799999,23.299999,20.9,44.200001,12.0,1.14,1.0,1.0,0.0,2.0,18.0,2.0,5.8,5.2,3.0,19.700001,15.1,13.7,70.0,91.0,,6.0,10.2,0.85,1.016,0cm_9cm,40
4,10002,46,91,Horizon,Bw1,Bw1,NO,Air-dry,Loam,0.0,0.1,0.6,13.3,37.599998,51.599998,21.9,15.6,37.5,10.9,0.46,2.0,29.0,2.0,33.0,43.0,33.0,6.4,5.8,1.0,25.799999,22.700001,21.9,85.0,96.0,,3.9,13.2,1.21,1.026,40cm_49cm,90


In [49]:
ncsslayerlabdata_v5['max_depth_bin_final'] = ncsslayerlabdata_v5.groupby('ncsspedonlabdataiidref')\
                        ['max_depth_bin'].transform('max')

ncsslayerlabdata_v5.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin,max_depth_bin_final
0,10001,0,30,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm,30,120
1,10001,30,91,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm,90,120
2,10001,91,122,Horizon,Bw2,Bw2,NO,Air-dry,Sandy clay loam,5.3,3.1,4.5,24.299999,20.1,57.299999,10.4,11.4,21.799999,20.9,0.19,9.0,23.0,7.0,39.0,62.0,39.0,6.5,5.8,2.0,27.1,23.1,22.6,83.0,98.0,,4.5,13.4,0.64,1.028,90cm_99cm,120,120
3,10002,0,46,Horizon,A,A,NO,Air-dry,Loam,0.4,0.5,1.4,13.6,27.9,43.799999,23.299999,20.9,44.200001,12.0,1.14,1.0,1.0,0.0,2.0,18.0,2.0,5.8,5.2,3.0,19.700001,15.1,13.7,70.0,91.0,,6.0,10.2,0.85,1.016,0cm_9cm,40,110
4,10002,46,91,Horizon,Bw1,Bw1,NO,Air-dry,Loam,0.0,0.1,0.6,13.3,37.599998,51.599998,21.9,15.6,37.5,10.9,0.46,2.0,29.0,2.0,33.0,43.0,33.0,6.4,5.8,1.0,25.799999,22.700001,21.9,85.0,96.0,,3.9,13.2,1.21,1.026,40cm_49cm,90,110


In [50]:
ncsslayerlabdata_v5[ncsslayerlabdata_v5['ncsspedonlabdataiidref'] == '87618']

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin,max_depth_bin_final
346699,87618,75,100,Reporting layer,Bk3,,NO,Air-dry,Silty clay loam,0.0,0.0,0.0,0.1,9.2,9.3,30.200001,7.3,62.799999,28.0,,,,,,,,,,,,,,,,13.0,,,,,70cm_79cm,100,120
346700,87618,100,130,Reporting layer,Bk4,,NO,Air-dry,Silty clay loam,0.0,0.1,0.0,0.6,9.0,9.7,29.6,5.8,57.599998,32.700001,,,,,,,,,,,,,,,,10.0,,,,,100cm_109cm,120,120
346701,87618,130,160,Reporting layer,Bk5,,NO,Air-dry,Silty clay loam,0.0,0.0,0.1,1.0,11.7,12.8,21.6,6.3,50.700001,36.599998,,,,,,,,,,,,,,,,8.0,,,,,120+cm,120,120
346702,87618,160,200,Reporting layer,Btb1,,NO,Air-dry,Silty clay loam,0.0,0.1,0.0,0.7,8.3,9.0,26.200001,6.0,56.0,35.0,,,,,,,,,,,,,,,,3.0,,,,,120+cm,120,120
346703,87618,200,235,Reporting layer,Btb2,,NO,Air-dry,Silty clay loam,0.3,0.2,0.2,1.2,14.0,15.9,27.299999,4.1,51.099998,33.099998,,,,,,,,,,,,,,,,0.0,,,,,120+cm,120,120


In [51]:
ncsslayerlabdata_v5.shape

(347457, 42)

##### Insert missing bins

In [52]:
def create_dummy_df(peiid_list_in,labels_in):
    dummy_df = pd.DataFrame()
    dummy_df['ncsspedonlabdataiidref'] = pd.Series(peiid_list_in).repeat(len(labels_in)).reset_index(drop=True)
    dummy_df['bins'] = pd.Series(labels_in*len(peiid_list_in))
    dummy_df['lower_band'] = pd.Series([0,10,20,30,40,50,
                                              60,70,80,90,100,110,120]*len(peiid_list_in))
    return(dummy_df)

In [53]:
ncsspedonlabdataiidref_list = ncsslayerlabdata_v4.ncsspedonlabdataiidref.unique().tolist()
pedon_dummy_df = create_dummy_df(ncsspedonlabdataiidref_list,labels)
pedon_dummy_df.head(15)

Unnamed: 0,ncsspedonlabdataiidref,bins,lower_band
0,10001,0cm_9cm,0
1,10001,10cm_19cm,10
2,10001,20cm_29cm,20
3,10001,30cm_39cm,30
4,10001,40cm_49cm,40
5,10001,50cm_59cm,50
6,10001,60cm_69cm,60
7,10001,70cm_79cm,70
8,10001,80cm_89cm,80
9,10001,90cm_99cm,90


In [54]:
def insert_all_bin_cols(phorizon_df_binned_in,pedon_dummy_df):
    phorizon_df_binned_in['lower_band']=phorizon_df_binned_in.hzdept_bin.apply(lambda s:re.findall(r"(\d+)\+*cm", str(s))[0])
    phorizon_df_binned_in['lower_band'] = phorizon_df_binned_in['lower_band'].astype(int)
    phorizon_df_binned_in['max_bin'] = phorizon_df_binned_in.groupby('ncsspedonlabdataiidref')\
                        ['lower_band'].transform('max')

    phorizon_df_binned_in.drop(columns=['lower_band'],inplace=True)
    
    x = pd.merge(phorizon_df_binned_in,
             pedon_dummy_df,
             left_on=['ncsspedonlabdataiidref','hzdept_bin'],
             right_on=['ncsspedonlabdataiidref','bins'],
             how='right')
    print('forward fill')
    x['max_bin'] = x.groupby('ncsspedonlabdataiidref')['max_bin'].fillna(method='ffill')
    print('backward fill')
    x['max_bin'] = x.groupby('ncsspedonlabdataiidref')['max_bin'].fillna(method='bfill')
    x['hzdept_bin']=x['bins']
    
    return(x)

In [55]:
ncsslayerlabdata_complete = insert_all_bin_cols(ncsslayerlabdata_v5,pedon_dummy_df)
ncsslayerlabdata_complete.shape

forward fill
backward fill


(798117, 45)

In [57]:
ncsslayerlabdata_complete.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin,max_depth_bin_final,max_bin,bins,lower_band
0,10001,0.0,30.0,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm,30.0,120.0,90.0,0cm_9cm,0
1,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10cm_19cm,,,90.0,10cm_19cm,10
2,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20cm_29cm,,,90.0,20cm_29cm,20
3,10001,30.0,91.0,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm,90.0,120.0,90.0,30cm_39cm,30
4,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40cm_49cm,,,90.0,40cm_49cm,40


In [58]:
ncsslayerlabdata_complete.sort_values(by=['ncsspedonlabdataiidref','lower_band'], inplace=True)
ncsslayerlabdata_complete.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin,max_depth_bin_final,max_bin,bins,lower_band
0,10001,0.0,30.0,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm,30.0,120.0,90.0,0cm_9cm,0
1,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10cm_19cm,,,90.0,10cm_19cm,10
2,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20cm_29cm,,,90.0,20cm_29cm,20
3,10001,30.0,91.0,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm,90.0,120.0,90.0,30cm_39cm,30
4,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40cm_49cm,,,90.0,40cm_49cm,40


In [59]:
ncsslayerlabdata_complete = ncsslayerlabdata_complete.reset_index(drop=True)
ncsslayerlabdata_complete.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept,hzdepb,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,ph1to1h2o,ph01mcacl2,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,hzdept_bin,max_depth_bin,max_depth_bin_final,max_bin,bins,lower_band
0,10001,0.0,30.0,Horizon,A,A,NO,Air-dry,Fine sandy loam,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,6.7,6.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,0cm_9cm,30.0,120.0,90.0,0cm_9cm,0
1,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10cm_19cm,,,90.0,10cm_19cm,10
2,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,20cm_29cm,,,90.0,20cm_29cm,20
3,10001,30.0,91.0,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,6.7,6.1,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,30cm_39cm,90.0,120.0,90.0,30cm_39cm,30
4,10001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,40cm_49cm,,,90.0,40cm_49cm,40


In [60]:
ncsslayerlabdata_complete.shape

(798117, 45)

##### Find weighted average

In [61]:
def find_weighted_average(df,var_name,kind):
    """
    Based on the kind of variable (logarithmic, numerical, Categorical )
    The function find the weighted average to handle duplicates
    
    
    """
    df_in = df.copy()
 
    if kind == 'num':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['weighted_var'] = dropped_na_df['weight']*dropped_na_df[var_name]
        dropped_na_df['val'] = dropped_na_df.groupby(['ncsspedonlabdataiidref','hzdept_bin'])['weighted_var'].transform('sum')
        dropped_na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin','val'],inplace=True)


        na_df = df_in.loc[dropped_index,['ncsspedonlabdataiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band','val']],
                 na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band']],
                 on=['ncsspedonlabdataiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['ncsspedonlabdataiidref','lower_band'],inplace=True)

    if kind == 'log':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['weighted_var'] = dropped_na_df['weight']*np.exp(dropped_na_df[var_name])
        dropped_na_df['val'] = dropped_na_df.groupby(['ncsspedonlabdataiidref',
                    'hzdept_bin'])['weighted_var'].transform('sum')
        dropped_na_df['val'] = np.log(dropped_na_df['val'])
        dropped_na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin','val'],inplace=True)


        na_df = df_in.loc[dropped_index,['ncsspedonlabdataiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band','val']],
                 na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band']],
                 on=['ncsspedonlabdataiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['ncsspedonlabdataiidref','lower_band'],inplace=True)
        
    if kind == 'cat':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['max_weight']=dropped_na_df.groupby(['ncsspedonlabdataiidref','hzdept_bin'])['weight'].transform('max')
        
        dropped_na_df_new = dropped_na_df.loc[(dropped_na_df['weight'] == dropped_na_df['max_weight'])]
        dropped_na_df_new['val'] = dropped_na_df_new[var_name]
        dropped_na_df_new.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin'],inplace=True)
        
        na_df = df_in.loc[dropped_index,['ncsspedonlabdataiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df_new[['ncsspedonlabdataiidref','hzdept_bin','lower_band','val']],
                 na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band']],
                 on=['ncsspedonlabdataiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['ncsspedonlabdataiidref','lower_band'],inplace=True) 
    return(list(merged['val']))

In [62]:
# gets called within the find_weighted_average
def find_weight(df):
        
    """
        This function:
        finds the weight according to depth for each row.
    """
    df_in = df.copy()
    df_in['depth'] = df_in['hzdepb'] - df_in['hzdept']
    df_in.loc[df_in['depth']==0,'depth'] = df_in.loc[df_in['depth']==0,'depth']+0.001
    df_in['total_depth'] = df_in.groupby(['ncsspedonlabdataiidref','hzdept_bin'])['depth'].transform('sum')
    df_in['weight'] = df_in['depth']/df_in['total_depth']
    
    return df_in['weight']

In [63]:
hzdept_df=find_weighted_average(ncsslayerlabdata_complete,'hzdept','num')
hzdepb_df=find_weighted_average(ncsslayerlabdata_complete,'hzdepb','num')
sandvcmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandvcmeasured','num')
sandcomeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandcomeasured','num')
sandmedmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandmedmeasured','num')
sandfinemeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandfinemeasured','num')
sandvfmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandvfmeasured','num')
sandtotmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'sandtotmeasured','num')
siltcomeasured_df=find_weighted_average(ncsslayerlabdata_complete,'siltcomeasured','num')
siltfinemeasured_df=find_weighted_average(ncsslayerlabdata_complete,'siltfinemeasured','num')
silttotmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'silttotmeasured','num')
claytotmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'claytotmeasured','num')
carbonorganicpctmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'carbonorganicpctmeasured','num')
fragwt25_df=find_weighted_average(ncsslayerlabdata_complete,'fragwt25','num')
fragwt520_df=find_weighted_average(ncsslayerlabdata_complete,'fragwt520','num')
fragwt2075_df=find_weighted_average(ncsslayerlabdata_complete,'fragwt2075','num')
fragwt275_df=find_weighted_average(ncsslayerlabdata_complete,'fragwt275','num')
wtpct0175_df=find_weighted_average(ncsslayerlabdata_complete,'wtpct0175','num')
wtpctgt2ws_df=find_weighted_average(ncsslayerlabdata_complete,'wtpctgt2ws','num')
esp_df=find_weighted_average(ncsslayerlabdata_complete,'esp','num')
cecsumcations_df=find_weighted_average(ncsslayerlabdata_complete,'cecsumcations','num')
cec7_df=find_weighted_average(ncsslayerlabdata_complete,'cec7','num')
cec7_df=find_weighted_average(ncsslayerlabdata_complete,'cec7','num')
sumbases_df=find_weighted_average(ncsslayerlabdata_complete,'sumbases','num')
basesatsumcations_df=find_weighted_average(ncsslayerlabdata_complete,'basesatsumcations','num')
basesatnh4oac_df=find_weighted_average(ncsslayerlabdata_complete,'basesatnh4oac','num')
caco3equivmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'caco3equivmeasured','num')
extracid_df=find_weighted_average(ncsslayerlabdata_complete,'extracid','num')
wfifteenbarmeasured_df=find_weighted_average(ncsslayerlabdata_complete,'wfifteenbarmeasured','num')
wfifteenbartoclay_df=find_weighted_average(ncsslayerlabdata_complete,'wfifteenbartoclay','num')
adod_df=find_weighted_average(ncsslayerlabdata_complete,'adod','num')

In [64]:
ph1to1h2o_df = find_weighted_average(ncsslayerlabdata_complete,'ph1to1h2o','log')
ph01mcacl2_df = find_weighted_average(ncsslayerlabdata_complete,'ph01mcacl2','log')

In [65]:
layertype_df = find_weighted_average(ncsslayerlabdata_complete,'layertype','cat')
hzname_df = find_weighted_average(ncsslayerlabdata_complete,'hzname','cat')
hznameoriginal_df = find_weighted_average(ncsslayerlabdata_complete,'hznameoriginal','cat')
stratextsflag_df = find_weighted_average(ncsslayerlabdata_complete,'stratextsflag','cat')
moistprepstate_df = find_weighted_average(ncsslayerlabdata_complete,'moistprepstate','cat')
texcl_df = find_weighted_average(ncsslayerlabdata_complete,'texcl','cat')

##### Create "baseline" dataframe
contains the following: ncsspedonlabdataiidref,	hzdept_bin,	lower_band,	max_depth_bin_final

In [66]:
df_in = ncsslayerlabdata_complete
var_name = 'texcl'

dropped_na_df = df_in.dropna(subset=[var_name])
dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
dropped_na_df['weight'] = find_weight(dropped_na_df)

print('aggregation')
dropped_na_df['ind']=dropped_na_df.groupby(['ncsspedonlabdataiidref','hzdept_bin'])['weight'].transform('idxmax')

print('subsetting')
dropped_na_df['ind'].unique().tolist()
dropped_na_df['val']=dropped_na_df.loc[dropped_na_df['ind'].unique().tolist(),var_name] 
dropped_na_df = dropped_na_df.loc[dropped_na_df['ind'].unique().tolist()]     

na_df = df_in.loc[dropped_index,['ncsspedonlabdataiidref','hzdept_bin','lower_band', 'max_depth_bin_final']]
na_df.drop_duplicates(subset=['ncsspedonlabdataiidref','hzdept_bin'],inplace=True)

merged = pd.merge(dropped_na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band','max_depth_bin_final','val']],
         na_df[['ncsspedonlabdataiidref','hzdept_bin','lower_band', 'max_depth_bin_final']],
         on=['ncsspedonlabdataiidref','hzdept_bin','lower_band', 'max_depth_bin_final'],how='outer')
merged.sort_values(by=['ncsspedonlabdataiidref','lower_band'],inplace=True) 

aggregation
subsetting


In [67]:
ncsslayerlab_base = merged[['ncsspedonlabdataiidref', 'hzdept_bin', 'lower_band', 'max_depth_bin_final']].reset_index(drop=True)
ncsslayerlab_base.head(10)

Unnamed: 0,ncsspedonlabdataiidref,hzdept_bin,lower_band,max_depth_bin_final
0,10001,0cm_9cm,0,120.0
1,10001,10cm_19cm,10,
2,10001,20cm_29cm,20,
3,10001,30cm_39cm,30,120.0
4,10001,40cm_49cm,40,
5,10001,50cm_59cm,50,
6,10001,60cm_69cm,60,
7,10001,70cm_79cm,70,
8,10001,80cm_89cm,80,
9,10001,90cm_99cm,90,120.0


In [68]:
# fill in NaN for max_depth_bin_final column

ncsslayerlab_base['max_depth_bin_final'] = ncsslayerlab_base.groupby('ncsspedonlabdataiidref')['max_depth_bin_final'].fillna(method='ffill')
ncsslayerlab_base['max_depth_bin_final'] = ncsslayerlab_base.groupby('ncsspedonlabdataiidref')['max_depth_bin_final'].fillna(method='bfill')


##### combine lists + append ncsslayerlab_base

In [69]:
ncsslayerlab_weighted = pd.DataFrame(
    {'hzdept': hzdept_df,
    'hzdepb': hzdepb_df,
    'ph1to1h2o': ph1to1h2o_df,
    'ph01mcacl2': ph01mcacl2_df,
    'sandvcmeasured': sandvcmeasured_df,
    'sandcomeasured': sandcomeasured_df,
    'sandmedmeasured': sandmedmeasured_df,
    'sandfinemeasured': sandfinemeasured_df,
    'sandvfmeasured': sandvfmeasured_df,
    'sandtotmeasured': sandtotmeasured_df,
    'siltcomeasured': siltcomeasured_df,
    'siltfinemeasured': siltfinemeasured_df,
    'silttotmeasured': silttotmeasured_df,
    'claytotmeasured': claytotmeasured_df,
    'carbonorganicpctmeasured': carbonorganicpctmeasured_df,
    'fragwt25': fragwt25_df,
    'fragwt520': fragwt520_df,
    'fragwt2075': fragwt2075_df,
    'fragwt275': fragwt275_df,
    'wtpct0175': wtpct0175_df,
    'wtpctgt2ws': wtpctgt2ws_df,
    'esp': esp_df,
    'cecsumcations': cecsumcations_df,
    'cec7': cec7_df,
    'sumbases': sumbases_df,
    'basesatsumcations': basesatsumcations_df,
    'basesatnh4oac': basesatnh4oac_df,
    'caco3equivmeasured': caco3equivmeasured_df,
    'extracid': extracid_df,
    'wfifteenbarmeasured': wfifteenbarmeasured_df,
    'wfifteenbartoclay': wfifteenbartoclay_df,
    'adod': adod_df,
    'layertype': layertype_df,
    'hzname': hzname_df,
    'hznameoriginal': hznameoriginal_df,
    'stratextsflag': stratextsflag_df,
    'moistprepstate': moistprepstate_df,
    'texcl': texcl_df,
    })

In [70]:
ncsslayerlab_weighted_complete = pd.concat([ncsslayerlab_base, ncsslayerlab_weighted], axis=1)
ncsslayerlab_weighted_complete

Unnamed: 0,ncsspedonlabdataiidref,hzdept_bin,lower_band,max_depth_bin_final,hzdept,hzdepb,ph1to1h2o,ph01mcacl2,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl
0,10001,0cm_9cm,0,120.0,0.0,30.0,6.7,6.0,5.5,2.5,4.0,32.000000,23.4,67.400002,13.2,12.5,25.700001,6.900000,0.49,9.0,20.0,5.0,34.0,63.0,34.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,Horizon,A,A,NO,Air-dry,Fine sandy loam
1,10001,10cm_19cm,10,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,10001,20cm_29cm,20,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,10001,30cm_39cm,30,120.0,30.0,91.0,6.7,6.1,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.000000,7.900000,0.32,9.0,34.0,12.0,55.0,77.0,55.0,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam
4,10001,40cm_49cm,40,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
748600,87769,80cm_89cm,80,120.0,86.0,120.0,4.9,4.7,0.2,0.7,1.3,2.900000,3.5,8.600000,10.1,44.5,54.599998,36.799999,,4.0,1.0,3.0,8.0,13.0,8.0,0.0,2.5,,0.1,4.0,1.0,,2.4,21.1,0.57,1.016,Horizon,Cd,Cd,NO,Air-dry,Silty clay loam
748601,87769,90cm_99cm,90,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
748602,87769,100cm_109cm,100,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
748603,87769,110cm_119cm,110,120.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [71]:
ncsslayerlab_weighted_complete['max_depth_bin_final'] = ncsslayerlab_weighted_complete['max_depth_bin_final'].astype('int')

##### Fill bins

In [72]:
def fill_bins_func(df_in):
    # break into variable "family" of dataframes
    labels_list = ['ph1to1h2o',
                    'ph01mcacl2',
                    'hzdept',
                    'hzdepb',
                    'sandvcmeasured',
                    'sandcomeasured',
                    'sandmedmeasured',
                    'sandfinemeasured',
                    'sandvfmeasured',
                    'sandtotmeasured',
                    'siltcomeasured',
                    'siltfinemeasured',
                    'silttotmeasured',
                    'claytotmeasured',
                    'carbonorganicpctmeasured',
                    'fragwt25',
                    'fragwt520',
                    'fragwt2075',
                    'fragwt275',
                    'wtpct0175',
                    'wtpctgt2ws',
                    'esp',
                    'cecsumcations',
                    'cec7',
                    'sumbases',
                    'basesatsumcations',
                    'basesatnh4oac',
                    'caco3equivmeasured',
                    'extracid',
                    'wfifteenbarmeasured',
                    'wfifteenbartoclay',
                    'adod',
                    'layertype',
                    'hzname',
                    'hznameoriginal',
                    'stratextsflag',
                    'moistprepstate',
                    'texcl']
    for label in labels_list:
        startTime = datetime.now()
        print(label)       
        #### check when switching to new pedon and that first row of pedon is Null(maybe groupby)
        df_in[label] = df_in.groupby('ncsspedonlabdataiidref')[label].fillna(method='ffill')
        df_in.loc[df_in['lower_band']>df_in['max_depth_bin_final'],label]=np.nan
        print('Time elapsed...{}'.format(datetime.now() - startTime))    
        print(datetime.now() - startTime)

    return(df_in)

In [73]:
ncsslayerlab_weighted_filled = fill_bins_func(ncsslayerlab_weighted_complete)

ph1to1h2o
Time elapsed...0:00:11.042660
0:00:11.044230
ph01mcacl2
Time elapsed...0:00:10.776870
0:00:10.777019
hzdept
Time elapsed...0:00:10.451753
0:00:10.451876
hzdepb
Time elapsed...0:00:10.462007
0:00:10.462158
sandvcmeasured
Time elapsed...0:00:10.063613
0:00:10.063768
sandcomeasured
Time elapsed...0:00:11.433114
0:00:11.433250
sandmedmeasured
Time elapsed...0:00:14.551439
0:00:14.551615
sandfinemeasured
Time elapsed...0:00:14.013287
0:00:14.013470
sandvfmeasured
Time elapsed...0:00:12.520259
0:00:12.520416
sandtotmeasured
Time elapsed...0:00:11.614544
0:00:11.614697
siltcomeasured
Time elapsed...0:00:10.992648
0:00:10.992816
siltfinemeasured
Time elapsed...0:00:11.933714
0:00:11.933860
silttotmeasured
Time elapsed...0:00:11.375917
0:00:11.376035
claytotmeasured
Time elapsed...0:00:11.138649
0:00:11.138804
carbonorganicpctmeasured
Time elapsed...0:00:11.180148
0:00:11.180342
fragwt25
Time elapsed...0:00:11.314900
0:00:11.315063
fragwt520
Time elapsed...0:00:11.286145
0:00:11.28638

In [74]:
ncsslayerlab_weighted_filled.head()

Unnamed: 0,ncsspedonlabdataiidref,hzdept_bin,lower_band,max_depth_bin_final,hzdept,hzdepb,ph1to1h2o,ph01mcacl2,sandvcmeasured,sandcomeasured,sandmedmeasured,sandfinemeasured,sandvfmeasured,sandtotmeasured,siltcomeasured,siltfinemeasured,silttotmeasured,claytotmeasured,carbonorganicpctmeasured,fragwt25,fragwt520,fragwt2075,fragwt275,wtpct0175,wtpctgt2ws,esp,cecsumcations,cec7,sumbases,basesatsumcations,basesatnh4oac,caco3equivmeasured,extracid,wfifteenbarmeasured,wfifteenbartoclay,adod,layertype,hzname,hznameoriginal,stratextsflag,moistprepstate,texcl
0,10001,0cm_9cm,0,120,0.0,30.0,6.7,6.0,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,Horizon,A,A,NO,Air-dry,Fine sandy loam
1,10001,10cm_19cm,10,120,0.0,30.0,6.7,6.0,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,Horizon,A,A,NO,Air-dry,Fine sandy loam
2,10001,20cm_29cm,20,120,0.0,30.0,6.7,6.0,5.5,2.5,4.0,32.0,23.4,67.400002,13.2,12.5,25.700001,6.9,0.49,9.0,20.0,5.0,34.0,63.0,34.0,4.0,11.4,7.0,9.1,80.0,100.0,,2.3,5.9,0.86,1.007,Horizon,A,A,NO,Air-dry,Fine sandy loam
3,10001,30cm_39cm,30,120,30.0,91.0,6.7,6.1,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam
4,10001,40cm_49cm,40,120,30.0,91.0,6.7,6.1,5.4,3.5,6.2,34.099998,22.9,72.099998,10.5,9.5,20.0,7.9,0.32,9.0,34.0,12.0,55.0,77.0,55.0,5.0,10.5,8.0,9.0,86.0,100.0,0.0,1.5,6.2,0.78,1.009,Horizon,Bw1,Bw1,NO,Air-dry,Fine sandy loam


##### Pivot

In [75]:
def pivot_table_func(df_in):
    phorizon_df_pivoted = df_in.pivot(index='ncsspedonlabdataiidref', columns='hzdept_bin')
    # fix headers
    phorizon_df_pivoted.columns = ["_".join((i, j)) for i,j in phorizon_df_pivoted.columns]
    phorizon_df_pivoted.reset_index(inplace=True)
    
    return(phorizon_df_pivoted)

In [76]:
ncsslayerlab_weighted_filled_pivot = pivot_table_func(ncsslayerlab_weighted_filled)
ncsslayerlab_weighted_filled_pivot

Unnamed: 0,ncsspedonlabdataiidref,lower_band_0cm_9cm,lower_band_100cm_109cm,lower_band_10cm_19cm,lower_band_110cm_119cm,lower_band_120+cm,lower_band_20cm_29cm,lower_band_30cm_39cm,lower_band_40cm_49cm,lower_band_50cm_59cm,lower_band_60cm_69cm,lower_band_70cm_79cm,lower_band_80cm_89cm,lower_band_90cm_99cm,max_depth_bin_final_0cm_9cm,max_depth_bin_final_100cm_109cm,max_depth_bin_final_10cm_19cm,max_depth_bin_final_110cm_119cm,max_depth_bin_final_120+cm,max_depth_bin_final_20cm_29cm,max_depth_bin_final_30cm_39cm,max_depth_bin_final_40cm_49cm,max_depth_bin_final_50cm_59cm,max_depth_bin_final_60cm_69cm,max_depth_bin_final_70cm_79cm,max_depth_bin_final_80cm_89cm,max_depth_bin_final_90cm_99cm,hzdept_0cm_9cm,hzdept_100cm_109cm,hzdept_10cm_19cm,hzdept_110cm_119cm,hzdept_120+cm,hzdept_20cm_29cm,hzdept_30cm_39cm,hzdept_40cm_49cm,hzdept_50cm_59cm,hzdept_60cm_69cm,hzdept_70cm_79cm,hzdept_80cm_89cm,hzdept_90cm_99cm,hzdepb_0cm_9cm,hzdepb_100cm_109cm,hzdepb_10cm_19cm,hzdepb_110cm_119cm,hzdepb_120+cm,hzdepb_20cm_29cm,hzdepb_30cm_39cm,hzdepb_40cm_49cm,hzdepb_50cm_59cm,hzdepb_60cm_69cm,hzdepb_70cm_79cm,hzdepb_80cm_89cm,hzdepb_90cm_99cm,ph1to1h2o_0cm_9cm,ph1to1h2o_100cm_109cm,ph1to1h2o_10cm_19cm,ph1to1h2o_110cm_119cm,ph1to1h2o_120+cm,ph1to1h2o_20cm_29cm,ph1to1h2o_30cm_39cm,ph1to1h2o_40cm_49cm,ph1to1h2o_50cm_59cm,ph1to1h2o_60cm_69cm,ph1to1h2o_70cm_79cm,ph1to1h2o_80cm_89cm,ph1to1h2o_90cm_99cm,ph01mcacl2_0cm_9cm,ph01mcacl2_100cm_109cm,ph01mcacl2_10cm_19cm,ph01mcacl2_110cm_119cm,ph01mcacl2_120+cm,ph01mcacl2_20cm_29cm,ph01mcacl2_30cm_39cm,ph01mcacl2_40cm_49cm,ph01mcacl2_50cm_59cm,ph01mcacl2_60cm_69cm,ph01mcacl2_70cm_79cm,ph01mcacl2_80cm_89cm,ph01mcacl2_90cm_99cm,sandvcmeasured_0cm_9cm,sandvcmeasured_100cm_109cm,sandvcmeasured_10cm_19cm,sandvcmeasured_110cm_119cm,sandvcmeasured_120+cm,sandvcmeasured_20cm_29cm,sandvcmeasured_30cm_39cm,sandvcmeasured_40cm_49cm,sandvcmeasured_50cm_59cm,sandvcmeasured_60cm_69cm,sandvcmeasured_70cm_79cm,sandvcmeasured_80cm_89cm,sandvcmeasured_90cm_99cm,sandcomeasured_0cm_9cm,sandcomeasured_100cm_109cm,sandcomeasured_10cm_19cm,sandcomeasured_110cm_119cm,sandcomeasured_120+cm,sandcomeasured_20cm_29cm,sandcomeasured_30cm_39cm,sandcomeasured_40cm_49cm,sandcomeasured_50cm_59cm,sandcomeasured_60cm_69cm,sandcomeasured_70cm_79cm,sandcomeasured_80cm_89cm,sandcomeasured_90cm_99cm,sandmedmeasured_0cm_9cm,sandmedmeasured_100cm_109cm,sandmedmeasured_10cm_19cm,sandmedmeasured_110cm_119cm,sandmedmeasured_120+cm,sandmedmeasured_20cm_29cm,sandmedmeasured_30cm_39cm,sandmedmeasured_40cm_49cm,sandmedmeasured_50cm_59cm,sandmedmeasured_60cm_69cm,sandmedmeasured_70cm_79cm,sandmedmeasured_80cm_89cm,sandmedmeasured_90cm_99cm,sandfinemeasured_0cm_9cm,sandfinemeasured_100cm_109cm,sandfinemeasured_10cm_19cm,sandfinemeasured_110cm_119cm,sandfinemeasured_120+cm,sandfinemeasured_20cm_29cm,sandfinemeasured_30cm_39cm,sandfinemeasured_40cm_49cm,sandfinemeasured_50cm_59cm,sandfinemeasured_60cm_69cm,sandfinemeasured_70cm_79cm,sandfinemeasured_80cm_89cm,sandfinemeasured_90cm_99cm,sandvfmeasured_0cm_9cm,sandvfmeasured_100cm_109cm,sandvfmeasured_10cm_19cm,sandvfmeasured_110cm_119cm,sandvfmeasured_120+cm,sandvfmeasured_20cm_29cm,sandvfmeasured_30cm_39cm,sandvfmeasured_40cm_49cm,sandvfmeasured_50cm_59cm,sandvfmeasured_60cm_69cm,sandvfmeasured_70cm_79cm,sandvfmeasured_80cm_89cm,sandvfmeasured_90cm_99cm,sandtotmeasured_0cm_9cm,sandtotmeasured_100cm_109cm,sandtotmeasured_10cm_19cm,sandtotmeasured_110cm_119cm,sandtotmeasured_120+cm,sandtotmeasured_20cm_29cm,sandtotmeasured_30cm_39cm,sandtotmeasured_40cm_49cm,sandtotmeasured_50cm_59cm,sandtotmeasured_60cm_69cm,sandtotmeasured_70cm_79cm,sandtotmeasured_80cm_89cm,sandtotmeasured_90cm_99cm,siltcomeasured_0cm_9cm,siltcomeasured_100cm_109cm,siltcomeasured_10cm_19cm,siltcomeasured_110cm_119cm,siltcomeasured_120+cm,siltcomeasured_20cm_29cm,siltcomeasured_30cm_39cm,siltcomeasured_40cm_49cm,siltcomeasured_50cm_59cm,siltcomeasured_60cm_69cm,siltcomeasured_70cm_79cm,siltcomeasured_80cm_89cm,siltcomeasured_90cm_99cm,siltfinemeasured_0cm_9cm,siltfinemeasured_100cm_109cm,siltfinemeasured_10cm_19cm,siltfinemeasured_110cm_119cm,siltfinemeasured_120+cm,siltfinemeasured_20cm_29cm,siltfinemeasured_30cm_39cm,siltfinemeasured_40cm_49cm,siltfinemeasured_50cm_59cm,siltfinemeasured_60cm_69cm,siltfinemeasured_70cm_79cm,siltfinemeasured_80cm_89cm,siltfinemeasured_90cm_99cm,silttotmeasured_0cm_9cm,silttotmeasured_100cm_109cm,silttotmeasured_10cm_19cm,silttotmeasured_110cm_119cm,silttotmeasured_120+cm,silttotmeasured_20cm_29cm,silttotmeasured_30cm_39cm,silttotmeasured_40cm_49cm,silttotmeasured_50cm_59cm,silttotmeasured_60cm_69cm,silttotmeasured_70cm_79cm,silttotmeasured_80cm_89cm,silttotmeasured_90cm_99cm,claytotmeasured_0cm_9cm,claytotmeasured_100cm_109cm,claytotmeasured_10cm_19cm,claytotmeasured_110cm_119cm,claytotmeasured_120+cm,claytotmeasured_20cm_29cm,claytotmeasured_30cm_39cm,claytotmeasured_40cm_49cm,claytotmeasured_50cm_59cm,claytotmeasured_60cm_69cm,claytotmeasured_70cm_79cm,claytotmeasured_80cm_89cm,claytotmeasured_90cm_99cm,carbonorganicpctmeasured_0cm_9cm,carbonorganicpctmeasured_100cm_109cm,carbonorganicpctmeasured_10cm_19cm,carbonorganicpctmeasured_110cm_119cm,carbonorganicpctmeasured_120+cm,carbonorganicpctmeasured_20cm_29cm,carbonorganicpctmeasured_30cm_39cm,carbonorganicpctmeasured_40cm_49cm,carbonorganicpctmeasured_50cm_59cm,carbonorganicpctmeasured_60cm_69cm,carbonorganicpctmeasured_70cm_79cm,carbonorganicpctmeasured_80cm_89cm,carbonorganicpctmeasured_90cm_99cm,fragwt25_0cm_9cm,fragwt25_100cm_109cm,fragwt25_10cm_19cm,fragwt25_110cm_119cm,fragwt25_120+cm,fragwt25_20cm_29cm,fragwt25_30cm_39cm,fragwt25_40cm_49cm,fragwt25_50cm_59cm,fragwt25_60cm_69cm,fragwt25_70cm_79cm,fragwt25_80cm_89cm,fragwt25_90cm_99cm,fragwt520_0cm_9cm,fragwt520_100cm_109cm,fragwt520_10cm_19cm,fragwt520_110cm_119cm,fragwt520_120+cm,fragwt520_20cm_29cm,fragwt520_30cm_39cm,fragwt520_40cm_49cm,fragwt520_50cm_59cm,fragwt520_60cm_69cm,fragwt520_70cm_79cm,fragwt520_80cm_89cm,fragwt520_90cm_99cm,fragwt2075_0cm_9cm,fragwt2075_100cm_109cm,...,fragwt275_70cm_79cm,fragwt275_80cm_89cm,fragwt275_90cm_99cm,wtpct0175_0cm_9cm,wtpct0175_100cm_109cm,wtpct0175_10cm_19cm,wtpct0175_110cm_119cm,wtpct0175_120+cm,wtpct0175_20cm_29cm,wtpct0175_30cm_39cm,wtpct0175_40cm_49cm,wtpct0175_50cm_59cm,wtpct0175_60cm_69cm,wtpct0175_70cm_79cm,wtpct0175_80cm_89cm,wtpct0175_90cm_99cm,wtpctgt2ws_0cm_9cm,wtpctgt2ws_100cm_109cm,wtpctgt2ws_10cm_19cm,wtpctgt2ws_110cm_119cm,wtpctgt2ws_120+cm,wtpctgt2ws_20cm_29cm,wtpctgt2ws_30cm_39cm,wtpctgt2ws_40cm_49cm,wtpctgt2ws_50cm_59cm,wtpctgt2ws_60cm_69cm,wtpctgt2ws_70cm_79cm,wtpctgt2ws_80cm_89cm,wtpctgt2ws_90cm_99cm,esp_0cm_9cm,esp_100cm_109cm,esp_10cm_19cm,esp_110cm_119cm,esp_120+cm,esp_20cm_29cm,esp_30cm_39cm,esp_40cm_49cm,esp_50cm_59cm,esp_60cm_69cm,esp_70cm_79cm,esp_80cm_89cm,esp_90cm_99cm,cecsumcations_0cm_9cm,cecsumcations_100cm_109cm,cecsumcations_10cm_19cm,cecsumcations_110cm_119cm,cecsumcations_120+cm,cecsumcations_20cm_29cm,cecsumcations_30cm_39cm,cecsumcations_40cm_49cm,cecsumcations_50cm_59cm,cecsumcations_60cm_69cm,cecsumcations_70cm_79cm,cecsumcations_80cm_89cm,cecsumcations_90cm_99cm,cec7_0cm_9cm,cec7_100cm_109cm,cec7_10cm_19cm,cec7_110cm_119cm,cec7_120+cm,cec7_20cm_29cm,cec7_30cm_39cm,cec7_40cm_49cm,cec7_50cm_59cm,cec7_60cm_69cm,cec7_70cm_79cm,cec7_80cm_89cm,cec7_90cm_99cm,sumbases_0cm_9cm,sumbases_100cm_109cm,sumbases_10cm_19cm,sumbases_110cm_119cm,sumbases_120+cm,sumbases_20cm_29cm,sumbases_30cm_39cm,sumbases_40cm_49cm,sumbases_50cm_59cm,sumbases_60cm_69cm,sumbases_70cm_79cm,sumbases_80cm_89cm,sumbases_90cm_99cm,basesatsumcations_0cm_9cm,basesatsumcations_100cm_109cm,basesatsumcations_10cm_19cm,basesatsumcations_110cm_119cm,basesatsumcations_120+cm,basesatsumcations_20cm_29cm,basesatsumcations_30cm_39cm,basesatsumcations_40cm_49cm,basesatsumcations_50cm_59cm,basesatsumcations_60cm_69cm,basesatsumcations_70cm_79cm,basesatsumcations_80cm_89cm,basesatsumcations_90cm_99cm,basesatnh4oac_0cm_9cm,basesatnh4oac_100cm_109cm,basesatnh4oac_10cm_19cm,basesatnh4oac_110cm_119cm,basesatnh4oac_120+cm,basesatnh4oac_20cm_29cm,basesatnh4oac_30cm_39cm,basesatnh4oac_40cm_49cm,basesatnh4oac_50cm_59cm,basesatnh4oac_60cm_69cm,basesatnh4oac_70cm_79cm,basesatnh4oac_80cm_89cm,basesatnh4oac_90cm_99cm,caco3equivmeasured_0cm_9cm,caco3equivmeasured_100cm_109cm,caco3equivmeasured_10cm_19cm,caco3equivmeasured_110cm_119cm,caco3equivmeasured_120+cm,caco3equivmeasured_20cm_29cm,caco3equivmeasured_30cm_39cm,caco3equivmeasured_40cm_49cm,caco3equivmeasured_50cm_59cm,caco3equivmeasured_60cm_69cm,caco3equivmeasured_70cm_79cm,caco3equivmeasured_80cm_89cm,caco3equivmeasured_90cm_99cm,extracid_0cm_9cm,extracid_100cm_109cm,extracid_10cm_19cm,extracid_110cm_119cm,extracid_120+cm,extracid_20cm_29cm,extracid_30cm_39cm,extracid_40cm_49cm,extracid_50cm_59cm,extracid_60cm_69cm,extracid_70cm_79cm,extracid_80cm_89cm,extracid_90cm_99cm,wfifteenbarmeasured_0cm_9cm,wfifteenbarmeasured_100cm_109cm,wfifteenbarmeasured_10cm_19cm,wfifteenbarmeasured_110cm_119cm,wfifteenbarmeasured_120+cm,wfifteenbarmeasured_20cm_29cm,wfifteenbarmeasured_30cm_39cm,wfifteenbarmeasured_40cm_49cm,wfifteenbarmeasured_50cm_59cm,wfifteenbarmeasured_60cm_69cm,wfifteenbarmeasured_70cm_79cm,wfifteenbarmeasured_80cm_89cm,wfifteenbarmeasured_90cm_99cm,wfifteenbartoclay_0cm_9cm,wfifteenbartoclay_100cm_109cm,wfifteenbartoclay_10cm_19cm,wfifteenbartoclay_110cm_119cm,wfifteenbartoclay_120+cm,wfifteenbartoclay_20cm_29cm,wfifteenbartoclay_30cm_39cm,wfifteenbartoclay_40cm_49cm,wfifteenbartoclay_50cm_59cm,wfifteenbartoclay_60cm_69cm,wfifteenbartoclay_70cm_79cm,wfifteenbartoclay_80cm_89cm,wfifteenbartoclay_90cm_99cm,adod_0cm_9cm,adod_100cm_109cm,adod_10cm_19cm,adod_110cm_119cm,adod_120+cm,adod_20cm_29cm,adod_30cm_39cm,adod_40cm_49cm,adod_50cm_59cm,adod_60cm_69cm,adod_70cm_79cm,adod_80cm_89cm,adod_90cm_99cm,layertype_0cm_9cm,layertype_100cm_109cm,layertype_10cm_19cm,layertype_110cm_119cm,layertype_120+cm,layertype_20cm_29cm,layertype_30cm_39cm,layertype_40cm_49cm,layertype_50cm_59cm,layertype_60cm_69cm,layertype_70cm_79cm,layertype_80cm_89cm,layertype_90cm_99cm,hzname_0cm_9cm,hzname_100cm_109cm,hzname_10cm_19cm,hzname_110cm_119cm,hzname_120+cm,hzname_20cm_29cm,hzname_30cm_39cm,hzname_40cm_49cm,hzname_50cm_59cm,hzname_60cm_69cm,hzname_70cm_79cm,hzname_80cm_89cm,hzname_90cm_99cm,hznameoriginal_0cm_9cm,hznameoriginal_100cm_109cm,hznameoriginal_10cm_19cm,hznameoriginal_110cm_119cm,hznameoriginal_120+cm,hznameoriginal_20cm_29cm,hznameoriginal_30cm_39cm,hznameoriginal_40cm_49cm,hznameoriginal_50cm_59cm,hznameoriginal_60cm_69cm,hznameoriginal_70cm_79cm,hznameoriginal_80cm_89cm,hznameoriginal_90cm_99cm,stratextsflag_0cm_9cm,stratextsflag_100cm_109cm,stratextsflag_10cm_19cm,stratextsflag_110cm_119cm,stratextsflag_120+cm,stratextsflag_20cm_29cm,stratextsflag_30cm_39cm,stratextsflag_40cm_49cm,stratextsflag_50cm_59cm,stratextsflag_60cm_69cm,stratextsflag_70cm_79cm,stratextsflag_80cm_89cm,stratextsflag_90cm_99cm,moistprepstate_0cm_9cm,moistprepstate_100cm_109cm,moistprepstate_10cm_19cm,moistprepstate_110cm_119cm,moistprepstate_120+cm,moistprepstate_20cm_29cm,moistprepstate_30cm_39cm,moistprepstate_40cm_49cm,moistprepstate_50cm_59cm,moistprepstate_60cm_69cm,moistprepstate_70cm_79cm,moistprepstate_80cm_89cm,moistprepstate_90cm_99cm,texcl_0cm_9cm,texcl_100cm_109cm,texcl_10cm_19cm,texcl_110cm_119cm,texcl_120+cm,texcl_20cm_29cm,texcl_30cm_39cm,texcl_40cm_49cm,texcl_50cm_59cm,texcl_60cm_69cm,texcl_70cm_79cm,texcl_80cm_89cm,texcl_90cm_99cm
0,10001,0,100,10,110,120,20,30,40,50,60,70,80,90,120,120,120,120,120,120,120,120,120,120,120,120,120,0.000000,91.0,0.0,91.0,91.0000,0.0,30.0,30.0,30.0,30.0,30.0,30.0,91.0,30.000000,122.0,30.0,122.0,122.0000,30.0,91.0,91.0,91.0,91.0,91.0,91.0,122.0,6.700000,6.5,6.7,6.5,6.500000,6.7,6.7,6.7,6.7,6.7,6.7,6.7,6.5,6.000000,5.8,6.0,5.8,5.800000,6.0,6.1,6.1,6.1,6.1,6.1,6.1,5.8,5.500000,5.3,5.5,5.3,5.3000,5.5,5.4,5.4,5.4,5.4,5.4,5.4,5.3,2.500000,3.1,2.5,3.1,3.10,2.5,3.5,3.5,3.5,3.5,3.5,3.5,3.1,4.000000,4.5,4.0,4.5,4.50000,4.0,6.2,6.2,6.2,6.2,6.2,6.2,4.5,32.000000,24.299999,32.0,24.299999,24.299999,32.0,34.099998,34.099998,34.099998,34.099998,34.099998,34.099998,24.299999,23.400000,20.100000,23.4,20.100000,20.100,23.4,22.900000,22.900000,22.900000,22.900000,22.900000,22.900000,20.100000,67.400002,57.299999,67.400002,57.299999,57.299999,67.400002,72.099998,72.099998,72.099998,72.099998,72.099998,72.099998,57.299999,13.200000,10.400000,13.200000,10.400000,10.40000,13.200000,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,10.400000,12.500000,11.400000,12.500000,11.4,11.40000,12.500000,9.500000,9.500000,9.500000,9.500000,9.500000,9.500000,11.400000,25.700001,21.799999,25.700001,21.799999,21.799999,25.700001,20.000000,20.000000,20.000000,20.000000,20.000000,20.000000,21.799999,6.900000,20.900000,6.900000,20.900000,20.900000,6.900000,7.900000,7.900000,7.900000,7.900000,7.900000,7.900000,20.900000,0.49,0.19,0.49,0.19,0.1900,0.49,0.32,0.32,0.32,0.32,0.32,0.32,0.19,9.000000,9.0,9.0,9.0,9.00,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,20.000000,23.0,20.0,23.0,23.000,20.0,34.0,34.0,34.0,34.0,34.0,34.0,23.0,5.0,7.0,...,55.0,55.0,39.0,63.000000,62.0,63.0,62.0,62.00,63.0,77.0,77.0,77.0,77.0,77.0,77.0,62.0,34.000000,39.0,34.0,39.0,39.000,34.0,55.0,55.0,55.0,55.0,55.0,55.0,39.0,4.0,2.0,4.0,2.0,2.0000,4.0,5.0,5.0,5.0,5.0,5.0,5.0,2.0,11.400000,27.100000,11.400000,27.100000,27.100,11.400000,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,27.100000,7.000000,23.100000,7.000000,23.100000,23.10,7.000000,8.000000,8.000000,8.000000,8.000000,8.000000,8.000000,23.100000,9.100000,22.6,9.1,22.6,22.60000,9.1,9.0,9.0,9.0,9.0,9.0,9.0,22.6,80.000000,83.0,80.0,83.0,83.000,80.0,86.0,86.0,86.0,86.0,86.0,86.0,83.0,100.000000,98.0,100.0,98.0,98.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,98.0,,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.300000,4.5,2.300000,4.5,4.50000,2.300000,1.500000,1.500000,1.500000,1.500000,1.500000,1.5,4.5,5.900000,13.4,5.9,13.4,13.4,5.9,6.2,6.200000,6.200000,6.200000,6.200000,6.2,13.4,0.860000,0.64,0.86,0.64,0.6400,0.86,0.78,0.78,0.78,0.78,0.78,0.78,0.64,1.007000,1.028,1.007,1.028,1.028000,1.007,1.009,1.009,1.009,1.009,1.009,1.009,1.028,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,Bw2,Bw2,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,A,Bw2,A,Bw2,Bw2,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Fine sandy loam,Sandy clay loam,Fine sandy loam,Sandy clay loam,Sandy clay loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Sandy clay loam
1,10002,0,100,10,110,120,20,30,40,50,60,70,80,90,110,110,110,110,110,110,110,110,110,110,110,110,110,0.000000,91.0,0.0,91.0,,0.0,0.0,46.0,46.0,46.0,46.0,46.0,91.0,46.000000,117.0,46.0,117.0,,46.0,46.0,91.0,91.0,91.0,91.0,91.0,117.0,5.800000,6.4,5.8,6.4,,5.8,5.8,6.4,6.4,6.4,6.4,6.4,6.4,5.200000,5.8,5.2,5.8,,5.2,5.2,5.8,5.8,5.8,5.8,5.8,5.8,0.400000,0.6,0.4,0.6,,0.4,0.4,0.0,0.0,0.0,0.0,0.0,0.6,0.500000,2.1,0.5,2.1,,0.5,0.5,0.1,0.1,0.1,0.1,0.1,2.1,1.400000,2.4,1.4,2.4,,1.4,1.4,0.6,0.6,0.6,0.6,0.6,2.4,13.600000,10.300000,13.6,10.300000,,13.6,13.600000,13.300000,13.300000,13.300000,13.300000,13.300000,10.300000,27.900000,31.000000,27.9,31.000000,,27.9,27.900000,37.599998,37.599998,37.599998,37.599998,37.599998,31.000000,43.799999,46.400002,43.799999,46.400002,,43.799999,43.799999,51.599998,51.599998,51.599998,51.599998,51.599998,46.400002,23.299999,22.400000,23.299999,22.400000,,23.299999,23.299999,21.900000,21.900000,21.900000,21.900000,21.900000,22.400000,20.900000,15.700000,20.900000,15.7,,20.900000,20.900000,15.600000,15.600000,15.600000,15.600000,15.600000,15.700000,44.200001,38.099998,44.200001,38.099998,,44.200001,44.200001,37.500000,37.500000,37.500000,37.500000,37.500000,38.099998,12.000000,15.500000,12.000000,15.500000,,12.000000,12.000000,10.900000,10.900000,10.900000,10.900000,10.900000,15.500000,1.14,0.53,1.14,0.53,,1.14,1.14,0.46,0.46,0.46,0.46,0.46,0.53,1.000000,6.0,1.0,6.0,,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,1.000000,36.0,1.0,36.0,,1.0,1.0,29.0,29.0,29.0,29.0,29.0,36.0,0.0,33.0,...,33.0,33.0,75.0,18.000000,79.0,18.0,79.0,,18.0,18.0,43.0,43.0,43.0,43.0,43.0,79.0,2.000000,75.0,2.0,75.0,,2.0,2.0,33.0,33.0,33.0,33.0,33.0,75.0,3.0,2.0,3.0,2.0,,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,19.700001,34.599998,19.700001,34.599998,,19.700001,19.700001,25.799999,25.799999,25.799999,25.799999,25.799999,34.599998,15.100000,30.200001,15.100000,30.200001,,15.100000,15.100000,22.700001,22.700001,22.700001,22.700001,22.700001,30.200001,13.700000,29.5,13.7,29.5,,13.7,13.7,21.9,21.9,21.9,21.9,21.9,29.5,70.000000,85.0,70.0,85.0,,70.0,70.0,85.0,85.0,85.0,85.0,85.0,85.0,91.000000,98.0,91.0,98.0,,91.0,91.0,96.0,96.0,96.0,96.0,96.0,98.0,,,,,,,,,,,,,,6.000000,5.1,6.000000,5.1,,6.000000,6.000000,3.900000,3.900000,3.900000,3.900000,3.9,5.1,10.200000,15.5,10.2,15.5,,10.2,10.2,13.200000,13.200000,13.200000,13.200000,13.2,15.5,0.850000,1.00,0.85,1.00,,0.85,0.85,1.21,1.21,1.21,1.21,1.21,1.00,1.016000,1.036,1.016,1.036,,1.016,1.016,1.026,1.026,1.026,1.026,1.026,1.036,Horizon,Horizon,Horizon,Horizon,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,Bw2,,A,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,A,Bw2,A,Bw2,,A,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,NO,NO,NO,NO,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Loam,Loam,Loam,Loam,,Loam,Loam,Loam,Loam,Loam,Loam,Loam,Loam
2,10003,0,100,10,110,120,20,30,40,50,60,70,80,90,120,120,120,120,120,120,120,120,120,120,120,120,120,0.000000,68.0,10.0,68.0,171.5625,10.0,10.0,10.0,10.0,68.0,68.0,68.0,68.0,10.000000,140.0,68.0,140.0,188.4375,68.0,68.0,68.0,68.0,140.0,140.0,140.0,140.0,5.200000,6.7,5.6,6.7,7.088033,5.6,5.6,5.6,5.6,6.7,6.7,6.7,6.7,4.700000,5.9,4.9,5.9,6.288034,4.9,4.9,4.9,4.9,5.9,5.9,5.9,5.9,0.400000,0.5,0.3,0.5,0.2625,0.3,0.3,0.3,0.3,0.5,0.5,0.5,0.5,0.900000,0.9,0.9,0.9,0.45,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,1.900000,1.1,1.0,1.1,0.88125,1.0,1.0,1.0,1.0,1.1,1.1,1.1,1.1,5.000000,3.500000,3.9,3.500000,3.500000,3.9,3.900000,3.900000,3.900000,3.500000,3.500000,3.500000,3.500000,16.299999,17.700001,16.4,17.700001,20.325,16.4,16.400000,16.400000,16.400000,17.700001,17.700001,17.700001,17.700001,24.500000,23.700001,22.500000,23.700001,25.418750,22.500000,22.500000,22.500000,22.500000,23.700001,23.700001,23.700001,23.700001,34.400002,36.799999,34.200001,36.799999,36.15625,34.200001,34.200001,34.200001,34.200001,36.799999,36.799999,36.799999,36.799999,21.700001,22.100000,24.500000,22.1,23.68125,24.500000,24.500000,24.500000,24.500000,22.100000,22.100000,22.100000,22.100000,56.099998,58.900002,58.700001,58.900002,59.837500,58.700001,58.700001,58.700001,58.700001,58.900002,58.900002,58.900002,58.900002,19.400000,17.400000,18.799999,17.400000,14.743750,18.799999,18.799999,18.799999,18.799999,17.400000,17.400000,17.400000,17.400000,3.86,0.49,1.10,0.49,0.1525,1.10,1.10,1.10,1.10,0.49,0.49,0.49,0.49,2.000000,1.0,3.0,1.0,4.25,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,0.000000,0.0,1.0,0.0,1.375,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,10.000000,7.0,10.0,7.0,10.25,10.0,10.0,10.0,10.0,7.0,7.0,7.0,7.0,2.000000,1.0,4.0,1.0,5.625,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,4.0,2.0,4.0,4.0625,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,26.799999,20.400000,22.900000,20.400000,15.375,22.900000,22.900000,22.900000,22.900000,20.400000,20.400000,20.400000,20.400000,20.400000,15.500000,16.799999,15.500000,11.95,16.799999,16.799999,16.799999,16.799999,15.500000,15.500000,15.500000,15.500000,15.100000,16.3,16.0,16.3,13.39375,16.0,16.0,16.0,16.0,16.3,16.3,16.3,16.3,56.000000,80.0,70.0,80.0,87.125,70.0,70.0,70.0,70.0,80.0,80.0,80.0,80.0,74.000000,100.0,95.0,100.0,100.0,95.0,95.0,95.0,95.0,100.0,100.0,100.0,100.0,,,,,,,,,,,,,,11.700000,4.1,6.900000,4.1,1.98125,6.900000,6.900000,6.900000,6.900000,4.100000,4.100000,4.1,4.1,14.800000,8.7,9.0,8.7,7.0,9.0,9.0,9.000000,9.000000,8.700000,8.700000,8.7,8.7,0.760000,0.50,0.48,0.50,0.4825,0.48,0.48,0.48,0.48,0.50,0.50,0.50,0.50,1.018000,1.015,1.016,1.015,1.012375,1.016,1.016,1.016,1.016,1.015,1.015,1.015,1.015,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A1,Bt,A2,Bt,2Bw,A2,A2,A2,A2,Bt,Bt,Bt,Bt,A1,Bt,A2,Bt,2Bw,A2,A2,A2,A2,Bt,Bt,Bt,Bt,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam
3,10004,0,100,10,110,120,20,30,40,50,60,70,80,90,100,100,100,100,100,100,100,100,100,100,100,100,100,0.000000,76.0,0.0,,,0.0,30.0,30.0,30.0,30.0,76.0,76.0,76.0,30.000000,107.0,30.0,,,30.0,76.0,76.0,76.0,76.0,107.0,107.0,107.0,6.700000,5.9,6.7,,,6.7,6.0,6.0,6.0,6.0,5.9,5.9,5.9,6.100000,5.4,6.1,,,6.1,5.3,5.3,5.3,5.3,5.4,5.4,5.4,3.900000,4.8,3.9,,,3.9,4.1,4.1,4.1,4.1,4.8,4.8,4.8,3.500000,4.3,3.5,,,3.5,3.7,3.7,3.7,3.7,4.3,4.3,4.3,3.600000,4.6,3.6,,,3.6,3.6,3.6,3.6,3.6,4.6,4.6,4.6,7.600000,8.000000,7.6,,,7.6,7.400000,7.400000,7.400000,7.400000,8.000000,8.000000,8.000000,15.100000,16.000000,15.1,,,15.1,17.299999,17.299999,17.299999,17.299999,16.000000,16.000000,16.000000,33.700001,37.700001,33.700001,,,33.700001,36.099998,36.099998,36.099998,36.099998,37.700001,37.700001,37.700001,27.600000,24.299999,27.600000,,,27.600000,26.799999,26.799999,26.799999,26.799999,24.299999,24.299999,24.299999,27.299999,22.600000,27.299999,,,27.299999,26.100000,26.100000,26.100000,26.100000,22.600000,22.600000,22.600000,54.900002,46.900002,54.900002,,,54.900002,52.900002,52.900002,52.900002,52.900002,46.900002,46.900002,46.900002,11.400000,15.400000,11.400000,,,11.400000,11.000000,11.000000,11.000000,11.000000,15.400000,15.400000,15.400000,5.21,0.87,5.21,,,5.21,0.78,0.78,0.78,0.78,0.87,0.87,0.87,12.000000,12.0,12.0,,,12.0,9.0,9.0,9.0,9.0,12.0,12.0,12.0,10.000000,12.0,10.0,,,10.0,6.0,6.0,6.0,6.0,12.0,12.0,12.0,0.0,5.0,...,29.0,29.0,29.0,37.000000,44.0,37.0,,,37.0,31.0,31.0,31.0,31.0,44.0,44.0,44.0,22.000000,29.0,22.0,,,22.0,15.0,15.0,15.0,15.0,29.0,29.0,29.0,1.0,2.0,1.0,,,1.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,30.799999,15.900000,30.799999,,,30.799999,16.799999,16.799999,16.799999,16.799999,15.900000,15.900000,15.900000,24.000000,12.700000,24.000000,,,24.000000,11.400000,11.400000,11.400000,11.400000,12.700000,12.700000,12.700000,24.300000,11.4,24.3,,,24.3,12.0,12.0,12.0,12.0,11.4,11.4,11.4,79.000000,72.0,79.0,,,79.0,71.0,71.0,71.0,71.0,72.0,72.0,72.0,100.000000,90.0,100.0,,,100.0,100.0,100.0,100.0,100.0,90.0,90.0,90.0,,,,,,,,,,,,,,6.500000,4.5,6.500000,,,6.500000,4.800000,4.800000,4.800000,4.800000,4.500000,4.5,4.5,14.200000,8.5,14.2,,,14.2,7.3,7.300000,7.300000,7.300000,8.500000,8.5,8.5,1.250000,0.55,1.25,,,1.25,0.66,0.66,0.66,0.66,0.55,0.55,0.55,1.019000,1.012,1.019,,,1.019,1.011,1.011,1.011,1.011,1.012,1.012,1.012,Horizon,Horizon,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,,,A,Bw1,Bw1,Bw1,Bw1,Bw2,Bw2,Bw2,A,Bw2,A,,,A,Bw1,Bw1,Bw1,Bw1,Bw2,Bw2,Bw2,NO,NO,NO,,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Loam,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Loam,Loam,Loam
4,10005,0,100,10,110,120,20,30,40,50,60,70,80,90,100,100,100,100,100,100,100,100,100,100,100,100,100,0.000000,81.0,18.0,,,18.0,18.0,18.0,18.0,18.0,18.0,81.0,81.0,18.000000,107.0,81.0,,,81.0,81.0,81.0,81.0,81.0,81.0,107.0,107.0,5.500000,5.6,5.4,,,5.4,5.4,5.4,5.4,5.4,5.4,5.6,5.6,4.900000,4.9,4.7,,,4.7,4.7,4.7,4.7,4.7,4.7,4.9,4.9,0.900000,0.4,0.6,,,0.6,0.6,0.6,0.6,0.6,0.6,0.4,0.4,0.800000,0.8,0.9,,,0.9,0.9,0.9,0.9,0.9,0.9,0.8,0.8,1.800000,1.3,1.3,,,1.3,1.3,1.3,1.3,1.3,1.3,1.3,1.3,3.300000,3.600000,3.4,,,3.4,3.400000,3.400000,3.400000,3.400000,3.400000,3.600000,3.600000,21.200001,22.100000,21.9,,,21.9,21.900000,21.900000,21.900000,21.900000,21.900000,22.100000,22.100000,28.000000,28.200001,28.100000,,,28.100000,28.100000,28.100000,28.100000,28.100000,28.100000,28.200001,28.200001,34.299999,33.599998,33.500000,,,33.500000,33.500000,33.500000,33.500000,33.500000,33.500000,33.599998,33.599998,19.500000,19.799999,20.700001,,,20.700001,20.700001,20.700001,20.700001,20.700001,20.700001,19.799999,19.799999,53.799999,53.400002,54.200001,,,54.200001,54.200001,54.200001,54.200001,54.200001,54.200001,53.400002,53.400002,18.200001,18.400000,17.700001,,,17.700001,17.700001,17.700001,17.700001,17.700001,17.700001,18.400000,18.400000,2.03,0.33,0.91,,,0.91,0.91,0.91,0.91,0.91,0.91,0.33,0.33,2.000000,2.0,2.0,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.000000,4.0,2.0,,,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,0.0,0.0,...,4.0,6.0,6.0,11.000000,12.0,10.0,,,10.0,10.0,10.0,10.0,10.0,10.0,12.0,12.0,4.000000,6.0,4.0,,,4.0,4.0,4.0,4.0,4.0,4.0,6.0,6.0,2.0,3.0,4.0,,,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,17.900000,13.100000,15.200000,,,15.200000,15.200000,15.200000,15.200000,15.200000,15.200000,13.100000,13.100000,12.400000,10.400000,10.500000,,,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,10.400000,10.400000,7.600000,8.0,6.3,,,6.3,6.3,6.3,6.3,6.3,6.3,8.0,8.0,42.000000,61.0,41.0,,,41.0,41.0,41.0,41.0,41.0,41.0,61.0,61.0,61.000000,77.0,60.0,,,60.0,60.0,60.0,60.0,60.0,60.0,77.0,77.0,,,,,,,,,,,,,,10.300000,5.1,8.900000,,,8.900000,8.900000,8.900000,8.900000,8.900000,8.900000,5.1,5.1,9.500000,7.7,8.5,,,8.5,8.5,8.500000,8.500000,8.500000,8.500000,7.7,7.7,0.520000,0.42,0.48,,,0.48,0.48,0.48,0.48,0.48,0.48,0.42,0.42,1.012000,1.010,1.011,,,1.011,1.011,1.011,1.011,1.011,1.011,1.010,1.010,Horizon,Horizon,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw,Bt,,,Bt,Bt,Bt,Bt,Bt,Bt,Bw,Bw,A,Bw,Bt,,,Bt,Bt,Bt,Bt,Bt,Bt,Bw,Bw,NO,NO,NO,,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Silt loam,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57580,87764,0,100,10,110,120,20,30,40,50,60,70,80,90,60,60,60,60,60,60,60,60,60,60,60,60,60,2.461538,,13.0,,,13.0,13.0,40.0,40.0,40.0,,,,10.538462,,40.0,,,40.0,40.0,60.0,60.0,60.0,,,,6.677795,,6.6,,,6.6,6.6,5.8,5.8,5.8,,,,5.900000,,5.7,,,5.7,5.7,4.8,4.8,4.8,,,,4.863637,,2.8,,,2.8,2.8,6.5,6.5,6.5,,,,6.818182,,5.2,,,5.2,5.2,8.4,8.4,8.4,,,,3.681818,,3.8,,,3.8,3.8,7.9,7.9,7.9,,,,3.445455,,4.4,,,4.4,4.400000,6.900000,6.900000,6.900000,,,,8.709091,,9.6,,,9.6,9.600000,6.100000,6.100000,6.100000,,,,27.518182,,25.799999,,,25.799999,25.799999,35.799999,35.799999,35.799999,,,,21.890909,,24.500000,,,24.500000,24.500000,21.299999,21.299999,21.299999,,,,43.536364,,45.099998,,,45.099998,45.099998,33.500000,33.500000,33.500000,,,,65.427271,,69.599998,,,69.599998,69.599998,54.799999,54.799999,54.799999,,,,7.054546,,4.600000,,,4.600000,4.600000,9.400000,9.400000,9.400000,,,,,,,,,,,,,,,,,3.846154,,3.0,,,3.0,3.0,3.0,3.0,3.0,,,,0.000000,,0.0,,,0.0,0.0,1.0,1.0,1.0,,,,0.0,,...,,,,19.153846,,19.0,,,19.0,19.0,33.0,33.0,33.0,,,,3.846154,,3.0,,,3.0,3.0,4.0,4.0,4.0,,,,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,,38.081818,,36.099998,,,36.099998,36.099998,11.600000,11.600000,11.600000,,,,,,,,,,,,,,,,,11.781818,,6.0,,,6.0,6.0,5.1,5.1,5.1,,,,28.363636,,17.0,,,17.0,17.0,44.0,44.0,44.0,,,,52.727273,,39.0,,,39.0,39.0,76.0,76.0,76.0,,,,,,,,,,,,,,,,,32.253845,,30.100000,,,30.100000,30.100000,6.500000,6.500000,6.500000,,,,25.538462,,12.3,,,12.3,12.3,5.800000,5.800000,5.800000,,,,1.981818,,2.67,,,2.67,2.67,0.62,0.62,0.62,,,,1.048769,,1.041,,,1.041,1.041,1.013,1.013,1.013,,,,Horizon,,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,,,,Bw1,,Bw2,,,Bw2,Bw2,2Bt,2Bt,2Bt,,,,Bw1,,Bw2,,,Bw2,Bw2,2Bt,2Bt,2Bt,,,,NO,,NO,,,NO,NO,NO,NO,NO,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,,,,Silt loam,,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,,,
57581,87765,0,100,10,110,120,20,30,40,50,60,70,80,90,40,40,40,40,40,40,40,40,40,40,40,40,40,3.142857,,14.0,,,29.0,29.0,29.0,,,,,,10.857143,,29.0,,,45.0,45.0,45.0,,,,,,6.280744,,6.1,,,6.2,6.2,6.2,,,,,,5.673763,,5.4,,,5.2,5.2,5.2,,,,,,3.900000,,4.4,,,4.0,4.0,4.0,,,,,,4.916666,,6.4,,,6.0,6.0,6.0,,,,,,5.566667,,7.7,,,8.4,8.4,8.4,,,,,,5.883333,,7.6,,,7.7,7.700000,7.700000,,,,,,8.800000,,8.6,,,7.9,7.900000,7.900000,,,,,,29.066667,,34.700001,,,34.000000,34.000000,34.000000,,,,,,24.600000,,24.100000,,,25.700001,25.700001,25.700001,,,,,,37.649999,,32.500000,,,31.700001,31.700001,31.700001,,,,,,62.250001,,56.599998,,,57.400002,57.400002,57.400002,,,,,,8.683333,,8.700000,,,8.600000,8.600000,8.600000,,,,,,,,,,,,,,,,,,,4.000000,,5.0,,,4.0,4.0,4.0,,,,,,1.714286,,2.0,,,3.0,3.0,3.0,,,,,,0.0,,...,,,,21.714286,,31.0,,,31.0,31.0,31.0,,,,,,5.714286,,7.0,,,7.0,7.0,7.0,,,,,,0.0,,0.0,,,0.0,0.0,0.0,,,,,,25.116666,,19.100000,,,10.000000,10.000000,10.000000,,,,,,,,,,,,,,,,,,,9.416667,,6.2,,,5.1,5.1,5.1,,,,,,36.333333,,32.0,,,51.0,51.0,51.0,,,,,,62.166667,,56.0,,,88.0,88.0,88.0,,,,,,,,,,,,,,,,,,,20.071429,,12.900000,,,4.900000,4.900000,4.900000,,,,,,16.271429,,8.9,,,5.7,5.7,5.700000,,,,,,1.325000,,1.02,,,0.66,0.66,0.66,,,,,,1.035571,,1.026,,,1.013,1.013,1.013,,,,,,Horizon,,Horizon,,,Horizon,Horizon,Horizon,,,,,,Bw1,,Bw2,,,2Bt,2Bt,2Bt,,,,,,Bw1,,Bw2,,,2Bt,2Bt,2Bt,,,,,,NO,,NO,,,NO,NO,NO,,,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,Air-dry,,,,,,Silt loam,,Silt loam,,,Silt loam,Silt loam,Silt loam,,,,,
57582,87766,0,100,10,110,120,20,30,40,50,60,70,80,90,10,10,10,10,10,10,10,10,10,10,10,10,10,0.000000,,0.0,,,,,,,,,,,15.000000,,15.0,,,,,,,,,,,4.100000,,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,1.0,,,,,,,,,,,29.200001,,29.200001,,,,,,,,,,,31.100000,,31.100000,,,,,,,,,,,29.200000,,29.2,,,,,,,,,,,100.000000,,100.0,,,,,,,,,,,94.000000,,94.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Reporting layer,,Reporting layer,,,,,,,,,,,Oi,,Oi,,,,,,,,,,,,,,,,,,,,,,,,NO,,NO,,,,,,,,,,,Air-dry,,Air-dry,,,,,,,,,,,,,,,,,,,,,,,
57583,87767,0,100,10,110,120,20,30,40,50,60,70,80,90,30,30,30,30,30,30,30,30,30,30,30,30,30,0.000000,,0.0,,,0.0,0.0,,,,,,,32.000000,,32.0,,,32.0,32.0,,,,,,,4.100000,,4.1,,,4.1,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,,,0.0,0.0,,,,,,,30.200001,,30.200001,,,30.200001,30.200001,,,,,,,33.799999,,33.799999,,,33.799999,33.799999,,,,,,,30.200000,,30.2,,,30.2,30.2,,,,,,,100.000000,,100.0,,,100.0,100.0,,,,,,,89.000000,,89.0,,,89.0,89.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Reporting layer,,Reporting layer,,,Reporting layer,Reporting layer,,,,,,,Oe,,Oe,,,Oe,Oe,,,,,,,,,,,,,,,,,,,,NO,,NO,,,NO,NO,,,,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,,,,,,,,,,,,,,,,,,,


In [77]:
# Drop columns that are not needed

ncsslayerlab_weighted_filled_pivot_final = ncsslayerlab_weighted_filled_pivot.drop(columns = [
 'lower_band_0cm_9cm',
 'lower_band_100cm_109cm',
 'lower_band_10cm_19cm',
 'lower_band_110cm_119cm',
 'lower_band_120+cm',
 'lower_band_20cm_29cm',
 'lower_band_30cm_39cm',
 'lower_band_40cm_49cm',
 'lower_band_50cm_59cm',
 'lower_band_60cm_69cm',
 'lower_band_70cm_79cm',
 'lower_band_80cm_89cm',
 'lower_band_90cm_99cm',
 'max_depth_bin_final_0cm_9cm',
 'max_depth_bin_final_100cm_109cm',
 'max_depth_bin_final_10cm_19cm',
 'max_depth_bin_final_110cm_119cm',
 'max_depth_bin_final_120+cm',
 'max_depth_bin_final_20cm_29cm',
 'max_depth_bin_final_30cm_39cm',
 'max_depth_bin_final_40cm_49cm',
 'max_depth_bin_final_50cm_59cm',
 'max_depth_bin_final_60cm_69cm',
 'max_depth_bin_final_70cm_79cm',
 'max_depth_bin_final_80cm_89cm',
 'max_depth_bin_final_90cm_99cm'])

ncsslayerlab_weighted_filled_pivot_final

Unnamed: 0,ncsspedonlabdataiidref,hzdept_0cm_9cm,hzdept_100cm_109cm,hzdept_10cm_19cm,hzdept_110cm_119cm,hzdept_120+cm,hzdept_20cm_29cm,hzdept_30cm_39cm,hzdept_40cm_49cm,hzdept_50cm_59cm,hzdept_60cm_69cm,hzdept_70cm_79cm,hzdept_80cm_89cm,hzdept_90cm_99cm,hzdepb_0cm_9cm,hzdepb_100cm_109cm,hzdepb_10cm_19cm,hzdepb_110cm_119cm,hzdepb_120+cm,hzdepb_20cm_29cm,hzdepb_30cm_39cm,hzdepb_40cm_49cm,hzdepb_50cm_59cm,hzdepb_60cm_69cm,hzdepb_70cm_79cm,hzdepb_80cm_89cm,hzdepb_90cm_99cm,ph1to1h2o_0cm_9cm,ph1to1h2o_100cm_109cm,ph1to1h2o_10cm_19cm,ph1to1h2o_110cm_119cm,ph1to1h2o_120+cm,ph1to1h2o_20cm_29cm,ph1to1h2o_30cm_39cm,ph1to1h2o_40cm_49cm,ph1to1h2o_50cm_59cm,ph1to1h2o_60cm_69cm,ph1to1h2o_70cm_79cm,ph1to1h2o_80cm_89cm,ph1to1h2o_90cm_99cm,ph01mcacl2_0cm_9cm,ph01mcacl2_100cm_109cm,ph01mcacl2_10cm_19cm,ph01mcacl2_110cm_119cm,ph01mcacl2_120+cm,ph01mcacl2_20cm_29cm,ph01mcacl2_30cm_39cm,ph01mcacl2_40cm_49cm,ph01mcacl2_50cm_59cm,ph01mcacl2_60cm_69cm,ph01mcacl2_70cm_79cm,ph01mcacl2_80cm_89cm,ph01mcacl2_90cm_99cm,sandvcmeasured_0cm_9cm,sandvcmeasured_100cm_109cm,sandvcmeasured_10cm_19cm,sandvcmeasured_110cm_119cm,sandvcmeasured_120+cm,sandvcmeasured_20cm_29cm,sandvcmeasured_30cm_39cm,sandvcmeasured_40cm_49cm,sandvcmeasured_50cm_59cm,sandvcmeasured_60cm_69cm,sandvcmeasured_70cm_79cm,sandvcmeasured_80cm_89cm,sandvcmeasured_90cm_99cm,sandcomeasured_0cm_9cm,sandcomeasured_100cm_109cm,sandcomeasured_10cm_19cm,sandcomeasured_110cm_119cm,sandcomeasured_120+cm,sandcomeasured_20cm_29cm,sandcomeasured_30cm_39cm,sandcomeasured_40cm_49cm,sandcomeasured_50cm_59cm,sandcomeasured_60cm_69cm,sandcomeasured_70cm_79cm,sandcomeasured_80cm_89cm,sandcomeasured_90cm_99cm,sandmedmeasured_0cm_9cm,sandmedmeasured_100cm_109cm,sandmedmeasured_10cm_19cm,sandmedmeasured_110cm_119cm,sandmedmeasured_120+cm,sandmedmeasured_20cm_29cm,sandmedmeasured_30cm_39cm,sandmedmeasured_40cm_49cm,sandmedmeasured_50cm_59cm,sandmedmeasured_60cm_69cm,sandmedmeasured_70cm_79cm,sandmedmeasured_80cm_89cm,sandmedmeasured_90cm_99cm,sandfinemeasured_0cm_9cm,sandfinemeasured_100cm_109cm,sandfinemeasured_10cm_19cm,sandfinemeasured_110cm_119cm,sandfinemeasured_120+cm,sandfinemeasured_20cm_29cm,sandfinemeasured_30cm_39cm,sandfinemeasured_40cm_49cm,sandfinemeasured_50cm_59cm,sandfinemeasured_60cm_69cm,sandfinemeasured_70cm_79cm,sandfinemeasured_80cm_89cm,sandfinemeasured_90cm_99cm,sandvfmeasured_0cm_9cm,sandvfmeasured_100cm_109cm,sandvfmeasured_10cm_19cm,sandvfmeasured_110cm_119cm,sandvfmeasured_120+cm,sandvfmeasured_20cm_29cm,sandvfmeasured_30cm_39cm,sandvfmeasured_40cm_49cm,sandvfmeasured_50cm_59cm,sandvfmeasured_60cm_69cm,sandvfmeasured_70cm_79cm,sandvfmeasured_80cm_89cm,sandvfmeasured_90cm_99cm,sandtotmeasured_0cm_9cm,sandtotmeasured_100cm_109cm,sandtotmeasured_10cm_19cm,sandtotmeasured_110cm_119cm,sandtotmeasured_120+cm,sandtotmeasured_20cm_29cm,sandtotmeasured_30cm_39cm,sandtotmeasured_40cm_49cm,sandtotmeasured_50cm_59cm,sandtotmeasured_60cm_69cm,sandtotmeasured_70cm_79cm,sandtotmeasured_80cm_89cm,sandtotmeasured_90cm_99cm,siltcomeasured_0cm_9cm,siltcomeasured_100cm_109cm,siltcomeasured_10cm_19cm,siltcomeasured_110cm_119cm,siltcomeasured_120+cm,siltcomeasured_20cm_29cm,siltcomeasured_30cm_39cm,siltcomeasured_40cm_49cm,siltcomeasured_50cm_59cm,siltcomeasured_60cm_69cm,siltcomeasured_70cm_79cm,siltcomeasured_80cm_89cm,siltcomeasured_90cm_99cm,siltfinemeasured_0cm_9cm,siltfinemeasured_100cm_109cm,siltfinemeasured_10cm_19cm,siltfinemeasured_110cm_119cm,siltfinemeasured_120+cm,siltfinemeasured_20cm_29cm,siltfinemeasured_30cm_39cm,siltfinemeasured_40cm_49cm,siltfinemeasured_50cm_59cm,siltfinemeasured_60cm_69cm,siltfinemeasured_70cm_79cm,siltfinemeasured_80cm_89cm,siltfinemeasured_90cm_99cm,silttotmeasured_0cm_9cm,silttotmeasured_100cm_109cm,silttotmeasured_10cm_19cm,silttotmeasured_110cm_119cm,silttotmeasured_120+cm,silttotmeasured_20cm_29cm,silttotmeasured_30cm_39cm,silttotmeasured_40cm_49cm,silttotmeasured_50cm_59cm,silttotmeasured_60cm_69cm,silttotmeasured_70cm_79cm,silttotmeasured_80cm_89cm,silttotmeasured_90cm_99cm,claytotmeasured_0cm_9cm,claytotmeasured_100cm_109cm,claytotmeasured_10cm_19cm,claytotmeasured_110cm_119cm,claytotmeasured_120+cm,claytotmeasured_20cm_29cm,claytotmeasured_30cm_39cm,claytotmeasured_40cm_49cm,claytotmeasured_50cm_59cm,claytotmeasured_60cm_69cm,claytotmeasured_70cm_79cm,claytotmeasured_80cm_89cm,claytotmeasured_90cm_99cm,carbonorganicpctmeasured_0cm_9cm,carbonorganicpctmeasured_100cm_109cm,carbonorganicpctmeasured_10cm_19cm,carbonorganicpctmeasured_110cm_119cm,carbonorganicpctmeasured_120+cm,carbonorganicpctmeasured_20cm_29cm,carbonorganicpctmeasured_30cm_39cm,carbonorganicpctmeasured_40cm_49cm,carbonorganicpctmeasured_50cm_59cm,carbonorganicpctmeasured_60cm_69cm,carbonorganicpctmeasured_70cm_79cm,carbonorganicpctmeasured_80cm_89cm,carbonorganicpctmeasured_90cm_99cm,fragwt25_0cm_9cm,fragwt25_100cm_109cm,fragwt25_10cm_19cm,fragwt25_110cm_119cm,fragwt25_120+cm,fragwt25_20cm_29cm,fragwt25_30cm_39cm,fragwt25_40cm_49cm,fragwt25_50cm_59cm,fragwt25_60cm_69cm,fragwt25_70cm_79cm,fragwt25_80cm_89cm,fragwt25_90cm_99cm,fragwt520_0cm_9cm,fragwt520_100cm_109cm,fragwt520_10cm_19cm,fragwt520_110cm_119cm,fragwt520_120+cm,fragwt520_20cm_29cm,fragwt520_30cm_39cm,fragwt520_40cm_49cm,fragwt520_50cm_59cm,fragwt520_60cm_69cm,fragwt520_70cm_79cm,fragwt520_80cm_89cm,fragwt520_90cm_99cm,fragwt2075_0cm_9cm,fragwt2075_100cm_109cm,fragwt2075_10cm_19cm,fragwt2075_110cm_119cm,fragwt2075_120+cm,fragwt2075_20cm_29cm,fragwt2075_30cm_39cm,fragwt2075_40cm_49cm,fragwt2075_50cm_59cm,fragwt2075_60cm_69cm,fragwt2075_70cm_79cm,fragwt2075_80cm_89cm,fragwt2075_90cm_99cm,fragwt275_0cm_9cm,fragwt275_100cm_109cm,fragwt275_10cm_19cm,fragwt275_110cm_119cm,fragwt275_120+cm,fragwt275_20cm_29cm,fragwt275_30cm_39cm,fragwt275_40cm_49cm,fragwt275_50cm_59cm,fragwt275_60cm_69cm,fragwt275_70cm_79cm,fragwt275_80cm_89cm,fragwt275_90cm_99cm,wtpct0175_0cm_9cm,wtpct0175_100cm_109cm,wtpct0175_10cm_19cm,wtpct0175_110cm_119cm,wtpct0175_120+cm,wtpct0175_20cm_29cm,wtpct0175_30cm_39cm,wtpct0175_40cm_49cm,wtpct0175_50cm_59cm,wtpct0175_60cm_69cm,wtpct0175_70cm_79cm,wtpct0175_80cm_89cm,wtpct0175_90cm_99cm,wtpctgt2ws_0cm_9cm,wtpctgt2ws_100cm_109cm,wtpctgt2ws_10cm_19cm,wtpctgt2ws_110cm_119cm,wtpctgt2ws_120+cm,wtpctgt2ws_20cm_29cm,wtpctgt2ws_30cm_39cm,wtpctgt2ws_40cm_49cm,wtpctgt2ws_50cm_59cm,wtpctgt2ws_60cm_69cm,wtpctgt2ws_70cm_79cm,wtpctgt2ws_80cm_89cm,wtpctgt2ws_90cm_99cm,esp_0cm_9cm,esp_100cm_109cm,esp_10cm_19cm,esp_110cm_119cm,esp_120+cm,esp_20cm_29cm,esp_30cm_39cm,esp_40cm_49cm,esp_50cm_59cm,esp_60cm_69cm,esp_70cm_79cm,esp_80cm_89cm,esp_90cm_99cm,cecsumcations_0cm_9cm,cecsumcations_100cm_109cm,cecsumcations_10cm_19cm,cecsumcations_110cm_119cm,cecsumcations_120+cm,cecsumcations_20cm_29cm,cecsumcations_30cm_39cm,cecsumcations_40cm_49cm,cecsumcations_50cm_59cm,cecsumcations_60cm_69cm,cecsumcations_70cm_79cm,cecsumcations_80cm_89cm,cecsumcations_90cm_99cm,cec7_0cm_9cm,cec7_100cm_109cm,cec7_10cm_19cm,cec7_110cm_119cm,cec7_120+cm,cec7_20cm_29cm,cec7_30cm_39cm,cec7_40cm_49cm,cec7_50cm_59cm,cec7_60cm_69cm,cec7_70cm_79cm,cec7_80cm_89cm,cec7_90cm_99cm,sumbases_0cm_9cm,sumbases_100cm_109cm,sumbases_10cm_19cm,sumbases_110cm_119cm,sumbases_120+cm,sumbases_20cm_29cm,sumbases_30cm_39cm,sumbases_40cm_49cm,sumbases_50cm_59cm,sumbases_60cm_69cm,sumbases_70cm_79cm,sumbases_80cm_89cm,sumbases_90cm_99cm,basesatsumcations_0cm_9cm,basesatsumcations_100cm_109cm,basesatsumcations_10cm_19cm,basesatsumcations_110cm_119cm,basesatsumcations_120+cm,basesatsumcations_20cm_29cm,basesatsumcations_30cm_39cm,basesatsumcations_40cm_49cm,basesatsumcations_50cm_59cm,basesatsumcations_60cm_69cm,basesatsumcations_70cm_79cm,basesatsumcations_80cm_89cm,basesatsumcations_90cm_99cm,basesatnh4oac_0cm_9cm,basesatnh4oac_100cm_109cm,basesatnh4oac_10cm_19cm,basesatnh4oac_110cm_119cm,basesatnh4oac_120+cm,basesatnh4oac_20cm_29cm,basesatnh4oac_30cm_39cm,basesatnh4oac_40cm_49cm,basesatnh4oac_50cm_59cm,basesatnh4oac_60cm_69cm,basesatnh4oac_70cm_79cm,basesatnh4oac_80cm_89cm,basesatnh4oac_90cm_99cm,caco3equivmeasured_0cm_9cm,caco3equivmeasured_100cm_109cm,caco3equivmeasured_10cm_19cm,caco3equivmeasured_110cm_119cm,caco3equivmeasured_120+cm,caco3equivmeasured_20cm_29cm,caco3equivmeasured_30cm_39cm,caco3equivmeasured_40cm_49cm,caco3equivmeasured_50cm_59cm,caco3equivmeasured_60cm_69cm,caco3equivmeasured_70cm_79cm,caco3equivmeasured_80cm_89cm,caco3equivmeasured_90cm_99cm,extracid_0cm_9cm,extracid_100cm_109cm,extracid_10cm_19cm,extracid_110cm_119cm,extracid_120+cm,extracid_20cm_29cm,extracid_30cm_39cm,extracid_40cm_49cm,extracid_50cm_59cm,extracid_60cm_69cm,extracid_70cm_79cm,extracid_80cm_89cm,extracid_90cm_99cm,wfifteenbarmeasured_0cm_9cm,wfifteenbarmeasured_100cm_109cm,wfifteenbarmeasured_10cm_19cm,wfifteenbarmeasured_110cm_119cm,wfifteenbarmeasured_120+cm,wfifteenbarmeasured_20cm_29cm,wfifteenbarmeasured_30cm_39cm,wfifteenbarmeasured_40cm_49cm,wfifteenbarmeasured_50cm_59cm,wfifteenbarmeasured_60cm_69cm,wfifteenbarmeasured_70cm_79cm,wfifteenbarmeasured_80cm_89cm,wfifteenbarmeasured_90cm_99cm,wfifteenbartoclay_0cm_9cm,wfifteenbartoclay_100cm_109cm,wfifteenbartoclay_10cm_19cm,wfifteenbartoclay_110cm_119cm,wfifteenbartoclay_120+cm,wfifteenbartoclay_20cm_29cm,wfifteenbartoclay_30cm_39cm,wfifteenbartoclay_40cm_49cm,wfifteenbartoclay_50cm_59cm,wfifteenbartoclay_60cm_69cm,wfifteenbartoclay_70cm_79cm,wfifteenbartoclay_80cm_89cm,wfifteenbartoclay_90cm_99cm,adod_0cm_9cm,adod_100cm_109cm,adod_10cm_19cm,adod_110cm_119cm,adod_120+cm,adod_20cm_29cm,adod_30cm_39cm,adod_40cm_49cm,adod_50cm_59cm,adod_60cm_69cm,adod_70cm_79cm,adod_80cm_89cm,adod_90cm_99cm,layertype_0cm_9cm,layertype_100cm_109cm,layertype_10cm_19cm,layertype_110cm_119cm,layertype_120+cm,layertype_20cm_29cm,layertype_30cm_39cm,layertype_40cm_49cm,layertype_50cm_59cm,layertype_60cm_69cm,layertype_70cm_79cm,layertype_80cm_89cm,layertype_90cm_99cm,hzname_0cm_9cm,hzname_100cm_109cm,hzname_10cm_19cm,hzname_110cm_119cm,hzname_120+cm,hzname_20cm_29cm,hzname_30cm_39cm,hzname_40cm_49cm,hzname_50cm_59cm,hzname_60cm_69cm,hzname_70cm_79cm,hzname_80cm_89cm,hzname_90cm_99cm,hznameoriginal_0cm_9cm,hznameoriginal_100cm_109cm,hznameoriginal_10cm_19cm,hznameoriginal_110cm_119cm,hznameoriginal_120+cm,hznameoriginal_20cm_29cm,hznameoriginal_30cm_39cm,hznameoriginal_40cm_49cm,hznameoriginal_50cm_59cm,hznameoriginal_60cm_69cm,hznameoriginal_70cm_79cm,hznameoriginal_80cm_89cm,hznameoriginal_90cm_99cm,stratextsflag_0cm_9cm,stratextsflag_100cm_109cm,stratextsflag_10cm_19cm,stratextsflag_110cm_119cm,stratextsflag_120+cm,stratextsflag_20cm_29cm,stratextsflag_30cm_39cm,stratextsflag_40cm_49cm,stratextsflag_50cm_59cm,stratextsflag_60cm_69cm,stratextsflag_70cm_79cm,stratextsflag_80cm_89cm,stratextsflag_90cm_99cm,moistprepstate_0cm_9cm,moistprepstate_100cm_109cm,moistprepstate_10cm_19cm,moistprepstate_110cm_119cm,moistprepstate_120+cm,moistprepstate_20cm_29cm,moistprepstate_30cm_39cm,moistprepstate_40cm_49cm,moistprepstate_50cm_59cm,moistprepstate_60cm_69cm,moistprepstate_70cm_79cm,moistprepstate_80cm_89cm,moistprepstate_90cm_99cm,texcl_0cm_9cm,texcl_100cm_109cm,texcl_10cm_19cm,texcl_110cm_119cm,texcl_120+cm,texcl_20cm_29cm,texcl_30cm_39cm,texcl_40cm_49cm,texcl_50cm_59cm,texcl_60cm_69cm,texcl_70cm_79cm,texcl_80cm_89cm,texcl_90cm_99cm
0,10001,0.000000,91.0,0.0,91.0,91.0000,0.0,30.0,30.0,30.0,30.0,30.0,30.0,91.0,30.000000,122.0,30.0,122.0,122.0000,30.0,91.0,91.0,91.0,91.0,91.0,91.0,122.0,6.700000,6.5,6.7,6.5,6.500000,6.7,6.7,6.7,6.7,6.7,6.7,6.7,6.5,6.000000,5.8,6.0,5.8,5.800000,6.0,6.1,6.1,6.1,6.1,6.1,6.1,5.8,5.500000,5.3,5.5,5.3,5.3000,5.5,5.4,5.4,5.4,5.4,5.4,5.4,5.3,2.500000,3.1,2.5,3.1,3.10,2.5,3.5,3.5,3.5,3.5,3.5,3.5,3.1,4.000000,4.5,4.0,4.5,4.50000,4.0,6.2,6.2,6.2,6.2,6.2,6.2,4.5,32.000000,24.299999,32.0,24.299999,24.299999,32.0,34.099998,34.099998,34.099998,34.099998,34.099998,34.099998,24.299999,23.400000,20.100000,23.4,20.100000,20.100,23.4,22.900000,22.900000,22.900000,22.900000,22.900000,22.900000,20.100000,67.400002,57.299999,67.400002,57.299999,57.299999,67.400002,72.099998,72.099998,72.099998,72.099998,72.099998,72.099998,57.299999,13.200000,10.400000,13.200000,10.400000,10.40000,13.200000,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,10.400000,12.500000,11.400000,12.500000,11.4,11.40000,12.500000,9.500000,9.500000,9.500000,9.500000,9.500000,9.500000,11.400000,25.700001,21.799999,25.700001,21.799999,21.799999,25.700001,20.000000,20.000000,20.000000,20.000000,20.000000,20.000000,21.799999,6.900000,20.900000,6.900000,20.900000,20.900000,6.900000,7.900000,7.900000,7.900000,7.900000,7.900000,7.900000,20.900000,0.49,0.19,0.49,0.19,0.1900,0.49,0.32,0.32,0.32,0.32,0.32,0.32,0.19,9.000000,9.0,9.0,9.0,9.00,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,20.000000,23.0,20.0,23.0,23.000,20.0,34.0,34.0,34.0,34.0,34.0,34.0,23.0,5.0,7.0,5.0,7.0,7.0,5.0,12.0,12.0,12.0,12.0,12.0,12.0,7.0,34.000000,39.0,34.0,39.0,39.000,34.0,55.0,55.0,55.0,55.0,55.0,55.0,39.0,63.000000,62.0,63.0,62.0,62.00,63.0,77.0,77.0,77.0,77.0,77.0,77.0,62.0,34.000000,39.0,34.0,39.0,39.000,34.0,55.0,55.0,55.0,55.0,55.0,55.0,39.0,4.0,2.0,4.0,2.0,2.0000,4.0,5.0,5.0,5.0,5.0,5.0,5.0,2.0,11.400000,27.100000,11.400000,27.100000,27.100,11.400000,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,27.100000,7.000000,23.100000,7.000000,23.100000,23.10,7.000000,8.000000,8.000000,8.000000,8.000000,8.000000,8.000000,23.100000,9.100000,22.6,9.1,22.6,22.60000,9.1,9.0,9.0,9.0,9.0,9.0,9.0,22.6,80.000000,83.0,80.0,83.0,83.000,80.0,86.0,86.0,86.0,86.0,86.0,86.0,83.0,100.000000,98.0,100.0,98.0,98.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,98.0,,0.0,,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.300000,4.5,2.300000,4.5,4.50000,2.300000,1.500000,1.500000,1.500000,1.500000,1.500000,1.5,4.5,5.900000,13.4,5.9,13.4,13.4,5.9,6.2,6.200000,6.200000,6.200000,6.200000,6.2,13.4,0.860000,0.64,0.86,0.64,0.6400,0.86,0.78,0.78,0.78,0.78,0.78,0.78,0.64,1.007000,1.028,1.007,1.028,1.028000,1.007,1.009,1.009,1.009,1.009,1.009,1.009,1.028,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,Bw2,Bw2,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,A,Bw2,A,Bw2,Bw2,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Fine sandy loam,Sandy clay loam,Fine sandy loam,Sandy clay loam,Sandy clay loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Fine sandy loam,Sandy clay loam
1,10002,0.000000,91.0,0.0,91.0,,0.0,0.0,46.0,46.0,46.0,46.0,46.0,91.0,46.000000,117.0,46.0,117.0,,46.0,46.0,91.0,91.0,91.0,91.0,91.0,117.0,5.800000,6.4,5.8,6.4,,5.8,5.8,6.4,6.4,6.4,6.4,6.4,6.4,5.200000,5.8,5.2,5.8,,5.2,5.2,5.8,5.8,5.8,5.8,5.8,5.8,0.400000,0.6,0.4,0.6,,0.4,0.4,0.0,0.0,0.0,0.0,0.0,0.6,0.500000,2.1,0.5,2.1,,0.5,0.5,0.1,0.1,0.1,0.1,0.1,2.1,1.400000,2.4,1.4,2.4,,1.4,1.4,0.6,0.6,0.6,0.6,0.6,2.4,13.600000,10.300000,13.6,10.300000,,13.6,13.600000,13.300000,13.300000,13.300000,13.300000,13.300000,10.300000,27.900000,31.000000,27.9,31.000000,,27.9,27.900000,37.599998,37.599998,37.599998,37.599998,37.599998,31.000000,43.799999,46.400002,43.799999,46.400002,,43.799999,43.799999,51.599998,51.599998,51.599998,51.599998,51.599998,46.400002,23.299999,22.400000,23.299999,22.400000,,23.299999,23.299999,21.900000,21.900000,21.900000,21.900000,21.900000,22.400000,20.900000,15.700000,20.900000,15.7,,20.900000,20.900000,15.600000,15.600000,15.600000,15.600000,15.600000,15.700000,44.200001,38.099998,44.200001,38.099998,,44.200001,44.200001,37.500000,37.500000,37.500000,37.500000,37.500000,38.099998,12.000000,15.500000,12.000000,15.500000,,12.000000,12.000000,10.900000,10.900000,10.900000,10.900000,10.900000,15.500000,1.14,0.53,1.14,0.53,,1.14,1.14,0.46,0.46,0.46,0.46,0.46,0.53,1.000000,6.0,1.0,6.0,,1.0,1.0,2.0,2.0,2.0,2.0,2.0,6.0,1.000000,36.0,1.0,36.0,,1.0,1.0,29.0,29.0,29.0,29.0,29.0,36.0,0.0,33.0,0.0,33.0,,0.0,0.0,2.0,2.0,2.0,2.0,2.0,33.0,2.000000,75.0,2.0,75.0,,2.0,2.0,33.0,33.0,33.0,33.0,33.0,75.0,18.000000,79.0,18.0,79.0,,18.0,18.0,43.0,43.0,43.0,43.0,43.0,79.0,2.000000,75.0,2.0,75.0,,2.0,2.0,33.0,33.0,33.0,33.0,33.0,75.0,3.0,2.0,3.0,2.0,,3.0,3.0,1.0,1.0,1.0,1.0,1.0,2.0,19.700001,34.599998,19.700001,34.599998,,19.700001,19.700001,25.799999,25.799999,25.799999,25.799999,25.799999,34.599998,15.100000,30.200001,15.100000,30.200001,,15.100000,15.100000,22.700001,22.700001,22.700001,22.700001,22.700001,30.200001,13.700000,29.5,13.7,29.5,,13.7,13.7,21.9,21.9,21.9,21.9,21.9,29.5,70.000000,85.0,70.0,85.0,,70.0,70.0,85.0,85.0,85.0,85.0,85.0,85.0,91.000000,98.0,91.0,98.0,,91.0,91.0,96.0,96.0,96.0,96.0,96.0,98.0,,,,,,,,,,,,,,6.000000,5.1,6.000000,5.1,,6.000000,6.000000,3.900000,3.900000,3.900000,3.900000,3.9,5.1,10.200000,15.5,10.2,15.5,,10.2,10.2,13.200000,13.200000,13.200000,13.200000,13.2,15.5,0.850000,1.00,0.85,1.00,,0.85,0.85,1.21,1.21,1.21,1.21,1.21,1.00,1.016000,1.036,1.016,1.036,,1.016,1.016,1.026,1.026,1.026,1.026,1.026,1.036,Horizon,Horizon,Horizon,Horizon,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,Bw2,,A,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,A,Bw2,A,Bw2,,A,A,Bw1,Bw1,Bw1,Bw1,Bw1,Bw2,NO,NO,NO,NO,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Loam,Loam,Loam,Loam,,Loam,Loam,Loam,Loam,Loam,Loam,Loam,Loam
2,10003,0.000000,68.0,10.0,68.0,171.5625,10.0,10.0,10.0,10.0,68.0,68.0,68.0,68.0,10.000000,140.0,68.0,140.0,188.4375,68.0,68.0,68.0,68.0,140.0,140.0,140.0,140.0,5.200000,6.7,5.6,6.7,7.088033,5.6,5.6,5.6,5.6,6.7,6.7,6.7,6.7,4.700000,5.9,4.9,5.9,6.288034,4.9,4.9,4.9,4.9,5.9,5.9,5.9,5.9,0.400000,0.5,0.3,0.5,0.2625,0.3,0.3,0.3,0.3,0.5,0.5,0.5,0.5,0.900000,0.9,0.9,0.9,0.45,0.9,0.9,0.9,0.9,0.9,0.9,0.9,0.9,1.900000,1.1,1.0,1.1,0.88125,1.0,1.0,1.0,1.0,1.1,1.1,1.1,1.1,5.000000,3.500000,3.9,3.500000,3.500000,3.9,3.900000,3.900000,3.900000,3.500000,3.500000,3.500000,3.500000,16.299999,17.700001,16.4,17.700001,20.325,16.4,16.400000,16.400000,16.400000,17.700001,17.700001,17.700001,17.700001,24.500000,23.700001,22.500000,23.700001,25.418750,22.500000,22.500000,22.500000,22.500000,23.700001,23.700001,23.700001,23.700001,34.400002,36.799999,34.200001,36.799999,36.15625,34.200001,34.200001,34.200001,34.200001,36.799999,36.799999,36.799999,36.799999,21.700001,22.100000,24.500000,22.1,23.68125,24.500000,24.500000,24.500000,24.500000,22.100000,22.100000,22.100000,22.100000,56.099998,58.900002,58.700001,58.900002,59.837500,58.700001,58.700001,58.700001,58.700001,58.900002,58.900002,58.900002,58.900002,19.400000,17.400000,18.799999,17.400000,14.743750,18.799999,18.799999,18.799999,18.799999,17.400000,17.400000,17.400000,17.400000,3.86,0.49,1.10,0.49,0.1525,1.10,1.10,1.10,1.10,0.49,0.49,0.49,0.49,2.000000,1.0,3.0,1.0,4.25,3.0,3.0,3.0,3.0,1.0,1.0,1.0,1.0,0.000000,0.0,1.0,0.0,1.375,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.000000,1.0,4.0,1.0,5.625,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,10.000000,7.0,10.0,7.0,10.25,10.0,10.0,10.0,10.0,7.0,7.0,7.0,7.0,2.000000,1.0,4.0,1.0,5.625,4.0,4.0,4.0,4.0,1.0,1.0,1.0,1.0,1.0,4.0,2.0,4.0,4.0625,2.0,2.0,2.0,2.0,4.0,4.0,4.0,4.0,26.799999,20.400000,22.900000,20.400000,15.375,22.900000,22.900000,22.900000,22.900000,20.400000,20.400000,20.400000,20.400000,20.400000,15.500000,16.799999,15.500000,11.95,16.799999,16.799999,16.799999,16.799999,15.500000,15.500000,15.500000,15.500000,15.100000,16.3,16.0,16.3,13.39375,16.0,16.0,16.0,16.0,16.3,16.3,16.3,16.3,56.000000,80.0,70.0,80.0,87.125,70.0,70.0,70.0,70.0,80.0,80.0,80.0,80.0,74.000000,100.0,95.0,100.0,100.0,95.0,95.0,95.0,95.0,100.0,100.0,100.0,100.0,,,,,,,,,,,,,,11.700000,4.1,6.900000,4.1,1.98125,6.900000,6.900000,6.900000,6.900000,4.100000,4.100000,4.1,4.1,14.800000,8.7,9.0,8.7,7.0,9.0,9.0,9.000000,9.000000,8.700000,8.700000,8.7,8.7,0.760000,0.50,0.48,0.50,0.4825,0.48,0.48,0.48,0.48,0.50,0.50,0.50,0.50,1.018000,1.015,1.016,1.015,1.012375,1.016,1.016,1.016,1.016,1.015,1.015,1.015,1.015,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A1,Bt,A2,Bt,2Bw,A2,A2,A2,A2,Bt,Bt,Bt,Bt,A1,Bt,A2,Bt,2Bw,A2,A2,A2,A2,Bt,Bt,Bt,Bt,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam
3,10004,0.000000,76.0,0.0,,,0.0,30.0,30.0,30.0,30.0,76.0,76.0,76.0,30.000000,107.0,30.0,,,30.0,76.0,76.0,76.0,76.0,107.0,107.0,107.0,6.700000,5.9,6.7,,,6.7,6.0,6.0,6.0,6.0,5.9,5.9,5.9,6.100000,5.4,6.1,,,6.1,5.3,5.3,5.3,5.3,5.4,5.4,5.4,3.900000,4.8,3.9,,,3.9,4.1,4.1,4.1,4.1,4.8,4.8,4.8,3.500000,4.3,3.5,,,3.5,3.7,3.7,3.7,3.7,4.3,4.3,4.3,3.600000,4.6,3.6,,,3.6,3.6,3.6,3.6,3.6,4.6,4.6,4.6,7.600000,8.000000,7.6,,,7.6,7.400000,7.400000,7.400000,7.400000,8.000000,8.000000,8.000000,15.100000,16.000000,15.1,,,15.1,17.299999,17.299999,17.299999,17.299999,16.000000,16.000000,16.000000,33.700001,37.700001,33.700001,,,33.700001,36.099998,36.099998,36.099998,36.099998,37.700001,37.700001,37.700001,27.600000,24.299999,27.600000,,,27.600000,26.799999,26.799999,26.799999,26.799999,24.299999,24.299999,24.299999,27.299999,22.600000,27.299999,,,27.299999,26.100000,26.100000,26.100000,26.100000,22.600000,22.600000,22.600000,54.900002,46.900002,54.900002,,,54.900002,52.900002,52.900002,52.900002,52.900002,46.900002,46.900002,46.900002,11.400000,15.400000,11.400000,,,11.400000,11.000000,11.000000,11.000000,11.000000,15.400000,15.400000,15.400000,5.21,0.87,5.21,,,5.21,0.78,0.78,0.78,0.78,0.87,0.87,0.87,12.000000,12.0,12.0,,,12.0,9.0,9.0,9.0,9.0,12.0,12.0,12.0,10.000000,12.0,10.0,,,10.0,6.0,6.0,6.0,6.0,12.0,12.0,12.0,0.0,5.0,0.0,,,0.0,0.0,0.0,0.0,0.0,5.0,5.0,5.0,22.000000,29.0,22.0,,,22.0,15.0,15.0,15.0,15.0,29.0,29.0,29.0,37.000000,44.0,37.0,,,37.0,31.0,31.0,31.0,31.0,44.0,44.0,44.0,22.000000,29.0,22.0,,,22.0,15.0,15.0,15.0,15.0,29.0,29.0,29.0,1.0,2.0,1.0,,,1.0,3.0,3.0,3.0,3.0,2.0,2.0,2.0,30.799999,15.900000,30.799999,,,30.799999,16.799999,16.799999,16.799999,16.799999,15.900000,15.900000,15.900000,24.000000,12.700000,24.000000,,,24.000000,11.400000,11.400000,11.400000,11.400000,12.700000,12.700000,12.700000,24.300000,11.4,24.3,,,24.3,12.0,12.0,12.0,12.0,11.4,11.4,11.4,79.000000,72.0,79.0,,,79.0,71.0,71.0,71.0,71.0,72.0,72.0,72.0,100.000000,90.0,100.0,,,100.0,100.0,100.0,100.0,100.0,90.0,90.0,90.0,,,,,,,,,,,,,,6.500000,4.5,6.500000,,,6.500000,4.800000,4.800000,4.800000,4.800000,4.500000,4.5,4.5,14.200000,8.5,14.2,,,14.2,7.3,7.300000,7.300000,7.300000,8.500000,8.5,8.5,1.250000,0.55,1.25,,,1.25,0.66,0.66,0.66,0.66,0.55,0.55,0.55,1.019000,1.012,1.019,,,1.019,1.011,1.011,1.011,1.011,1.012,1.012,1.012,Horizon,Horizon,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw2,A,,,A,Bw1,Bw1,Bw1,Bw1,Bw2,Bw2,Bw2,A,Bw2,A,,,A,Bw1,Bw1,Bw1,Bw1,Bw2,Bw2,Bw2,NO,NO,NO,,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Loam,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Loam,Loam,Loam
4,10005,0.000000,81.0,18.0,,,18.0,18.0,18.0,18.0,18.0,18.0,81.0,81.0,18.000000,107.0,81.0,,,81.0,81.0,81.0,81.0,81.0,81.0,107.0,107.0,5.500000,5.6,5.4,,,5.4,5.4,5.4,5.4,5.4,5.4,5.6,5.6,4.900000,4.9,4.7,,,4.7,4.7,4.7,4.7,4.7,4.7,4.9,4.9,0.900000,0.4,0.6,,,0.6,0.6,0.6,0.6,0.6,0.6,0.4,0.4,0.800000,0.8,0.9,,,0.9,0.9,0.9,0.9,0.9,0.9,0.8,0.8,1.800000,1.3,1.3,,,1.3,1.3,1.3,1.3,1.3,1.3,1.3,1.3,3.300000,3.600000,3.4,,,3.4,3.400000,3.400000,3.400000,3.400000,3.400000,3.600000,3.600000,21.200001,22.100000,21.9,,,21.9,21.900000,21.900000,21.900000,21.900000,21.900000,22.100000,22.100000,28.000000,28.200001,28.100000,,,28.100000,28.100000,28.100000,28.100000,28.100000,28.100000,28.200001,28.200001,34.299999,33.599998,33.500000,,,33.500000,33.500000,33.500000,33.500000,33.500000,33.500000,33.599998,33.599998,19.500000,19.799999,20.700001,,,20.700001,20.700001,20.700001,20.700001,20.700001,20.700001,19.799999,19.799999,53.799999,53.400002,54.200001,,,54.200001,54.200001,54.200001,54.200001,54.200001,54.200001,53.400002,53.400002,18.200001,18.400000,17.700001,,,17.700001,17.700001,17.700001,17.700001,17.700001,17.700001,18.400000,18.400000,2.03,0.33,0.91,,,0.91,0.91,0.91,0.91,0.91,0.91,0.33,0.33,2.000000,2.0,2.0,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.000000,4.0,2.0,,,2.0,2.0,2.0,2.0,2.0,2.0,4.0,4.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.000000,6.0,4.0,,,4.0,4.0,4.0,4.0,4.0,4.0,6.0,6.0,11.000000,12.0,10.0,,,10.0,10.0,10.0,10.0,10.0,10.0,12.0,12.0,4.000000,6.0,4.0,,,4.0,4.0,4.0,4.0,4.0,4.0,6.0,6.0,2.0,3.0,4.0,,,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,17.900000,13.100000,15.200000,,,15.200000,15.200000,15.200000,15.200000,15.200000,15.200000,13.100000,13.100000,12.400000,10.400000,10.500000,,,10.500000,10.500000,10.500000,10.500000,10.500000,10.500000,10.400000,10.400000,7.600000,8.0,6.3,,,6.3,6.3,6.3,6.3,6.3,6.3,8.0,8.0,42.000000,61.0,41.0,,,41.0,41.0,41.0,41.0,41.0,41.0,61.0,61.0,61.000000,77.0,60.0,,,60.0,60.0,60.0,60.0,60.0,60.0,77.0,77.0,,,,,,,,,,,,,,10.300000,5.1,8.900000,,,8.900000,8.900000,8.900000,8.900000,8.900000,8.900000,5.1,5.1,9.500000,7.7,8.5,,,8.5,8.5,8.500000,8.500000,8.500000,8.500000,7.7,7.7,0.520000,0.42,0.48,,,0.48,0.48,0.48,0.48,0.48,0.48,0.42,0.42,1.012000,1.010,1.011,,,1.011,1.011,1.011,1.011,1.011,1.011,1.010,1.010,Horizon,Horizon,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,Horizon,A,Bw,Bt,,,Bt,Bt,Bt,Bt,Bt,Bt,Bw,Bw,A,Bw,Bt,,,Bt,Bt,Bt,Bt,Bt,Bt,Bw,Bw,NO,NO,NO,,,NO,NO,NO,NO,NO,NO,NO,NO,Air-dry,Air-dry,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,Silt loam,Silt loam,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57580,87764,2.461538,,13.0,,,13.0,13.0,40.0,40.0,40.0,,,,10.538462,,40.0,,,40.0,40.0,60.0,60.0,60.0,,,,6.677795,,6.6,,,6.6,6.6,5.8,5.8,5.8,,,,5.900000,,5.7,,,5.7,5.7,4.8,4.8,4.8,,,,4.863637,,2.8,,,2.8,2.8,6.5,6.5,6.5,,,,6.818182,,5.2,,,5.2,5.2,8.4,8.4,8.4,,,,3.681818,,3.8,,,3.8,3.8,7.9,7.9,7.9,,,,3.445455,,4.4,,,4.4,4.400000,6.900000,6.900000,6.900000,,,,8.709091,,9.6,,,9.6,9.600000,6.100000,6.100000,6.100000,,,,27.518182,,25.799999,,,25.799999,25.799999,35.799999,35.799999,35.799999,,,,21.890909,,24.500000,,,24.500000,24.500000,21.299999,21.299999,21.299999,,,,43.536364,,45.099998,,,45.099998,45.099998,33.500000,33.500000,33.500000,,,,65.427271,,69.599998,,,69.599998,69.599998,54.799999,54.799999,54.799999,,,,7.054546,,4.600000,,,4.600000,4.600000,9.400000,9.400000,9.400000,,,,,,,,,,,,,,,,,3.846154,,3.0,,,3.0,3.0,3.0,3.0,3.0,,,,0.000000,,0.0,,,0.0,0.0,1.0,1.0,1.0,,,,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,,3.846154,,3.0,,,3.0,3.0,4.0,4.0,4.0,,,,19.153846,,19.0,,,19.0,19.0,33.0,33.0,33.0,,,,3.846154,,3.0,,,3.0,3.0,4.0,4.0,4.0,,,,0.0,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,,38.081818,,36.099998,,,36.099998,36.099998,11.600000,11.600000,11.600000,,,,,,,,,,,,,,,,,11.781818,,6.0,,,6.0,6.0,5.1,5.1,5.1,,,,28.363636,,17.0,,,17.0,17.0,44.0,44.0,44.0,,,,52.727273,,39.0,,,39.0,39.0,76.0,76.0,76.0,,,,,,,,,,,,,,,,,32.253845,,30.100000,,,30.100000,30.100000,6.500000,6.500000,6.500000,,,,25.538462,,12.3,,,12.3,12.3,5.800000,5.800000,5.800000,,,,1.981818,,2.67,,,2.67,2.67,0.62,0.62,0.62,,,,1.048769,,1.041,,,1.041,1.041,1.013,1.013,1.013,,,,Horizon,,Horizon,,,Horizon,Horizon,Horizon,Horizon,Horizon,,,,Bw1,,Bw2,,,Bw2,Bw2,2Bt,2Bt,2Bt,,,,Bw1,,Bw2,,,Bw2,Bw2,2Bt,2Bt,2Bt,,,,NO,,NO,,,NO,NO,NO,NO,NO,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,Air-dry,Air-dry,Air-dry,,,,Silt loam,,Silt loam,,,Silt loam,Silt loam,Silt loam,Silt loam,Silt loam,,,
57581,87765,3.142857,,14.0,,,29.0,29.0,29.0,,,,,,10.857143,,29.0,,,45.0,45.0,45.0,,,,,,6.280744,,6.1,,,6.2,6.2,6.2,,,,,,5.673763,,5.4,,,5.2,5.2,5.2,,,,,,3.900000,,4.4,,,4.0,4.0,4.0,,,,,,4.916666,,6.4,,,6.0,6.0,6.0,,,,,,5.566667,,7.7,,,8.4,8.4,8.4,,,,,,5.883333,,7.6,,,7.7,7.700000,7.700000,,,,,,8.800000,,8.6,,,7.9,7.900000,7.900000,,,,,,29.066667,,34.700001,,,34.000000,34.000000,34.000000,,,,,,24.600000,,24.100000,,,25.700001,25.700001,25.700001,,,,,,37.649999,,32.500000,,,31.700001,31.700001,31.700001,,,,,,62.250001,,56.599998,,,57.400002,57.400002,57.400002,,,,,,8.683333,,8.700000,,,8.600000,8.600000,8.600000,,,,,,,,,,,,,,,,,,,4.000000,,5.0,,,4.0,4.0,4.0,,,,,,1.714286,,2.0,,,3.0,3.0,3.0,,,,,,0.0,,0.0,,,0.0,0.0,0.0,,,,,,5.714286,,7.0,,,7.0,7.0,7.0,,,,,,21.714286,,31.0,,,31.0,31.0,31.0,,,,,,5.714286,,7.0,,,7.0,7.0,7.0,,,,,,0.0,,0.0,,,0.0,0.0,0.0,,,,,,25.116666,,19.100000,,,10.000000,10.000000,10.000000,,,,,,,,,,,,,,,,,,,9.416667,,6.2,,,5.1,5.1,5.1,,,,,,36.333333,,32.0,,,51.0,51.0,51.0,,,,,,62.166667,,56.0,,,88.0,88.0,88.0,,,,,,,,,,,,,,,,,,,20.071429,,12.900000,,,4.900000,4.900000,4.900000,,,,,,16.271429,,8.9,,,5.7,5.7,5.700000,,,,,,1.325000,,1.02,,,0.66,0.66,0.66,,,,,,1.035571,,1.026,,,1.013,1.013,1.013,,,,,,Horizon,,Horizon,,,Horizon,Horizon,Horizon,,,,,,Bw1,,Bw2,,,2Bt,2Bt,2Bt,,,,,,Bw1,,Bw2,,,2Bt,2Bt,2Bt,,,,,,NO,,NO,,,NO,NO,NO,,,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,Air-dry,,,,,,Silt loam,,Silt loam,,,Silt loam,Silt loam,Silt loam,,,,,
57582,87766,0.000000,,0.0,,,,,,,,,,,15.000000,,15.0,,,,,,,,,,,4.100000,,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,1.0,,,,,,,,,,,29.200001,,29.200001,,,,,,,,,,,31.100000,,31.100000,,,,,,,,,,,29.200000,,29.2,,,,,,,,,,,100.000000,,100.0,,,,,,,,,,,94.000000,,94.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Reporting layer,,Reporting layer,,,,,,,,,,,Oi,,Oi,,,,,,,,,,,,,,,,,,,,,,,,NO,,NO,,,,,,,,,,,Air-dry,,Air-dry,,,,,,,,,,,,,,,,,,,,,,,
57583,87767,0.000000,,0.0,,,0.0,0.0,,,,,,,32.000000,,32.0,,,32.0,32.0,,,,,,,4.100000,,4.1,,,4.1,4.1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,0.0,,,0.0,0.0,,,,,,,30.200001,,30.200001,,,30.200001,30.200001,,,,,,,33.799999,,33.799999,,,33.799999,33.799999,,,,,,,30.200000,,30.2,,,30.2,30.2,,,,,,,100.000000,,100.0,,,100.0,100.0,,,,,,,89.000000,,89.0,,,89.0,89.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Reporting layer,,Reporting layer,,,Reporting layer,Reporting layer,,,,,,,Oe,,Oe,,,Oe,Oe,,,,,,,,,,,,,,,,,,,,NO,,NO,,,NO,NO,,,,,,,Air-dry,,Air-dry,,,Air-dry,Air-dry,,,,,,,,,,,,,,,,,,,


#### join ncss layer and pedon data

In [78]:
ncss_data = pd.merge(ncsspedonlabdata_final,
                     ncsslayerlab_weighted_filled_pivot_final,
                     left_on = ['ncsspedonlabdataiid'],
                     right_on= ['ncsspedonlabdataiidref'],
                     how='left')

### climate

In [79]:
# preview of the raw data
site_prism.head()

Unnamed: 0.1,Unnamed: 0,siteiid,latstddecimaldegrees,longstddecimaldegrees,ppt01,ppt02,ppt03,ppt04,ppt05,ppt06,ppt07,ppt08,ppt09,ppt10,ppt11,ppt12,pptannual,tdmean01,tdmean02,tdmean03,tdmean04,tdmean05,tdmean06,tdmean07,tdmean08,tdmean09,tdmean10,tdmean11,tdmean12,tdmeanannual,tmax01,tmax02,tmax03,tmax04,tmax05,tmax06,tmax07,tmax08,tmax09,tmax10,tmax11,tmax12,tmaxannual,tmean01,tmean02,tmean03,tmean04,tmean05,tmean06,tmean07,tmean08,tmean09,tmean10,tmean11,tmean12,tmeanannual,tmin01,tmin02,tmin03,tmin04,tmin05,tmin06,tmin07,tmin08,tmin09,tmin10,tmin11,tmin12,tminannual,vpdmax01,vpdmax02,vpdmax03,vpdmax04,vpdmax05,vpdmax06,vpdmax07,vpdmax08,vpdmax09,vpdmax10,vpdmax11,vpdmax12,vpdmaxannual,vpdmin01,vpdmin02,vpdmin03,vpdmin04,vpdmin05,vpdmin06,vpdmin07,vpdmin08,vpdmin09,vpdmin10,vpdmin11,vpdmin12,vpdminannual
0,8,14572,38.683333,-94.9,28.75,40.279999,66.230003,95.389999,145.039993,151.050003,102.360001,103.690002,108.949997,89.629997,66.129997,43.509998,1041.01001,-6.22,-4.1,0.37,5.51,11.98,17.299999,19.530001,18.66,14.12,7.35,1.44,-4.13,6.8175,4.01,7.13,13.17,18.780001,23.959999,28.76,31.57,31.25,26.700001,20.07,12.28,5.16,18.57,-1.2,1.48,7.08,12.695001,18.195,23.225,25.985001,25.375,20.455,13.925,6.86,0.15,12.852083,-6.41,-4.17,0.99,6.61,12.43,17.690001,20.4,19.5,14.21,7.78,1.44,-4.86,7.134167,4.14,5.7,8.9,12.65,15.14,19.25,23.66,24.200001,18.93,12.9,7.4,4.2,13.089167,0.54,0.65,0.98,1.6,1.64,2.07,2.76,2.44,1.58,1.24,0.88,0.53,1.409167
1,9,14573,38.708408,-94.927522,28.66,39.93,65.709999,95.32,143.520004,149.419998,104.040001,104.150002,110.080002,89.25,65.480003,43.009998,1038.570068,-6.24,-4.15,0.29,5.54,12.02,17.27,19.49,18.67,14.12,7.35,1.49,-4.11,6.811666,3.84,6.95,12.95,18.42,23.67,28.5,31.32,31.09,26.530001,19.790001,12.24,5.16,18.371666,-1.27,1.375,6.935,12.54,18.084999,23.095001,25.834999,25.325001,20.450001,13.855,6.89,0.19,12.775416,-6.38,-4.2,0.92,6.66,12.5,17.690001,20.35,19.559999,14.37,7.92,1.54,-4.78,7.179167,4.03,5.57,8.67,12.09,14.6,18.700001,23.07,23.65,18.51,12.4,7.32,4.19,12.733334,0.55,0.66,0.98,1.61,1.66,2.08,2.74,2.46,1.65,1.29,0.89,0.54,1.425833
2,18,14582,38.704589,-94.930542,28.66,39.93,65.709999,95.32,143.520004,149.419998,104.040001,104.150002,110.080002,89.25,65.480003,43.009998,1038.570068,-6.24,-4.15,0.29,5.54,12.02,17.27,19.49,18.67,14.12,7.35,1.49,-4.11,6.811666,3.84,6.95,12.95,18.42,23.67,28.5,31.32,31.09,26.530001,19.790001,12.24,5.16,18.371666,-1.27,1.375,6.935,12.54,18.084999,23.095001,25.834999,25.325001,20.450001,13.855,6.89,0.19,12.775416,-6.38,-4.2,0.92,6.66,12.5,17.690001,20.35,19.559999,14.37,7.92,1.54,-4.78,7.179167,4.03,5.57,8.67,12.09,14.6,18.700001,23.07,23.65,18.51,12.4,7.32,4.19,12.733334,0.55,0.66,0.98,1.61,1.66,2.08,2.74,2.46,1.65,1.29,0.89,0.54,1.425833
3,19,14583,38.714886,-94.962772,28.74,39.580002,65.889999,95.57,142.460007,149.160004,104.360001,103.57,109.839996,88.809998,65.110001,42.709999,1035.800049,-6.25,-4.13,0.31,5.53,12.03,17.280001,19.52,18.66,14.11,7.35,1.45,-4.14,6.81,3.9,7.01,13.05,18.57,23.799999,28.65,31.459999,31.17,26.59,19.870001,12.23,5.17,18.455833,-1.26,1.41,6.995,12.605,18.154999,23.195,25.93,25.355,20.440001,13.885,6.845,0.17,12.810416,-6.42,-4.19,0.94,6.64,12.51,17.74,20.4,19.540001,14.29,7.9,1.46,-4.83,7.165,4.08,5.61,8.79,12.34,14.82,19.040001,23.379999,23.950001,18.67,12.56,7.35,4.21,12.9,0.54,0.66,0.98,1.6,1.65,2.11,2.75,2.45,1.61,1.28,0.88,0.54,1.420833
4,28,14592,38.714856,-94.964592,28.74,39.580002,65.889999,95.57,142.460007,149.160004,104.360001,103.57,109.839996,88.809998,65.110001,42.709999,1035.800049,-6.25,-4.13,0.31,5.53,12.03,17.280001,19.52,18.66,14.11,7.35,1.45,-4.14,6.81,3.9,7.01,13.05,18.57,23.799999,28.65,31.459999,31.17,26.59,19.870001,12.23,5.17,18.455833,-1.26,1.41,6.995,12.605,18.154999,23.195,25.93,25.355,20.440001,13.885,6.845,0.17,12.810416,-6.42,-4.19,0.94,6.64,12.51,17.74,20.4,19.540001,14.29,7.9,1.46,-4.83,7.165,4.08,5.61,8.79,12.34,14.82,19.040001,23.379999,23.950001,18.67,12.56,7.35,4.21,12.9,0.54,0.66,0.98,1.6,1.65,2.11,2.75,2.45,1.61,1.28,0.88,0.54,1.420833


In [80]:
# Drop the column Unnamed: 0
site_prism = site_prism.drop(columns = 'Unnamed: 0')

# Drop duplicate rows except first occurence based on all columns
site_prism = site_prism.drop_duplicates()

# Reset index
site_prism = site_prism.reset_index(drop=True)

### satellite

In [81]:
# preview of the raw data
satellite.head()

Unnamed: 0.1,Unnamed: 0,siteiid,siteobsiid,peiid,NDVI_5Pct,NDVI_IQR90,NDVI_95Pct,SATVI_5Pct,SATVI_IQR90,SATVI_95Pct
0,1,113293,112565,113948,4689,3603,8292,540,1034,1574
1,2,118824,117906,120568,4179,4693,8872,351,1959,2310
2,3,118873,117955,120617,3379,4852,8231,667,1820,2487
3,4,878296,853055,862602,5205,3880,9085,751,1411,2162
4,5,975038,957297,956744,3628,4038,7666,-119,1735,1616


In [82]:
# Drop the column Unnamed: 0 and two ids that are not needed
satellite = satellite.drop(columns=['Unnamed: 0', 'siteobsiid', 'peiid'])

# Drop duplicate rows except first occurence based on all columns
satellite = satellite.drop_duplicates()

## Join sitepm + geomorph + ncss + climate + satellite

### Join ssp + sitepm

In [84]:
# read in siteiidref as string
ssp_final['siteiid'] = ssp_final['siteiid'].astype('str')

ssp_sitepm = pd.merge(ssp_final, sitepm_final, how='left',
                                    left_on=['siteiid'], right_on=['siteiidref'],
                                   suffixes=('_ssp','_sitepm'))

### + ncss data

In [85]:
# join ssp + sitepm + ncss

ssp_sitepm['peiid'] = ssp_sitepm['peiid'].astype('str') # change peiid to string

ssp_sitepm_ncss = pd.merge(ssp_sitepm, ncss_data,
                     left_on = ['peiid'],
                     right_on= ['peiidref'],
                     how='left')

# drop the below 6 categorical variables from analysis consideration
ssp_sitepm_ncss_v2 = ssp_sitepm_ncss.drop(columns = ['obsdate', 'obsdatekin', 'pedrecorig', 'pedontype',
                                                    'pedonpurpo', 'pedlabsampnum'])

In [86]:
ssp_sitepm_ncss_v2.head()

Unnamed: 0,horizdatnm,plsssectio,plsstownsh,plssrange,plssmeridi,utmnorthin,utmeasting,elev,hillslopep,slope,aspect,shapeacros,shapedown,drainagecl,pmgroupnam,benchmarks,flodfreqcl,latstddeci,longstddec,sdbiidref,siteiid,ecostateid,commphasei,pedodermco_site_siteobs,drainedfla,beddingfla,plantation,siteiidref_ssp,siteobsiid,upedonid,taxonname,taxclname,taxonkind,earthcovki,earthcov_1,tsectiidre,soilreplic,pedodermco_pedon,pedonhydri,pedbiidref,grpiidref_x,objuseriid,recuseriid,peiid,peiidref_x,pmorder,pmmodifier,pmkind,pmorigin,siteiidref_sitepm,peiidref_y,psctopdepth,pscbotdepth,noncarbclaywtavg,claytotwtavg,le0to100,wf0175wtavgpsc,volfractgt2wtavg,cec7clayratiowtavg,ncsspedbiidref,grpiidref_y,ncsspedonlabdataiid,ncsspedonlabdataiidref,hzdept_0cm_9cm,hzdept_100cm_109cm,hzdept_10cm_19cm,hzdept_110cm_119cm,hzdept_120+cm,hzdept_20cm_29cm,hzdept_30cm_39cm,hzdept_40cm_49cm,hzdept_50cm_59cm,hzdept_60cm_69cm,hzdept_70cm_79cm,hzdept_80cm_89cm,hzdept_90cm_99cm,hzdepb_0cm_9cm,hzdepb_100cm_109cm,hzdepb_10cm_19cm,hzdepb_110cm_119cm,hzdepb_120+cm,hzdepb_20cm_29cm,hzdepb_30cm_39cm,hzdepb_40cm_49cm,hzdepb_50cm_59cm,hzdepb_60cm_69cm,hzdepb_70cm_79cm,hzdepb_80cm_89cm,hzdepb_90cm_99cm,ph1to1h2o_0cm_9cm,ph1to1h2o_100cm_109cm,ph1to1h2o_10cm_19cm,ph1to1h2o_110cm_119cm,ph1to1h2o_120+cm,ph1to1h2o_20cm_29cm,ph1to1h2o_30cm_39cm,ph1to1h2o_40cm_49cm,ph1to1h2o_50cm_59cm,ph1to1h2o_60cm_69cm,ph1to1h2o_70cm_79cm,ph1to1h2o_80cm_89cm,ph1to1h2o_90cm_99cm,ph01mcacl2_0cm_9cm,ph01mcacl2_100cm_109cm,ph01mcacl2_10cm_19cm,ph01mcacl2_110cm_119cm,ph01mcacl2_120+cm,ph01mcacl2_20cm_29cm,ph01mcacl2_30cm_39cm,ph01mcacl2_40cm_49cm,ph01mcacl2_50cm_59cm,ph01mcacl2_60cm_69cm,ph01mcacl2_70cm_79cm,ph01mcacl2_80cm_89cm,ph01mcacl2_90cm_99cm,sandvcmeasured_0cm_9cm,sandvcmeasured_100cm_109cm,sandvcmeasured_10cm_19cm,sandvcmeasured_110cm_119cm,sandvcmeasured_120+cm,sandvcmeasured_20cm_29cm,sandvcmeasured_30cm_39cm,sandvcmeasured_40cm_49cm,sandvcmeasured_50cm_59cm,sandvcmeasured_60cm_69cm,sandvcmeasured_70cm_79cm,sandvcmeasured_80cm_89cm,sandvcmeasured_90cm_99cm,sandcomeasured_0cm_9cm,sandcomeasured_100cm_109cm,sandcomeasured_10cm_19cm,sandcomeasured_110cm_119cm,sandcomeasured_120+cm,sandcomeasured_20cm_29cm,sandcomeasured_30cm_39cm,sandcomeasured_40cm_49cm,sandcomeasured_50cm_59cm,sandcomeasured_60cm_69cm,sandcomeasured_70cm_79cm,sandcomeasured_80cm_89cm,sandcomeasured_90cm_99cm,sandmedmeasured_0cm_9cm,sandmedmeasured_100cm_109cm,sandmedmeasured_10cm_19cm,sandmedmeasured_110cm_119cm,sandmedmeasured_120+cm,sandmedmeasured_20cm_29cm,sandmedmeasured_30cm_39cm,sandmedmeasured_40cm_49cm,sandmedmeasured_50cm_59cm,sandmedmeasured_60cm_69cm,sandmedmeasured_70cm_79cm,sandmedmeasured_80cm_89cm,sandmedmeasured_90cm_99cm,sandfinemeasured_0cm_9cm,sandfinemeasured_100cm_109cm,sandfinemeasured_10cm_19cm,sandfinemeasured_110cm_119cm,sandfinemeasured_120+cm,sandfinemeasured_20cm_29cm,sandfinemeasured_30cm_39cm,sandfinemeasured_40cm_49cm,sandfinemeasured_50cm_59cm,sandfinemeasured_60cm_69cm,sandfinemeasured_70cm_79cm,sandfinemeasured_80cm_89cm,sandfinemeasured_90cm_99cm,sandvfmeasured_0cm_9cm,sandvfmeasured_100cm_109cm,sandvfmeasured_10cm_19cm,sandvfmeasured_110cm_119cm,sandvfmeasured_120+cm,sandvfmeasured_20cm_29cm,sandvfmeasured_30cm_39cm,sandvfmeasured_40cm_49cm,sandvfmeasured_50cm_59cm,sandvfmeasured_60cm_69cm,sandvfmeasured_70cm_79cm,sandvfmeasured_80cm_89cm,sandvfmeasured_90cm_99cm,sandtotmeasured_0cm_9cm,sandtotmeasured_100cm_109cm,sandtotmeasured_10cm_19cm,sandtotmeasured_110cm_119cm,sandtotmeasured_120+cm,sandtotmeasured_20cm_29cm,sandtotmeasured_30cm_39cm,sandtotmeasured_40cm_49cm,sandtotmeasured_50cm_59cm,sandtotmeasured_60cm_69cm,sandtotmeasured_70cm_79cm,sandtotmeasured_80cm_89cm,sandtotmeasured_90cm_99cm,siltcomeasured_0cm_9cm,siltcomeasured_100cm_109cm,siltcomeasured_10cm_19cm,siltcomeasured_110cm_119cm,siltcomeasured_120+cm,siltcomeasured_20cm_29cm,siltcomeasured_30cm_39cm,siltcomeasured_40cm_49cm,siltcomeasured_50cm_59cm,siltcomeasured_60cm_69cm,siltcomeasured_70cm_79cm,siltcomeasured_80cm_89cm,siltcomeasured_90cm_99cm,siltfinemeasured_0cm_9cm,siltfinemeasured_100cm_109cm,siltfinemeasured_10cm_19cm,siltfinemeasured_110cm_119cm,siltfinemeasured_120+cm,siltfinemeasured_20cm_29cm,siltfinemeasured_30cm_39cm,siltfinemeasured_40cm_49cm,siltfinemeasured_50cm_59cm,siltfinemeasured_60cm_69cm,siltfinemeasured_70cm_79cm,siltfinemeasured_80cm_89cm,siltfinemeasured_90cm_99cm,silttotmeasured_0cm_9cm,silttotmeasured_100cm_109cm,silttotmeasured_10cm_19cm,silttotmeasured_110cm_119cm,silttotmeasured_120+cm,silttotmeasured_20cm_29cm,silttotmeasured_30cm_39cm,silttotmeasured_40cm_49cm,silttotmeasured_50cm_59cm,silttotmeasured_60cm_69cm,silttotmeasured_70cm_79cm,silttotmeasured_80cm_89cm,silttotmeasured_90cm_99cm,claytotmeasured_0cm_9cm,claytotmeasured_100cm_109cm,claytotmeasured_10cm_19cm,claytotmeasured_110cm_119cm,claytotmeasured_120+cm,claytotmeasured_20cm_29cm,claytotmeasured_30cm_39cm,claytotmeasured_40cm_49cm,claytotmeasured_50cm_59cm,claytotmeasured_60cm_69cm,claytotmeasured_70cm_79cm,claytotmeasured_80cm_89cm,claytotmeasured_90cm_99cm,carbonorganicpctmeasured_0cm_9cm,carbonorganicpctmeasured_100cm_109cm,carbonorganicpctmeasured_10cm_19cm,carbonorganicpctmeasured_110cm_119cm,carbonorganicpctmeasured_120+cm,...,fragwt275_70cm_79cm,fragwt275_80cm_89cm,fragwt275_90cm_99cm,wtpct0175_0cm_9cm,wtpct0175_100cm_109cm,wtpct0175_10cm_19cm,wtpct0175_110cm_119cm,wtpct0175_120+cm,wtpct0175_20cm_29cm,wtpct0175_30cm_39cm,wtpct0175_40cm_49cm,wtpct0175_50cm_59cm,wtpct0175_60cm_69cm,wtpct0175_70cm_79cm,wtpct0175_80cm_89cm,wtpct0175_90cm_99cm,wtpctgt2ws_0cm_9cm,wtpctgt2ws_100cm_109cm,wtpctgt2ws_10cm_19cm,wtpctgt2ws_110cm_119cm,wtpctgt2ws_120+cm,wtpctgt2ws_20cm_29cm,wtpctgt2ws_30cm_39cm,wtpctgt2ws_40cm_49cm,wtpctgt2ws_50cm_59cm,wtpctgt2ws_60cm_69cm,wtpctgt2ws_70cm_79cm,wtpctgt2ws_80cm_89cm,wtpctgt2ws_90cm_99cm,esp_0cm_9cm,esp_100cm_109cm,esp_10cm_19cm,esp_110cm_119cm,esp_120+cm,esp_20cm_29cm,esp_30cm_39cm,esp_40cm_49cm,esp_50cm_59cm,esp_60cm_69cm,esp_70cm_79cm,esp_80cm_89cm,esp_90cm_99cm,cecsumcations_0cm_9cm,cecsumcations_100cm_109cm,cecsumcations_10cm_19cm,cecsumcations_110cm_119cm,cecsumcations_120+cm,cecsumcations_20cm_29cm,cecsumcations_30cm_39cm,cecsumcations_40cm_49cm,cecsumcations_50cm_59cm,cecsumcations_60cm_69cm,cecsumcations_70cm_79cm,cecsumcations_80cm_89cm,cecsumcations_90cm_99cm,cec7_0cm_9cm,cec7_100cm_109cm,cec7_10cm_19cm,cec7_110cm_119cm,cec7_120+cm,cec7_20cm_29cm,cec7_30cm_39cm,cec7_40cm_49cm,cec7_50cm_59cm,cec7_60cm_69cm,cec7_70cm_79cm,cec7_80cm_89cm,cec7_90cm_99cm,sumbases_0cm_9cm,sumbases_100cm_109cm,sumbases_10cm_19cm,sumbases_110cm_119cm,sumbases_120+cm,sumbases_20cm_29cm,sumbases_30cm_39cm,sumbases_40cm_49cm,sumbases_50cm_59cm,sumbases_60cm_69cm,sumbases_70cm_79cm,sumbases_80cm_89cm,sumbases_90cm_99cm,basesatsumcations_0cm_9cm,basesatsumcations_100cm_109cm,basesatsumcations_10cm_19cm,basesatsumcations_110cm_119cm,basesatsumcations_120+cm,basesatsumcations_20cm_29cm,basesatsumcations_30cm_39cm,basesatsumcations_40cm_49cm,basesatsumcations_50cm_59cm,basesatsumcations_60cm_69cm,basesatsumcations_70cm_79cm,basesatsumcations_80cm_89cm,basesatsumcations_90cm_99cm,basesatnh4oac_0cm_9cm,basesatnh4oac_100cm_109cm,basesatnh4oac_10cm_19cm,basesatnh4oac_110cm_119cm,basesatnh4oac_120+cm,basesatnh4oac_20cm_29cm,basesatnh4oac_30cm_39cm,basesatnh4oac_40cm_49cm,basesatnh4oac_50cm_59cm,basesatnh4oac_60cm_69cm,basesatnh4oac_70cm_79cm,basesatnh4oac_80cm_89cm,basesatnh4oac_90cm_99cm,caco3equivmeasured_0cm_9cm,caco3equivmeasured_100cm_109cm,caco3equivmeasured_10cm_19cm,caco3equivmeasured_110cm_119cm,caco3equivmeasured_120+cm,caco3equivmeasured_20cm_29cm,caco3equivmeasured_30cm_39cm,caco3equivmeasured_40cm_49cm,caco3equivmeasured_50cm_59cm,caco3equivmeasured_60cm_69cm,caco3equivmeasured_70cm_79cm,caco3equivmeasured_80cm_89cm,caco3equivmeasured_90cm_99cm,extracid_0cm_9cm,extracid_100cm_109cm,extracid_10cm_19cm,extracid_110cm_119cm,extracid_120+cm,extracid_20cm_29cm,extracid_30cm_39cm,extracid_40cm_49cm,extracid_50cm_59cm,extracid_60cm_69cm,extracid_70cm_79cm,extracid_80cm_89cm,extracid_90cm_99cm,wfifteenbarmeasured_0cm_9cm,wfifteenbarmeasured_100cm_109cm,wfifteenbarmeasured_10cm_19cm,wfifteenbarmeasured_110cm_119cm,wfifteenbarmeasured_120+cm,wfifteenbarmeasured_20cm_29cm,wfifteenbarmeasured_30cm_39cm,wfifteenbarmeasured_40cm_49cm,wfifteenbarmeasured_50cm_59cm,wfifteenbarmeasured_60cm_69cm,wfifteenbarmeasured_70cm_79cm,wfifteenbarmeasured_80cm_89cm,wfifteenbarmeasured_90cm_99cm,wfifteenbartoclay_0cm_9cm,wfifteenbartoclay_100cm_109cm,wfifteenbartoclay_10cm_19cm,wfifteenbartoclay_110cm_119cm,wfifteenbartoclay_120+cm,wfifteenbartoclay_20cm_29cm,wfifteenbartoclay_30cm_39cm,wfifteenbartoclay_40cm_49cm,wfifteenbartoclay_50cm_59cm,wfifteenbartoclay_60cm_69cm,wfifteenbartoclay_70cm_79cm,wfifteenbartoclay_80cm_89cm,wfifteenbartoclay_90cm_99cm,adod_0cm_9cm,adod_100cm_109cm,adod_10cm_19cm,adod_110cm_119cm,adod_120+cm,adod_20cm_29cm,adod_30cm_39cm,adod_40cm_49cm,adod_50cm_59cm,adod_60cm_69cm,adod_70cm_79cm,adod_80cm_89cm,adod_90cm_99cm,layertype_0cm_9cm,layertype_100cm_109cm,layertype_10cm_19cm,layertype_110cm_119cm,layertype_120+cm,layertype_20cm_29cm,layertype_30cm_39cm,layertype_40cm_49cm,layertype_50cm_59cm,layertype_60cm_69cm,layertype_70cm_79cm,layertype_80cm_89cm,layertype_90cm_99cm,hzname_0cm_9cm,hzname_100cm_109cm,hzname_10cm_19cm,hzname_110cm_119cm,hzname_120+cm,hzname_20cm_29cm,hzname_30cm_39cm,hzname_40cm_49cm,hzname_50cm_59cm,hzname_60cm_69cm,hzname_70cm_79cm,hzname_80cm_89cm,hzname_90cm_99cm,hznameoriginal_0cm_9cm,hznameoriginal_100cm_109cm,hznameoriginal_10cm_19cm,hznameoriginal_110cm_119cm,hznameoriginal_120+cm,hznameoriginal_20cm_29cm,hznameoriginal_30cm_39cm,hznameoriginal_40cm_49cm,hznameoriginal_50cm_59cm,hznameoriginal_60cm_69cm,hznameoriginal_70cm_79cm,hznameoriginal_80cm_89cm,hznameoriginal_90cm_99cm,stratextsflag_0cm_9cm,stratextsflag_100cm_109cm,stratextsflag_10cm_19cm,stratextsflag_110cm_119cm,stratextsflag_120+cm,stratextsflag_20cm_29cm,stratextsflag_30cm_39cm,stratextsflag_40cm_49cm,stratextsflag_50cm_59cm,stratextsflag_60cm_69cm,stratextsflag_70cm_79cm,stratextsflag_80cm_89cm,stratextsflag_90cm_99cm,moistprepstate_0cm_9cm,moistprepstate_100cm_109cm,moistprepstate_10cm_19cm,moistprepstate_110cm_119cm,moistprepstate_120+cm,moistprepstate_20cm_29cm,moistprepstate_30cm_39cm,moistprepstate_40cm_49cm,moistprepstate_50cm_59cm,moistprepstate_60cm_69cm,moistprepstate_70cm_79cm,moistprepstate_80cm_89cm,moistprepstate_90cm_99cm,texcl_0cm_9cm,texcl_100cm_109cm,texcl_10cm_19cm,texcl_110cm_119cm,texcl_120+cm,texcl_20cm_29cm,texcl_30cm_39cm,texcl_40cm_49cm,texcl_50cm_59cm,texcl_60cm_69cm,texcl_70cm_79cm,texcl_80cm_89cm,texcl_90cm_99cm
0,North American Datum of 1983,6,15 S,23 E,,0.0,0.0,290.0,Backslope,4.0,225,Linear,Convex,Moderately well drained,clayey residuum weathered from clayey shale,0,,38.683333,-94.9,139,14572,0,0,0,0,0,0,14572,14517,93KS121030,Summit,"Fine, smectitic, thermic Vertic Argiudolls",Series,Crop cover,,7571,0,0,No,139,19808,2542,2542,14549,14549,,Clayey,Residuum,Clayey shale,14572,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,North American Datum of 1983,35,15 S,23 E,,0.0,0.0,320.0,Shoulder,2.0,45,Linear,Convex,Moderately well drained,clayey residuum weathered from clayey shale,0,,38.708408,-94.927522,139,14573,0,0,0,0,0,0,14573,14518,93KS121031,Eram,"Fine, smectitic, thermic Aquic Argiudolls",Series,Grass/herbaceous cover,,4182,0,0,No,139,19808,2542,2542,14550,14550,,Clayey,Residuum,Clayey shale,14573,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,North American Datum of 1983,35,15 S,23 E,,0.0,0.0,320.0,Shoulder,3.0,225,Linear,Convex,Moderately well drained,silty and clayey residuum weathered from limes...,0,,38.704589,-94.930542,139,14582,0,0,0,0,0,0,14582,14527,93KS121040,Lula,"Fine, smectitic, thermic Vertic Paleudolls",Series,Grass/herbaceous cover,,4182,0,0,No,139,19808,2542,2542,14559,14559,,Silty and clayey,Residuum,Limestone and shale,14582,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,North American Datum of 1983,27,15 S,22 E,,0.0,0.0,290.0,Footslope,4.0,0,Linear,Linear,Well drained,sandstone and shale,0,,38.714886,-94.962772,139,14583,0,0,0,0,0,0,14583,14528,93KS121041,Bates,"Fine-loamy, sesquic Typic Argiudolls",Series,Grass/herbaceous cover,,5209,0,0,No,139,19808,2542,2542,14560,14560,,,,Sandstone and shale,14583,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,North American Datum of 1983,27,15 S,22 E,,0.0,0.0,290.0,Footslope,3.0,0,Linear,Linear,Moderately well drained,silty and clayey residuum weathered from claye...,0,,38.714856,-94.964592,139,14592,0,0,0,0,0,0,14592,14537,93KS121050,Summit,"Fine, smectitic, thermic Vertic Argiudolls",Series,Grass/herbaceous cover,,5209,0,0,No,139,19808,2542,2542,14569,14569,,Silty and clayey,Residuum,Clayey shale,14592,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### One Hot Encode ssp+sp+ncss

In [87]:
#Method to roll up table specific features, by grouping categories that account for less than 1% of the data
#Inputs: dataframe and cat_Column


def categorical_GetDummies(df, cat_Column):
    print("One-Hot Encoded Product-specific feature:", cat_Column)
    #Get the normalized value counts of each category
    valueCounts = df[cat_Column].value_counts(normalize=True).reset_index()
    #Figure out the categories to keep and the ones to group using the 99% rule
    valuesToKeep = valueCounts[valueCounts[cat_Column] >= 0.01]
    valuesToGroup = valueCounts[valueCounts[cat_Column] < 0.01]
    #Reset the values to group to 'OTHER'
    df.loc[df[cat_Column].isin(valuesToGroup['index']), cat_Column] = 'OTHER'
    #One-hot encode the variables
    df = pd.get_dummies(df, columns = [cat_Column], dummy_na=True)
    return df


cat_var_list = ['horizdatnm',
'plsstownsh',
'plssrange',
'plssmeridi',
'hillslopep',
'shapeacros',
'shapedown',
'drainagecl',
'pmgroupnam',
'flodfreqcl',
'taxonname',
'taxclname',
'taxonkind',
'earthcovki',
'earthcov_1',
'pedonhydri',
'pmmodifier',
'pmkind',
'pmorigin',
'layertype_0cm_9cm',
'layertype_100cm_109cm',
'layertype_10cm_19cm',
'layertype_110cm_119cm',
'layertype_120+cm',
'layertype_20cm_29cm',
'layertype_30cm_39cm',
'layertype_40cm_49cm',
'layertype_50cm_59cm',
'layertype_60cm_69cm',
'layertype_70cm_79cm',
'layertype_80cm_89cm',
'layertype_90cm_99cm',
'hzname_0cm_9cm',
'hzname_100cm_109cm',
'hzname_10cm_19cm',
'hzname_110cm_119cm',
'hzname_120+cm',
'hzname_20cm_29cm',
'hzname_30cm_39cm',
'hzname_40cm_49cm',
'hzname_50cm_59cm',
'hzname_60cm_69cm',
'hzname_70cm_79cm',
'hzname_80cm_89cm',
'hzname_90cm_99cm',
'hznameoriginal_0cm_9cm',
'hznameoriginal_100cm_109cm',
'hznameoriginal_10cm_19cm',
'hznameoriginal_110cm_119cm',
'hznameoriginal_120+cm',
'hznameoriginal_20cm_29cm',
'hznameoriginal_30cm_39cm',
'hznameoriginal_40cm_49cm',
'hznameoriginal_50cm_59cm',
'hznameoriginal_60cm_69cm',
'hznameoriginal_70cm_79cm',
'hznameoriginal_80cm_89cm',
'hznameoriginal_90cm_99cm',
'stratextsflag_0cm_9cm',
'stratextsflag_100cm_109cm',
'stratextsflag_10cm_19cm',
'stratextsflag_110cm_119cm',
'stratextsflag_120+cm',
'stratextsflag_20cm_29cm',
'stratextsflag_30cm_39cm',
'stratextsflag_40cm_49cm',
'stratextsflag_50cm_59cm',
'stratextsflag_60cm_69cm',
'stratextsflag_70cm_79cm',
'stratextsflag_80cm_89cm',
'stratextsflag_90cm_99cm',
'moistprepstate_0cm_9cm',
'moistprepstate_100cm_109cm',
'moistprepstate_10cm_19cm',
'moistprepstate_110cm_119cm',
'moistprepstate_120+cm',
'moistprepstate_20cm_29cm',
'moistprepstate_30cm_39cm',
'moistprepstate_40cm_49cm',
'moistprepstate_50cm_59cm',
'moistprepstate_60cm_69cm',
'moistprepstate_70cm_79cm',
'moistprepstate_80cm_89cm',
'moistprepstate_90cm_99cm',
'texcl_0cm_9cm',
'texcl_100cm_109cm',
'texcl_10cm_19cm',
'texcl_110cm_119cm',
'texcl_120+cm',
'texcl_20cm_29cm',
'texcl_30cm_39cm',
'texcl_40cm_49cm',
'texcl_50cm_59cm',
'texcl_60cm_69cm',
'texcl_70cm_79cm',
'texcl_80cm_89cm',
'texcl_90cm_99cm']

df_transformed = ssp_sitepm_ncss_v2.copy()

for cat_column in cat_var_list:
    df_transformed = categorical_GetDummies(df_transformed, cat_column)

One-Hot Encoded Product-specific feature: horizdatnm
One-Hot Encoded Product-specific feature: plsstownsh
One-Hot Encoded Product-specific feature: plssrange
One-Hot Encoded Product-specific feature: plssmeridi
One-Hot Encoded Product-specific feature: hillslopep
One-Hot Encoded Product-specific feature: shapeacros
One-Hot Encoded Product-specific feature: shapedown
One-Hot Encoded Product-specific feature: drainagecl
One-Hot Encoded Product-specific feature: pmgroupnam
One-Hot Encoded Product-specific feature: flodfreqcl
One-Hot Encoded Product-specific feature: taxonname
One-Hot Encoded Product-specific feature: taxclname
One-Hot Encoded Product-specific feature: taxonkind
One-Hot Encoded Product-specific feature: earthcovki
One-Hot Encoded Product-specific feature: earthcov_1
One-Hot Encoded Product-specific feature: pedonhydri
One-Hot Encoded Product-specific feature: pmmodifier
One-Hot Encoded Product-specific feature: pmkind
One-Hot Encoded Product-specific feature: pmorigin
One-

In [88]:
df_transformed.head()

Unnamed: 0,plsssectio,utmnorthin,utmeasting,elev,slope,aspect,benchmarks,latstddeci,longstddec,sdbiidref,siteiid,ecostateid,commphasei,pedodermco_site_siteobs,drainedfla,beddingfla,plantation,siteiidref_ssp,siteobsiid,upedonid,tsectiidre,soilreplic,pedodermco_pedon,pedbiidref,grpiidref_x,objuseriid,recuseriid,peiid,peiidref_x,pmorder,siteiidref_sitepm,peiidref_y,psctopdepth,pscbotdepth,noncarbclaywtavg,claytotwtavg,le0to100,wf0175wtavgpsc,volfractgt2wtavg,cec7clayratiowtavg,ncsspedbiidref,grpiidref_y,ncsspedonlabdataiid,ncsspedonlabdataiidref,hzdept_0cm_9cm,hzdept_100cm_109cm,hzdept_10cm_19cm,hzdept_110cm_119cm,hzdept_120+cm,hzdept_20cm_29cm,hzdept_30cm_39cm,hzdept_40cm_49cm,hzdept_50cm_59cm,hzdept_60cm_69cm,hzdept_70cm_79cm,hzdept_80cm_89cm,hzdept_90cm_99cm,hzdepb_0cm_9cm,hzdepb_100cm_109cm,hzdepb_10cm_19cm,hzdepb_110cm_119cm,hzdepb_120+cm,hzdepb_20cm_29cm,hzdepb_30cm_39cm,hzdepb_40cm_49cm,hzdepb_50cm_59cm,hzdepb_60cm_69cm,hzdepb_70cm_79cm,hzdepb_80cm_89cm,hzdepb_90cm_99cm,ph1to1h2o_0cm_9cm,ph1to1h2o_100cm_109cm,ph1to1h2o_10cm_19cm,ph1to1h2o_110cm_119cm,ph1to1h2o_120+cm,ph1to1h2o_20cm_29cm,ph1to1h2o_30cm_39cm,ph1to1h2o_40cm_49cm,ph1to1h2o_50cm_59cm,ph1to1h2o_60cm_69cm,ph1to1h2o_70cm_79cm,ph1to1h2o_80cm_89cm,ph1to1h2o_90cm_99cm,ph01mcacl2_0cm_9cm,ph01mcacl2_100cm_109cm,ph01mcacl2_10cm_19cm,ph01mcacl2_110cm_119cm,ph01mcacl2_120+cm,ph01mcacl2_20cm_29cm,ph01mcacl2_30cm_39cm,ph01mcacl2_40cm_49cm,ph01mcacl2_50cm_59cm,ph01mcacl2_60cm_69cm,ph01mcacl2_70cm_79cm,ph01mcacl2_80cm_89cm,ph01mcacl2_90cm_99cm,sandvcmeasured_0cm_9cm,sandvcmeasured_100cm_109cm,sandvcmeasured_10cm_19cm,sandvcmeasured_110cm_119cm,sandvcmeasured_120+cm,sandvcmeasured_20cm_29cm,sandvcmeasured_30cm_39cm,sandvcmeasured_40cm_49cm,sandvcmeasured_50cm_59cm,sandvcmeasured_60cm_69cm,sandvcmeasured_70cm_79cm,sandvcmeasured_80cm_89cm,sandvcmeasured_90cm_99cm,sandcomeasured_0cm_9cm,sandcomeasured_100cm_109cm,sandcomeasured_10cm_19cm,sandcomeasured_110cm_119cm,sandcomeasured_120+cm,sandcomeasured_20cm_29cm,sandcomeasured_30cm_39cm,sandcomeasured_40cm_49cm,sandcomeasured_50cm_59cm,sandcomeasured_60cm_69cm,sandcomeasured_70cm_79cm,sandcomeasured_80cm_89cm,sandcomeasured_90cm_99cm,sandmedmeasured_0cm_9cm,sandmedmeasured_100cm_109cm,sandmedmeasured_10cm_19cm,sandmedmeasured_110cm_119cm,sandmedmeasured_120+cm,sandmedmeasured_20cm_29cm,sandmedmeasured_30cm_39cm,sandmedmeasured_40cm_49cm,sandmedmeasured_50cm_59cm,sandmedmeasured_60cm_69cm,sandmedmeasured_70cm_79cm,sandmedmeasured_80cm_89cm,sandmedmeasured_90cm_99cm,sandfinemeasured_0cm_9cm,sandfinemeasured_100cm_109cm,sandfinemeasured_10cm_19cm,sandfinemeasured_110cm_119cm,sandfinemeasured_120+cm,sandfinemeasured_20cm_29cm,sandfinemeasured_30cm_39cm,sandfinemeasured_40cm_49cm,sandfinemeasured_50cm_59cm,sandfinemeasured_60cm_69cm,sandfinemeasured_70cm_79cm,sandfinemeasured_80cm_89cm,sandfinemeasured_90cm_99cm,sandvfmeasured_0cm_9cm,sandvfmeasured_100cm_109cm,sandvfmeasured_10cm_19cm,sandvfmeasured_110cm_119cm,sandvfmeasured_120+cm,sandvfmeasured_20cm_29cm,sandvfmeasured_30cm_39cm,sandvfmeasured_40cm_49cm,sandvfmeasured_50cm_59cm,sandvfmeasured_60cm_69cm,sandvfmeasured_70cm_79cm,sandvfmeasured_80cm_89cm,sandvfmeasured_90cm_99cm,sandtotmeasured_0cm_9cm,sandtotmeasured_100cm_109cm,sandtotmeasured_10cm_19cm,sandtotmeasured_110cm_119cm,sandtotmeasured_120+cm,sandtotmeasured_20cm_29cm,sandtotmeasured_30cm_39cm,sandtotmeasured_40cm_49cm,sandtotmeasured_50cm_59cm,sandtotmeasured_60cm_69cm,sandtotmeasured_70cm_79cm,sandtotmeasured_80cm_89cm,sandtotmeasured_90cm_99cm,siltcomeasured_0cm_9cm,siltcomeasured_100cm_109cm,siltcomeasured_10cm_19cm,siltcomeasured_110cm_119cm,siltcomeasured_120+cm,siltcomeasured_20cm_29cm,siltcomeasured_30cm_39cm,siltcomeasured_40cm_49cm,siltcomeasured_50cm_59cm,siltcomeasured_60cm_69cm,siltcomeasured_70cm_79cm,siltcomeasured_80cm_89cm,siltcomeasured_90cm_99cm,siltfinemeasured_0cm_9cm,siltfinemeasured_100cm_109cm,siltfinemeasured_10cm_19cm,siltfinemeasured_110cm_119cm,siltfinemeasured_120+cm,siltfinemeasured_20cm_29cm,siltfinemeasured_30cm_39cm,siltfinemeasured_40cm_49cm,siltfinemeasured_50cm_59cm,siltfinemeasured_60cm_69cm,siltfinemeasured_70cm_79cm,siltfinemeasured_80cm_89cm,siltfinemeasured_90cm_99cm,silttotmeasured_0cm_9cm,silttotmeasured_100cm_109cm,silttotmeasured_10cm_19cm,silttotmeasured_110cm_119cm,silttotmeasured_120+cm,silttotmeasured_20cm_29cm,silttotmeasured_30cm_39cm,silttotmeasured_40cm_49cm,silttotmeasured_50cm_59cm,silttotmeasured_60cm_69cm,silttotmeasured_70cm_79cm,silttotmeasured_80cm_89cm,silttotmeasured_90cm_99cm,claytotmeasured_0cm_9cm,claytotmeasured_100cm_109cm,claytotmeasured_10cm_19cm,claytotmeasured_110cm_119cm,claytotmeasured_120+cm,claytotmeasured_20cm_29cm,claytotmeasured_30cm_39cm,claytotmeasured_40cm_49cm,claytotmeasured_50cm_59cm,claytotmeasured_60cm_69cm,claytotmeasured_70cm_79cm,claytotmeasured_80cm_89cm,claytotmeasured_90cm_99cm,carbonorganicpctmeasured_0cm_9cm,carbonorganicpctmeasured_100cm_109cm,carbonorganicpctmeasured_10cm_19cm,carbonorganicpctmeasured_110cm_119cm,carbonorganicpctmeasured_120+cm,carbonorganicpctmeasured_20cm_29cm,carbonorganicpctmeasured_30cm_39cm,carbonorganicpctmeasured_40cm_49cm,carbonorganicpctmeasured_50cm_59cm,carbonorganicpctmeasured_60cm_69cm,carbonorganicpctmeasured_70cm_79cm,carbonorganicpctmeasured_80cm_89cm,carbonorganicpctmeasured_90cm_99cm,fragwt25_0cm_9cm,fragwt25_100cm_109cm,fragwt25_10cm_19cm,fragwt25_110cm_119cm,fragwt25_120+cm,fragwt25_20cm_29cm,fragwt25_30cm_39cm,fragwt25_40cm_49cm,fragwt25_50cm_59cm,fragwt25_60cm_69cm,fragwt25_70cm_79cm,...,moistprepstate_80cm_89cm_Air-dry,moistprepstate_80cm_89cm_OTHER,moistprepstate_80cm_89cm_nan,moistprepstate_90cm_99cm_Air-dry,moistprepstate_90cm_99cm_OTHER,moistprepstate_90cm_99cm_nan,texcl_0cm_9cm_Clay,texcl_0cm_9cm_Clay loam,texcl_0cm_9cm_Coarse sandy loam,texcl_0cm_9cm_Fine sand,texcl_0cm_9cm_Fine sandy loam,texcl_0cm_9cm_Loam,texcl_0cm_9cm_Loamy fine sand,texcl_0cm_9cm_Loamy sand,texcl_0cm_9cm_OTHER,texcl_0cm_9cm_Sand,texcl_0cm_9cm_Sandy clay loam,texcl_0cm_9cm_Sandy loam,texcl_0cm_9cm_Silt,texcl_0cm_9cm_Silt loam,texcl_0cm_9cm_Silty clay,texcl_0cm_9cm_Silty clay loam,texcl_0cm_9cm_Very fine sandy loam,texcl_0cm_9cm_nan,texcl_100cm_109cm_Clay,texcl_100cm_109cm_Clay loam,texcl_100cm_109cm_Coarse sand,texcl_100cm_109cm_Coarse sandy loam,texcl_100cm_109cm_Fine sand,texcl_100cm_109cm_Fine sandy loam,texcl_100cm_109cm_Loam,texcl_100cm_109cm_Loamy coarse sand,texcl_100cm_109cm_Loamy fine sand,texcl_100cm_109cm_Loamy sand,texcl_100cm_109cm_OTHER,texcl_100cm_109cm_Sand,texcl_100cm_109cm_Sandy clay loam,texcl_100cm_109cm_Sandy loam,texcl_100cm_109cm_Silt loam,texcl_100cm_109cm_Silty clay,texcl_100cm_109cm_Silty clay loam,texcl_100cm_109cm_Very fine sandy loam,texcl_100cm_109cm_nan,texcl_10cm_19cm_Clay,texcl_10cm_19cm_Clay loam,texcl_10cm_19cm_Coarse sandy loam,texcl_10cm_19cm_Fine sand,texcl_10cm_19cm_Fine sandy loam,texcl_10cm_19cm_Loam,texcl_10cm_19cm_Loamy coarse sand,texcl_10cm_19cm_Loamy fine sand,texcl_10cm_19cm_Loamy sand,texcl_10cm_19cm_OTHER,texcl_10cm_19cm_Sand,texcl_10cm_19cm_Sandy clay loam,texcl_10cm_19cm_Sandy loam,texcl_10cm_19cm_Silt,texcl_10cm_19cm_Silt loam,texcl_10cm_19cm_Silty clay,texcl_10cm_19cm_Silty clay loam,texcl_10cm_19cm_Very fine sandy loam,texcl_10cm_19cm_nan,texcl_110cm_119cm_Clay,texcl_110cm_119cm_Clay loam,texcl_110cm_119cm_Coarse sand,texcl_110cm_119cm_Coarse sandy loam,texcl_110cm_119cm_Fine sand,texcl_110cm_119cm_Fine sandy loam,texcl_110cm_119cm_Loam,texcl_110cm_119cm_Loamy coarse sand,texcl_110cm_119cm_Loamy fine sand,texcl_110cm_119cm_Loamy sand,texcl_110cm_119cm_OTHER,texcl_110cm_119cm_Sand,texcl_110cm_119cm_Sandy clay loam,texcl_110cm_119cm_Sandy loam,texcl_110cm_119cm_Silt loam,texcl_110cm_119cm_Silty clay,texcl_110cm_119cm_Silty clay loam,texcl_110cm_119cm_Very fine sandy loam,texcl_110cm_119cm_nan,texcl_120+cm_Clay,texcl_120+cm_Clay loam,texcl_120+cm_Coarse sand,texcl_120+cm_Coarse sandy loam,texcl_120+cm_Fine sand,texcl_120+cm_Fine sandy loam,texcl_120+cm_Loam,texcl_120+cm_Loamy coarse sand,texcl_120+cm_Loamy fine sand,texcl_120+cm_Loamy sand,texcl_120+cm_OTHER,texcl_120+cm_Sand,texcl_120+cm_Sandy clay loam,texcl_120+cm_Sandy loam,texcl_120+cm_Silt loam,texcl_120+cm_Silty clay,texcl_120+cm_Silty clay loam,texcl_120+cm_Very fine sandy loam,texcl_120+cm_nan,texcl_20cm_29cm_Clay,texcl_20cm_29cm_Clay loam,texcl_20cm_29cm_Coarse sandy loam,texcl_20cm_29cm_Fine sand,texcl_20cm_29cm_Fine sandy loam,texcl_20cm_29cm_Loam,texcl_20cm_29cm_Loamy coarse sand,texcl_20cm_29cm_Loamy fine sand,texcl_20cm_29cm_Loamy sand,texcl_20cm_29cm_OTHER,texcl_20cm_29cm_Sand,texcl_20cm_29cm_Sandy clay loam,texcl_20cm_29cm_Sandy loam,texcl_20cm_29cm_Silt loam,texcl_20cm_29cm_Silty clay,texcl_20cm_29cm_Silty clay loam,texcl_20cm_29cm_Very fine sandy loam,texcl_20cm_29cm_nan,texcl_30cm_39cm_Clay,texcl_30cm_39cm_Clay loam,texcl_30cm_39cm_Coarse sandy loam,texcl_30cm_39cm_Fine sand,texcl_30cm_39cm_Fine sandy loam,texcl_30cm_39cm_Loam,texcl_30cm_39cm_Loamy coarse sand,texcl_30cm_39cm_Loamy fine sand,texcl_30cm_39cm_Loamy sand,texcl_30cm_39cm_OTHER,texcl_30cm_39cm_Sand,texcl_30cm_39cm_Sandy clay loam,texcl_30cm_39cm_Sandy loam,texcl_30cm_39cm_Silt loam,texcl_30cm_39cm_Silty clay,texcl_30cm_39cm_Silty clay loam,texcl_30cm_39cm_Very fine sandy loam,texcl_30cm_39cm_nan,texcl_40cm_49cm_Clay,texcl_40cm_49cm_Clay loam,texcl_40cm_49cm_Coarse sand,texcl_40cm_49cm_Coarse sandy loam,texcl_40cm_49cm_Fine sand,texcl_40cm_49cm_Fine sandy loam,texcl_40cm_49cm_Loam,texcl_40cm_49cm_Loamy coarse sand,texcl_40cm_49cm_Loamy fine sand,texcl_40cm_49cm_Loamy sand,texcl_40cm_49cm_OTHER,texcl_40cm_49cm_Sand,texcl_40cm_49cm_Sandy clay loam,texcl_40cm_49cm_Sandy loam,texcl_40cm_49cm_Silt loam,texcl_40cm_49cm_Silty clay,texcl_40cm_49cm_Silty clay loam,texcl_40cm_49cm_Very fine sandy loam,texcl_40cm_49cm_nan,texcl_50cm_59cm_Clay,texcl_50cm_59cm_Clay loam,texcl_50cm_59cm_Coarse sand,texcl_50cm_59cm_Coarse sandy loam,texcl_50cm_59cm_Fine sand,texcl_50cm_59cm_Fine sandy loam,texcl_50cm_59cm_Loam,texcl_50cm_59cm_Loamy coarse sand,texcl_50cm_59cm_Loamy fine sand,texcl_50cm_59cm_Loamy sand,texcl_50cm_59cm_OTHER,texcl_50cm_59cm_Sand,texcl_50cm_59cm_Sandy clay loam,texcl_50cm_59cm_Sandy loam,texcl_50cm_59cm_Silt loam,texcl_50cm_59cm_Silty clay,texcl_50cm_59cm_Silty clay loam,texcl_50cm_59cm_Very fine sandy loam,texcl_50cm_59cm_nan,texcl_60cm_69cm_Clay,texcl_60cm_69cm_Clay loam,texcl_60cm_69cm_Coarse sand,texcl_60cm_69cm_Coarse sandy loam,texcl_60cm_69cm_Fine sand,texcl_60cm_69cm_Fine sandy loam,texcl_60cm_69cm_Loam,texcl_60cm_69cm_Loamy coarse sand,texcl_60cm_69cm_Loamy fine sand,texcl_60cm_69cm_Loamy sand,texcl_60cm_69cm_OTHER,texcl_60cm_69cm_Sand,texcl_60cm_69cm_Sandy clay loam,texcl_60cm_69cm_Sandy loam,texcl_60cm_69cm_Silt loam,texcl_60cm_69cm_Silty clay,texcl_60cm_69cm_Silty clay loam,texcl_60cm_69cm_Very fine sandy loam,texcl_60cm_69cm_nan,texcl_70cm_79cm_Clay,texcl_70cm_79cm_Clay loam,texcl_70cm_79cm_Coarse sand,texcl_70cm_79cm_Coarse sandy loam,texcl_70cm_79cm_Fine sand,texcl_70cm_79cm_Fine sandy loam,texcl_70cm_79cm_Loam,texcl_70cm_79cm_Loamy coarse sand,texcl_70cm_79cm_Loamy fine sand,texcl_70cm_79cm_Loamy sand,texcl_70cm_79cm_OTHER,texcl_70cm_79cm_Sand,texcl_70cm_79cm_Sandy clay loam,texcl_70cm_79cm_Sandy loam,texcl_70cm_79cm_Silt loam,texcl_70cm_79cm_Silty clay,texcl_70cm_79cm_Silty clay loam,texcl_70cm_79cm_Very fine sandy loam,texcl_70cm_79cm_nan,texcl_80cm_89cm_Clay,texcl_80cm_89cm_Clay loam,texcl_80cm_89cm_Coarse sand,texcl_80cm_89cm_Coarse sandy loam,texcl_80cm_89cm_Fine sand,texcl_80cm_89cm_Fine sandy loam,texcl_80cm_89cm_Loam,texcl_80cm_89cm_Loamy coarse sand,texcl_80cm_89cm_Loamy fine sand,texcl_80cm_89cm_Loamy sand,texcl_80cm_89cm_OTHER,texcl_80cm_89cm_Sand,texcl_80cm_89cm_Sandy clay loam,texcl_80cm_89cm_Sandy loam,texcl_80cm_89cm_Silt loam,texcl_80cm_89cm_Silty clay,texcl_80cm_89cm_Silty clay loam,texcl_80cm_89cm_Very fine sandy loam,texcl_80cm_89cm_nan,texcl_90cm_99cm_Clay,texcl_90cm_99cm_Clay loam,texcl_90cm_99cm_Coarse sand,texcl_90cm_99cm_Coarse sandy loam,texcl_90cm_99cm_Fine sand,texcl_90cm_99cm_Fine sandy loam,texcl_90cm_99cm_Loam,texcl_90cm_99cm_Loamy coarse sand,texcl_90cm_99cm_Loamy fine sand,texcl_90cm_99cm_Loamy sand,texcl_90cm_99cm_OTHER,texcl_90cm_99cm_Sand,texcl_90cm_99cm_Sandy clay loam,texcl_90cm_99cm_Sandy loam,texcl_90cm_99cm_Silt loam,texcl_90cm_99cm_Silty clay,texcl_90cm_99cm_Silty clay loam,texcl_90cm_99cm_Very fine sandy loam,texcl_90cm_99cm_nan
0,6,0.0,0.0,290.0,4.0,225,0,38.683333,-94.9,139,14572,0,0,0,0,0,0,14572,14517,93KS121030,7571,0,0,139,19808,2542,2542,14549,14549,,14572,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,35,0.0,0.0,320.0,2.0,45,0,38.708408,-94.927522,139,14573,0,0,0,0,0,0,14573,14518,93KS121031,4182,0,0,139,19808,2542,2542,14550,14550,,14573,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
2,35,0.0,0.0,320.0,3.0,225,0,38.704589,-94.930542,139,14582,0,0,0,0,0,0,14582,14527,93KS121040,4182,0,0,139,19808,2542,2542,14559,14559,,14582,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,27,0.0,0.0,290.0,4.0,0,0,38.714886,-94.962772,139,14583,0,0,0,0,0,0,14583,14528,93KS121041,5209,0,0,139,19808,2542,2542,14560,14560,,14583,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,27,0.0,0.0,290.0,3.0,0,0,38.714856,-94.964592,139,14592,0,0,0,0,0,0,14592,14537,93KS121050,5209,0,0,139,19808,2542,2542,14569,14569,,14592,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


### + geomorph

In [89]:
ssp_sitepm_ncss_geomorph = pd.merge(df_transformed,
                                    geomorfeat_final,
                     left_on = ['siteiid'],
                     right_on= ['siteiidref'],
                     how='left')

### Rolling up ssp + sitepm + ncss + geomorph

In [90]:
def create_agg_dic(NUM_LST,CAT_LST):
    AGG_DICT = {}
    for var in NUM_LST:
        AGG_DICT[var] = 'median'
    for var in CAT_LST:
        AGG_DICT[var] = 'max'
    return(AGG_DICT)


def aggregate_to_site(df,num_var_list,cat_var_list):
    df_in = df.copy()
    AGG_DICT = create_agg_dic(num_var_list,cat_var_list)
    df_agg = df_in.groupby('siteiid').agg(AGG_DICT).reset_index()
    return df_agg

In [91]:
# additional id variables to drop
drop_list = ['sdbiidref',
'ecostateid',
'siteiidref_ssp',
'siteobsiid',
'upedonid',
'tsectiidre',
'pedbiidref',
'grpiidref_x',
'objuseriid',
'recuseriid',
'peiid',
'siteiidref_sitepm',
'ncsspedbiidref',
'grpiidref_y',
'ncsspedonlabdataiid',
'ncsspedonlabdataiidref',
'siteiidref']

# drop extraneous id variables
ssp_sitepm_ncss_geomorph_v2 = ssp_sitepm_ncss_geomorph.drop(columns = drop_list)
ssp_sitepm_ncss_geomorph_v2.shape

(728563, 1573)

In [92]:
# categorical variables in a list

cat_var_list = [
'horizdatnm_North American Datum of 1927',
'horizdatnm_North American Datum of 1983',
'horizdatnm_OTHER',
'horizdatnm_World Geodetic System 1984',
'horizdatnm_nan',
'plsstownsh_111N',
'plsstownsh_18N',
'plsstownsh_20N',
'plsstownsh_22N',
'plsstownsh_4N',
'plsstownsh_5N',
'plsstownsh_6N',
'plsstownsh_7N',
'plsstownsh_8N',
'plsstownsh_9N',
'plsstownsh_OTHER',
'plsstownsh_nan',
'plssrange_10W',
'plssrange_11W',
'plssrange_12W',
'plssrange_13W',
'plssrange_14W',
'plssrange_15W',
'plssrange_16W',
'plssrange_17W',
'plssrange_19W',
'plssrange_1E',
'plssrange_1W',
'plssrange_2E',
'plssrange_2W',
'plssrange_3E',
'plssrange_3W',
'plssrange_4E',
'plssrange_4W',
'plssrange_5E',
'plssrange_5W',
'plssrange_6E',
'plssrange_6W',
'plssrange_7E',
'plssrange_7W',
'plssrange_8E',
'plssrange_8W',
'plssrange_9W',
'plssrange_OTHER',
'plssrange_nan',
'plssmeridi_Boise',
'plssmeridi_Cimarron',
'plssmeridi_Fifth Principal',
'plssmeridi_Fourth Principal',
'plssmeridi_Fourth Principal Extended',
'plssmeridi_Humboldt',
'plssmeridi_Indian',
'plssmeridi_Michigan',
'plssmeridi_Mount Diablo',
'plssmeridi_New Mexico Principal',
'plssmeridi_OTHER',
'plssmeridi_Salt Lake',
'plssmeridi_San Bernardino',
'plssmeridi_Second Principal',
'plssmeridi_Sixth Principal',
'plssmeridi_Tallahassee',
'plssmeridi_Third Principal',
'plssmeridi_Willamette',
'plssmeridi_nan',
'hillslopep_Backslope',
'hillslopep_Footslope',
'hillslopep_Shoulder',
'hillslopep_Summit',
'hillslopep_Toeslope',
'hillslopep_nan',
'shapeacros_Concave',
'shapeacros_Convex',
'shapeacros_Linear',
'shapeacros_OTHER',
'shapeacros_nan',
'shapedown_Concave',
'shapedown_Convex',
'shapedown_Linear',
'shapedown_OTHER',
'shapedown_nan',
'drainagecl_Excessively drained',
'drainagecl_Moderately well drained',
'drainagecl_OTHER',
'drainagecl_Poorly drained',
'drainagecl_Somewhat excessively drained',
'drainagecl_Somewhat poorly drained',
'drainagecl_Very poorly drained',
'drainagecl_Well drained',
'drainagecl_nan',
'pmgroupnam_OTHER',
'pmgroupnam_alluvium',
'pmgroupnam_loess',
'pmgroupnam_till',
'pmgroupnam_nan',
'flodfreqcl_Frequent',
'flodfreqcl_None',
'flodfreqcl_OTHER',
'flodfreqcl_Occasional',
'flodfreqcl_Rare',
'flodfreqcl_Very rare',
'flodfreqcl_nan',
'taxonname_OTHER',
'taxonname_SND',
'taxonname_nan',
'taxclname_Mesic',
'taxclname_OTHER',
'taxclname_nan',
'taxonkind_Family',
'taxonkind_OTHER',
'taxonkind_Series',
'taxonkind_Taxadjunct',
'taxonkind_Taxon above family',
'taxonkind_nan',
'earthcovki_Crop cover',
'earthcovki_Grass/herbaceous cover',
'earthcovki_OTHER',
'earthcovki_Shrub cover',
'earthcovki_Tree cover',
'earthcovki_nan',
'earthcov_1_Close-grown crop',
'earthcov_1_Conifers',
'earthcov_1_Grassland rangeland',
'earthcov_1_Hardwoods',
'earthcov_1_Hayland',
'earthcov_1_Intermixed conifers and hardwoods',
'earthcov_1_Marshland',
'earthcov_1_Native shrubs',
'earthcov_1_OTHER',
'earthcov_1_Other grass/herbaceous cover',
'earthcov_1_Other tree cover',
'earthcov_1_Row crop',
'earthcov_1_Savanna rangeland',
'earthcov_1_Shrubby rangeland',
'earthcov_1_Tame pastureland',
'earthcov_1_nan',
'pedonhydri_No',
'pedonhydri_YES',
'pedonhydri_nan',
'pmmodifier_Clayey',
'pmmodifier_Coarse-loamy',
'pmmodifier_Coarse-silty',
'pmmodifier_Fine-loamy',
'pmmodifier_Fine-silty',
'pmmodifier_Gravelly',
'pmmodifier_Loamy',
'pmmodifier_OTHER',
'pmmodifier_Sandy',
'pmmodifier_Sandy and gravelly',
'pmmodifier_Silty',
'pmmodifier_Silty and clayey',
'pmmodifier_nan',
'pmkind_Alluvium',
'pmkind_Colluvium',
'pmkind_Eolian deposits',
'pmkind_Eolian sands',
'pmkind_Fluviomarine deposits',
'pmkind_Glaciofluvial deposits',
'pmkind_Glaciolacustrine deposits',
'pmkind_Lacustrine deposits',
'pmkind_Loess',
'pmkind_Marine deposits',
'pmkind_OTHER',
'pmkind_Organic material',
'pmkind_Outwash',
'pmkind_Residuum',
'pmkind_Slope alluvium',
'pmkind_Till',
'pmkind_Volcanic ash',
'pmkind_nan',
'pmorigin_Andesite',
'pmorigin_Basalt',
'pmorigin_Cherty limestone',
'pmorigin_Dolomite',
'pmorigin_Gneiss',
'pmorigin_Granite',
'pmorigin_Granitoid',
'pmorigin_Igneous and metamorphic rock',
'pmorigin_Igneous rock',
'pmorigin_Igneous, metamorphic and sedimentary rock',
'pmorigin_Limestone',
'pmorigin_Limestone and dolomite',
'pmorigin_Limestone and shale',
'pmorigin_Metasedimentary rock',
'pmorigin_Mixed',
'pmorigin_Mudstone',
'pmorigin_OTHER',
'pmorigin_Quartzite',
'pmorigin_Sandstone',
'pmorigin_Sandstone and shale',
'pmorigin_Sandstone and siltstone',
'pmorigin_Schist',
'pmorigin_Sedimentary rock',
'pmorigin_Shale',
'pmorigin_Shale and siltstone',
'pmorigin_Siltstone',
'pmorigin_Tuff',
'pmorigin_Volcanic rock',
'pmorigin_nan',
'layertype_0cm_9cm_Horizon',
'layertype_0cm_9cm_Reporting layer',
'layertype_0cm_9cm_nan',
'layertype_100cm_109cm_Horizon',
'layertype_100cm_109cm_Reporting layer',
'layertype_100cm_109cm_nan',
'layertype_10cm_19cm_Horizon',
'layertype_10cm_19cm_Reporting layer',
'layertype_10cm_19cm_nan',
'layertype_110cm_119cm_Horizon',
'layertype_110cm_119cm_Reporting layer',
'layertype_110cm_119cm_nan',
'layertype_120+cm_Horizon',
'layertype_120+cm_Reporting layer',
'layertype_120+cm_nan',
'layertype_20cm_29cm_Horizon',
'layertype_20cm_29cm_Reporting layer',
'layertype_20cm_29cm_nan',
'layertype_30cm_39cm_Horizon',
'layertype_30cm_39cm_Reporting layer',
'layertype_30cm_39cm_nan',
'layertype_40cm_49cm_Horizon',
'layertype_40cm_49cm_Reporting layer',
'layertype_40cm_49cm_nan',
'layertype_50cm_59cm_Horizon',
'layertype_50cm_59cm_Reporting layer',
'layertype_50cm_59cm_nan',
'layertype_60cm_69cm_Horizon',
'layertype_60cm_69cm_Reporting layer',
'layertype_60cm_69cm_nan',
'layertype_70cm_79cm_Horizon',
'layertype_70cm_79cm_Reporting layer',
'layertype_70cm_79cm_nan',
'layertype_80cm_89cm_Horizon',
'layertype_80cm_89cm_Reporting layer',
'layertype_80cm_89cm_nan',
'layertype_90cm_99cm_Horizon',
'layertype_90cm_99cm_Reporting layer',
'layertype_90cm_99cm_nan',
'hzname_0cm_9cm_A',
'hzname_0cm_9cm_A1',
'hzname_0cm_9cm_A2',
'hzname_0cm_9cm_AP',
'hzname_0cm_9cm_Ap',
'hzname_0cm_9cm_Ap1',
'hzname_0cm_9cm_Ap2',
'hzname_0cm_9cm_Bt1',
'hzname_0cm_9cm_Bw',
'hzname_0cm_9cm_Bw1',
'hzname_0cm_9cm_E',
'hzname_0cm_9cm_OTHER',
'hzname_0cm_9cm_nan',
'hzname_100cm_109cm_2Bt2',
'hzname_100cm_109cm_2Bt3',
'hzname_100cm_109cm_2Bt4',
'hzname_100cm_109cm_2Bt5',
'hzname_100cm_109cm_2Btx2',
'hzname_100cm_109cm_2C',
'hzname_100cm_109cm_2C1',
'hzname_100cm_109cm_2C2',
'hzname_100cm_109cm_BC',
'hzname_100cm_109cm_Bk2',
'hzname_100cm_109cm_Bt2',
'hzname_100cm_109cm_Bt3',
'hzname_100cm_109cm_Bt4',
'hzname_100cm_109cm_Btg2',
'hzname_100cm_109cm_Bw2',
'hzname_100cm_109cm_Bw3',
'hzname_100cm_109cm_C',
'hzname_100cm_109cm_C1',
'hzname_100cm_109cm_C2',
'hzname_100cm_109cm_C3',
'hzname_100cm_109cm_OTHER',
'hzname_100cm_109cm_nan',
'hzname_10cm_19cm_A',
'hzname_10cm_19cm_A1',
'hzname_10cm_19cm_A2',
'hzname_10cm_19cm_AB',
'hzname_10cm_19cm_AP',
'hzname_10cm_19cm_Ap',
'hzname_10cm_19cm_Ap2',
'hzname_10cm_19cm_B1',
'hzname_10cm_19cm_BA',
'hzname_10cm_19cm_BE',
'hzname_10cm_19cm_Bt',
'hzname_10cm_19cm_Bt1',
'hzname_10cm_19cm_Bw',
'hzname_10cm_19cm_Bw1',
'hzname_10cm_19cm_C1',
'hzname_10cm_19cm_E',
'hzname_10cm_19cm_E1',
'hzname_10cm_19cm_OTHER',
'hzname_10cm_19cm_nan',
'hzname_110cm_119cm_2Bt2',
'hzname_110cm_119cm_2Bt3',
'hzname_110cm_119cm_2Bt4',
'hzname_110cm_119cm_2Bt5',
'hzname_110cm_119cm_2C',
'hzname_110cm_119cm_2C1',
'hzname_110cm_119cm_2C2',
'hzname_110cm_119cm_BC',
'hzname_110cm_119cm_Bt2',
'hzname_110cm_119cm_Bt3',
'hzname_110cm_119cm_Bt4',
'hzname_110cm_119cm_C',
'hzname_110cm_119cm_C1',
'hzname_110cm_119cm_C2',
'hzname_110cm_119cm_C3',
'hzname_110cm_119cm_Cg',
'hzname_110cm_119cm_OTHER',
'hzname_110cm_119cm_nan',
'hzname_120+cm_2Bt3',
'hzname_120+cm_2Bt4',
'hzname_120+cm_2Bt5',
'hzname_120+cm_2Bt6',
'hzname_120+cm_2C',
'hzname_120+cm_2C2',
'hzname_120+cm_3Bt2',
'hzname_120+cm_3C',
'hzname_120+cm_BC',
'hzname_120+cm_Bt3',
'hzname_120+cm_Bt4',
'hzname_120+cm_Bt5',
'hzname_120+cm_C',
'hzname_120+cm_C1',
'hzname_120+cm_C2',
'hzname_120+cm_C3',
'hzname_120+cm_C4',
'hzname_120+cm_Cg',
'hzname_120+cm_Cr',
'hzname_120+cm_OTHER',
'hzname_120+cm_nan',
'hzname_20cm_29cm_A',
'hzname_20cm_29cm_A1',
'hzname_20cm_29cm_A12',
'hzname_20cm_29cm_A2',
'hzname_20cm_29cm_AB',
'hzname_20cm_29cm_Ap',
'hzname_20cm_29cm_Ap2',
'hzname_20cm_29cm_B1',
'hzname_20cm_29cm_BA',
'hzname_20cm_29cm_BE',
'hzname_20cm_29cm_Bk1',
'hzname_20cm_29cm_Bt',
'hzname_20cm_29cm_Bt1',
'hzname_20cm_29cm_Bt2',
'hzname_20cm_29cm_Btg1',
'hzname_20cm_29cm_Bw',
'hzname_20cm_29cm_Bw1',
'hzname_20cm_29cm_Bw2',
'hzname_20cm_29cm_C1',
'hzname_20cm_29cm_E',
'hzname_20cm_29cm_OTHER',
'hzname_20cm_29cm_nan',
'hzname_30cm_39cm_2Bt1',
'hzname_30cm_39cm_2Bt2',
'hzname_30cm_39cm_A',
'hzname_30cm_39cm_A2',
'hzname_30cm_39cm_AB',
'hzname_30cm_39cm_B1',
'hzname_30cm_39cm_B21T',
'hzname_30cm_39cm_BA',
'hzname_30cm_39cm_BE',
'hzname_30cm_39cm_Bk1',
'hzname_30cm_39cm_Bt',
'hzname_30cm_39cm_Bt1',
'hzname_30cm_39cm_Bt2',
'hzname_30cm_39cm_Btg1',
'hzname_30cm_39cm_Bw',
'hzname_30cm_39cm_Bw1',
'hzname_30cm_39cm_Bw2',
'hzname_30cm_39cm_C',
'hzname_30cm_39cm_C1',
'hzname_30cm_39cm_E',
'hzname_30cm_39cm_OTHER',
'hzname_30cm_39cm_nan',
'hzname_40cm_49cm_2Bt1',
'hzname_40cm_49cm_2Bt2',
'hzname_40cm_49cm_A2',
'hzname_40cm_49cm_B1',
'hzname_40cm_49cm_B2',
'hzname_40cm_49cm_Bk',
'hzname_40cm_49cm_Bk1',
'hzname_40cm_49cm_Bt',
'hzname_40cm_49cm_Bt1',
'hzname_40cm_49cm_Bt2',
'hzname_40cm_49cm_Bt3',
'hzname_40cm_49cm_Btg1',
'hzname_40cm_49cm_Bw',
'hzname_40cm_49cm_Bw1',
'hzname_40cm_49cm_Bw2',
'hzname_40cm_49cm_C',
'hzname_40cm_49cm_C1',
'hzname_40cm_49cm_C2',
'hzname_40cm_49cm_OTHER',
'hzname_40cm_49cm_nan',
'hzname_50cm_59cm_2Bt1',
'hzname_50cm_59cm_2Bt2',
'hzname_50cm_59cm_2Bt3',
'hzname_50cm_59cm_A2',
'hzname_50cm_59cm_B2',
'hzname_50cm_59cm_BC',
'hzname_50cm_59cm_Bk',
'hzname_50cm_59cm_Bk1',
'hzname_50cm_59cm_Bk2',
'hzname_50cm_59cm_Bt',
'hzname_50cm_59cm_Bt1',
'hzname_50cm_59cm_Bt2',
'hzname_50cm_59cm_Bt3',
'hzname_50cm_59cm_Btg1',
'hzname_50cm_59cm_Btg2',
'hzname_50cm_59cm_Bw',
'hzname_50cm_59cm_Bw1',
'hzname_50cm_59cm_Bw2',
'hzname_50cm_59cm_Bw3',
'hzname_50cm_59cm_C',
'hzname_50cm_59cm_C1',
'hzname_50cm_59cm_C2',
'hzname_50cm_59cm_OTHER',
'hzname_50cm_59cm_nan',
'hzname_60cm_69cm_2Bt2',
'hzname_60cm_69cm_2Bt3',
'hzname_60cm_69cm_2Btx1',
'hzname_60cm_69cm_BC',
'hzname_60cm_69cm_Bk',
'hzname_60cm_69cm_Bk1',
'hzname_60cm_69cm_Bk2',
'hzname_60cm_69cm_Bt',
'hzname_60cm_69cm_Bt1',
'hzname_60cm_69cm_Bt2',
'hzname_60cm_69cm_Bt3',
'hzname_60cm_69cm_Btg1',
'hzname_60cm_69cm_Btg2',
'hzname_60cm_69cm_Bw',
'hzname_60cm_69cm_Bw1',
'hzname_60cm_69cm_Bw2',
'hzname_60cm_69cm_Bw3',
'hzname_60cm_69cm_C',
'hzname_60cm_69cm_C1',
'hzname_60cm_69cm_C2',
'hzname_60cm_69cm_OTHER',
'hzname_60cm_69cm_nan',
'hzname_70cm_79cm_2Bt2',
'hzname_70cm_79cm_2Bt3',
'hzname_70cm_79cm_2Bt4',
'hzname_70cm_79cm_2Btx1',
'hzname_70cm_79cm_2C',
'hzname_70cm_79cm_BC',
'hzname_70cm_79cm_Bk2',
'hzname_70cm_79cm_Bt',
'hzname_70cm_79cm_Bt1',
'hzname_70cm_79cm_Bt2',
'hzname_70cm_79cm_Bt3',
'hzname_70cm_79cm_Bt4',
'hzname_70cm_79cm_Btg2',
'hzname_70cm_79cm_Bw2',
'hzname_70cm_79cm_Bw3',
'hzname_70cm_79cm_C',
'hzname_70cm_79cm_C1',
'hzname_70cm_79cm_C2',
'hzname_70cm_79cm_C3',
'hzname_70cm_79cm_OTHER',
'hzname_70cm_79cm_nan',
'hzname_80cm_89cm_2Bt2',
'hzname_80cm_89cm_2Bt3',
'hzname_80cm_89cm_2Bt4',
'hzname_80cm_89cm_2C',
'hzname_80cm_89cm_2C1',
'hzname_80cm_89cm_BC',
'hzname_80cm_89cm_Bk2',
'hzname_80cm_89cm_Bt',
'hzname_80cm_89cm_Bt1',
'hzname_80cm_89cm_Bt2',
'hzname_80cm_89cm_Bt3',
'hzname_80cm_89cm_Bt4',
'hzname_80cm_89cm_Btg2',
'hzname_80cm_89cm_Bw2',
'hzname_80cm_89cm_Bw3',
'hzname_80cm_89cm_C',
'hzname_80cm_89cm_C1',
'hzname_80cm_89cm_C2',
'hzname_80cm_89cm_C3',
'hzname_80cm_89cm_OTHER',
'hzname_80cm_89cm_nan',
'hzname_90cm_99cm_2Bt2',
'hzname_90cm_99cm_2Bt3',
'hzname_90cm_99cm_2Bt4',
'hzname_90cm_99cm_2C',
'hzname_90cm_99cm_2C1',
'hzname_90cm_99cm_BC',
'hzname_90cm_99cm_Bk2',
'hzname_90cm_99cm_Bt1',
'hzname_90cm_99cm_Bt2',
'hzname_90cm_99cm_Bt3',
'hzname_90cm_99cm_Bt4',
'hzname_90cm_99cm_Btg2',
'hzname_90cm_99cm_Btg3',
'hzname_90cm_99cm_Bw2',
'hzname_90cm_99cm_Bw3',
'hzname_90cm_99cm_C',
'hzname_90cm_99cm_C1',
'hzname_90cm_99cm_C2',
'hzname_90cm_99cm_C3',
'hzname_90cm_99cm_OTHER',
'hzname_90cm_99cm_nan',
'hznameoriginal_0cm_9cm_A',
'hznameoriginal_0cm_9cm_A1',
'hznameoriginal_0cm_9cm_A11',
'hznameoriginal_0cm_9cm_A12',
'hznameoriginal_0cm_9cm_A2',
'hznameoriginal_0cm_9cm_AP1',
'hznameoriginal_0cm_9cm_Ap',
'hznameoriginal_0cm_9cm_Ap1',
'hznameoriginal_0cm_9cm_Ap2',
'hznameoriginal_0cm_9cm_Bt1',
'hznameoriginal_0cm_9cm_Bw1',
'hznameoriginal_0cm_9cm_E',
'hznameoriginal_0cm_9cm_OTHER',
'hznameoriginal_0cm_9cm_nan',
'hznameoriginal_100cm_109cm_2Bt2',
'hznameoriginal_100cm_109cm_2Bt3',
'hznameoriginal_100cm_109cm_2Bt4',
'hznameoriginal_100cm_109cm_2Bt5',
'hznameoriginal_100cm_109cm_2Btx2',
'hznameoriginal_100cm_109cm_2C',
'hznameoriginal_100cm_109cm_2C1',
'hznameoriginal_100cm_109cm_2C2',
'hznameoriginal_100cm_109cm_B3',
'hznameoriginal_100cm_109cm_BC',
'hznameoriginal_100cm_109cm_Bt2',
'hznameoriginal_100cm_109cm_Bt3',
'hznameoriginal_100cm_109cm_Bt4',
'hznameoriginal_100cm_109cm_Bw2',
'hznameoriginal_100cm_109cm_Bw3',
'hznameoriginal_100cm_109cm_C',
'hznameoriginal_100cm_109cm_C1',
'hznameoriginal_100cm_109cm_C2',
'hznameoriginal_100cm_109cm_C3',
'hznameoriginal_100cm_109cm_OTHER',
'hznameoriginal_100cm_109cm_nan',
'hznameoriginal_10cm_19cm_A',
'hznameoriginal_10cm_19cm_A1',
'hznameoriginal_10cm_19cm_A12',
'hznameoriginal_10cm_19cm_A2',
'hznameoriginal_10cm_19cm_AB',
'hznameoriginal_10cm_19cm_Ap',
'hznameoriginal_10cm_19cm_Ap2',
'hznameoriginal_10cm_19cm_B1',
'hznameoriginal_10cm_19cm_B21t',
'hznameoriginal_10cm_19cm_BA',
'hznameoriginal_10cm_19cm_BE',
'hznameoriginal_10cm_19cm_Bt',
'hznameoriginal_10cm_19cm_Bt1',
'hznameoriginal_10cm_19cm_Bw',
'hznameoriginal_10cm_19cm_Bw1',
'hznameoriginal_10cm_19cm_C1',
'hznameoriginal_10cm_19cm_E',
'hznameoriginal_10cm_19cm_E1',
'hznameoriginal_10cm_19cm_OTHER',
'hznameoriginal_10cm_19cm_nan',
'hznameoriginal_110cm_119cm_2Bt2',
'hznameoriginal_110cm_119cm_2Bt3',
'hznameoriginal_110cm_119cm_2Bt4',
'hznameoriginal_110cm_119cm_2Bt5',
'hznameoriginal_110cm_119cm_2C',
'hznameoriginal_110cm_119cm_2C1',
'hznameoriginal_110cm_119cm_2C2',
'hznameoriginal_110cm_119cm_B3',
'hznameoriginal_110cm_119cm_BC',
'hznameoriginal_110cm_119cm_Bt2',
'hznameoriginal_110cm_119cm_Bt3',
'hznameoriginal_110cm_119cm_Bt4',
'hznameoriginal_110cm_119cm_C',
'hznameoriginal_110cm_119cm_C1',
'hznameoriginal_110cm_119cm_C2',
'hznameoriginal_110cm_119cm_C3',
'hznameoriginal_110cm_119cm_Cg',
'hznameoriginal_110cm_119cm_OTHER',
'hznameoriginal_110cm_119cm_nan',
'hznameoriginal_120+cm_2Bt3',
'hznameoriginal_120+cm_2Bt4',
'hznameoriginal_120+cm_2Bt5',
'hznameoriginal_120+cm_2Bt6',
'hznameoriginal_120+cm_2C',
'hznameoriginal_120+cm_2C2',
'hznameoriginal_120+cm_3Bt2',
'hznameoriginal_120+cm_BC',
'hznameoriginal_120+cm_Bt3',
'hznameoriginal_120+cm_Bt4',
'hznameoriginal_120+cm_Bt5',
'hznameoriginal_120+cm_C',
'hznameoriginal_120+cm_C1',
'hznameoriginal_120+cm_C2',
'hznameoriginal_120+cm_C3',
'hznameoriginal_120+cm_C4',
'hznameoriginal_120+cm_Cg',
'hznameoriginal_120+cm_Cr',
'hznameoriginal_120+cm_OTHER',
'hznameoriginal_120+cm_nan',
'hznameoriginal_20cm_29cm_A',
'hznameoriginal_20cm_29cm_A1',
'hznameoriginal_20cm_29cm_A12',
'hznameoriginal_20cm_29cm_A2',
'hznameoriginal_20cm_29cm_A3',
'hznameoriginal_20cm_29cm_AB',
'hznameoriginal_20cm_29cm_Ap',
'hznameoriginal_20cm_29cm_Ap2',
'hznameoriginal_20cm_29cm_B1',
'hznameoriginal_20cm_29cm_B21',
'hznameoriginal_20cm_29cm_B21t',
'hznameoriginal_20cm_29cm_BA',
'hznameoriginal_20cm_29cm_BE',
'hznameoriginal_20cm_29cm_Bt',
'hznameoriginal_20cm_29cm_Bt1',
'hznameoriginal_20cm_29cm_Bt2',
'hznameoriginal_20cm_29cm_Bw',
'hznameoriginal_20cm_29cm_Bw1',
'hznameoriginal_20cm_29cm_Bw2',
'hznameoriginal_20cm_29cm_C1',
'hznameoriginal_20cm_29cm_E',
'hznameoriginal_20cm_29cm_OTHER',
'hznameoriginal_20cm_29cm_nan',
'hznameoriginal_30cm_39cm_2Bt1',
'hznameoriginal_30cm_39cm_2Bt2',
'hznameoriginal_30cm_39cm_A',
'hznameoriginal_30cm_39cm_A1',
'hznameoriginal_30cm_39cm_A12',
'hznameoriginal_30cm_39cm_A2',
'hznameoriginal_30cm_39cm_A3',
'hznameoriginal_30cm_39cm_AB',
'hznameoriginal_30cm_39cm_B1',
'hznameoriginal_30cm_39cm_B21',
'hznameoriginal_30cm_39cm_B21t',
'hznameoriginal_30cm_39cm_B22t',
'hznameoriginal_30cm_39cm_BA',
'hznameoriginal_30cm_39cm_Bt',
'hznameoriginal_30cm_39cm_Bt1',
'hznameoriginal_30cm_39cm_Bt2',
'hznameoriginal_30cm_39cm_Btg1',
'hznameoriginal_30cm_39cm_Bw',
'hznameoriginal_30cm_39cm_Bw1',
'hznameoriginal_30cm_39cm_Bw2',
'hznameoriginal_30cm_39cm_C1',
'hznameoriginal_30cm_39cm_C2',
'hznameoriginal_30cm_39cm_E',
'hznameoriginal_30cm_39cm_OTHER',
'hznameoriginal_30cm_39cm_nan',
'hznameoriginal_40cm_49cm_2Bt1',
'hznameoriginal_40cm_49cm_2Bt2',
'hznameoriginal_40cm_49cm_A2',
'hznameoriginal_40cm_49cm_B21',
'hznameoriginal_40cm_49cm_B21t',
'hznameoriginal_40cm_49cm_B22',
'hznameoriginal_40cm_49cm_B22t',
'hznameoriginal_40cm_49cm_Bt',
'hznameoriginal_40cm_49cm_Bt1',
'hznameoriginal_40cm_49cm_Bt2',
'hznameoriginal_40cm_49cm_Bt3',
'hznameoriginal_40cm_49cm_Btg1',
'hznameoriginal_40cm_49cm_Bw',
'hznameoriginal_40cm_49cm_Bw1',
'hznameoriginal_40cm_49cm_Bw2',
'hznameoriginal_40cm_49cm_C',
'hznameoriginal_40cm_49cm_C1',
'hznameoriginal_40cm_49cm_C2',
'hznameoriginal_40cm_49cm_OTHER',
'hznameoriginal_40cm_49cm_nan',
'hznameoriginal_50cm_59cm_2Bt2',
'hznameoriginal_50cm_59cm_2Bt3',
'hznameoriginal_50cm_59cm_B21',
'hznameoriginal_50cm_59cm_B21t',
'hznameoriginal_50cm_59cm_B22',
'hznameoriginal_50cm_59cm_B22t',
'hznameoriginal_50cm_59cm_BC',
'hznameoriginal_50cm_59cm_Bt',
'hznameoriginal_50cm_59cm_Bt1',
'hznameoriginal_50cm_59cm_Bt2',
'hznameoriginal_50cm_59cm_Bt3',
'hznameoriginal_50cm_59cm_Btg1',
'hznameoriginal_50cm_59cm_Btg2',
'hznameoriginal_50cm_59cm_Bw',
'hznameoriginal_50cm_59cm_Bw1',
'hznameoriginal_50cm_59cm_Bw2',
'hznameoriginal_50cm_59cm_C',
'hznameoriginal_50cm_59cm_C1',
'hznameoriginal_50cm_59cm_C2',
'hznameoriginal_50cm_59cm_OTHER',
'hznameoriginal_50cm_59cm_nan',
'hznameoriginal_60cm_69cm_2Bt2',
'hznameoriginal_60cm_69cm_2Bt3',
'hznameoriginal_60cm_69cm_2Btx1',
'hznameoriginal_60cm_69cm_B22',
'hznameoriginal_60cm_69cm_B22t',
'hznameoriginal_60cm_69cm_B23t',
'hznameoriginal_60cm_69cm_BC',
'hznameoriginal_60cm_69cm_Bt',
'hznameoriginal_60cm_69cm_Bt1',
'hznameoriginal_60cm_69cm_Bt2',
'hznameoriginal_60cm_69cm_Bt3',
'hznameoriginal_60cm_69cm_Bt4',
'hznameoriginal_60cm_69cm_Btg2',
'hznameoriginal_60cm_69cm_Bw',
'hznameoriginal_60cm_69cm_Bw1',
'hznameoriginal_60cm_69cm_Bw2',
'hznameoriginal_60cm_69cm_Bw3',
'hznameoriginal_60cm_69cm_C',
'hznameoriginal_60cm_69cm_C1',
'hznameoriginal_60cm_69cm_C2',
'hznameoriginal_60cm_69cm_OTHER',
'hznameoriginal_60cm_69cm_nan',
'hznameoriginal_70cm_79cm_2Bt2',
'hznameoriginal_70cm_79cm_2Bt3',
'hznameoriginal_70cm_79cm_2Bt4',
'hznameoriginal_70cm_79cm_2Btx1',
'hznameoriginal_70cm_79cm_B22',
'hznameoriginal_70cm_79cm_B22t',
'hznameoriginal_70cm_79cm_B23t',
'hznameoriginal_70cm_79cm_B3',
'hznameoriginal_70cm_79cm_BC',
'hznameoriginal_70cm_79cm_Bt',
'hznameoriginal_70cm_79cm_Bt1',
'hznameoriginal_70cm_79cm_Bt2',
'hznameoriginal_70cm_79cm_Bt3',
'hznameoriginal_70cm_79cm_Bt4',
'hznameoriginal_70cm_79cm_Btg2',
'hznameoriginal_70cm_79cm_Bw2',
'hznameoriginal_70cm_79cm_Bw3',
'hznameoriginal_70cm_79cm_C',
'hznameoriginal_70cm_79cm_C1',
'hznameoriginal_70cm_79cm_C2',
'hznameoriginal_70cm_79cm_C3',
'hznameoriginal_70cm_79cm_OTHER',
'hznameoriginal_70cm_79cm_nan',
'hznameoriginal_80cm_89cm_2Bt2',
'hznameoriginal_80cm_89cm_2Bt3',
'hznameoriginal_80cm_89cm_2Bt4',
'hznameoriginal_80cm_89cm_2C',
'hznameoriginal_80cm_89cm_2C1',
'hznameoriginal_80cm_89cm_B22t',
'hznameoriginal_80cm_89cm_B23t',
'hznameoriginal_80cm_89cm_B3',
'hznameoriginal_80cm_89cm_BC',
'hznameoriginal_80cm_89cm_Bt1',
'hznameoriginal_80cm_89cm_Bt2',
'hznameoriginal_80cm_89cm_Bt3',
'hznameoriginal_80cm_89cm_Bt4',
'hznameoriginal_80cm_89cm_Btg2',
'hznameoriginal_80cm_89cm_Bw2',
'hznameoriginal_80cm_89cm_Bw3',
'hznameoriginal_80cm_89cm_C',
'hznameoriginal_80cm_89cm_C1',
'hznameoriginal_80cm_89cm_C2',
'hznameoriginal_80cm_89cm_C3',
'hznameoriginal_80cm_89cm_OTHER',
'hznameoriginal_80cm_89cm_nan',
'hznameoriginal_90cm_99cm_2BC',
'hznameoriginal_90cm_99cm_2Bt2',
'hznameoriginal_90cm_99cm_2Bt3',
'hznameoriginal_90cm_99cm_2Bt4',
'hznameoriginal_90cm_99cm_2Bt5',
'hznameoriginal_90cm_99cm_2C',
'hznameoriginal_90cm_99cm_2C1',
'hznameoriginal_90cm_99cm_B23t',
'hznameoriginal_90cm_99cm_B3',
'hznameoriginal_90cm_99cm_BC',
'hznameoriginal_90cm_99cm_Bt2',
'hznameoriginal_90cm_99cm_Bt3',
'hznameoriginal_90cm_99cm_Bt4',
'hznameoriginal_90cm_99cm_Btg2',
'hznameoriginal_90cm_99cm_Bw2',
'hznameoriginal_90cm_99cm_Bw3',
'hznameoriginal_90cm_99cm_C',
'hznameoriginal_90cm_99cm_C1',
'hznameoriginal_90cm_99cm_C2',
'hznameoriginal_90cm_99cm_C3',
'hznameoriginal_90cm_99cm_OTHER',
'hznameoriginal_90cm_99cm_nan',
'stratextsflag_0cm_9cm_NO',
'stratextsflag_0cm_9cm_OTHER',
'stratextsflag_0cm_9cm_nan',
'stratextsflag_100cm_109cm_NO',
'stratextsflag_100cm_109cm_OTHER',
'stratextsflag_100cm_109cm_nan',
'stratextsflag_10cm_19cm_NO',
'stratextsflag_10cm_19cm_OTHER',
'stratextsflag_10cm_19cm_nan',
'stratextsflag_110cm_119cm_NO',
'stratextsflag_110cm_119cm_OTHER',
'stratextsflag_110cm_119cm_nan',
'stratextsflag_120+cm_NO',
'stratextsflag_120+cm_OTHER',
'stratextsflag_120+cm_nan',
'stratextsflag_20cm_29cm_NO',
'stratextsflag_20cm_29cm_OTHER',
'stratextsflag_20cm_29cm_nan',
'stratextsflag_30cm_39cm_NO',
'stratextsflag_30cm_39cm_OTHER',
'stratextsflag_30cm_39cm_nan',
'stratextsflag_40cm_49cm_NO',
'stratextsflag_40cm_49cm_OTHER',
'stratextsflag_40cm_49cm_nan',
'stratextsflag_50cm_59cm_NO',
'stratextsflag_50cm_59cm_OTHER',
'stratextsflag_50cm_59cm_nan',
'stratextsflag_60cm_69cm_NO',
'stratextsflag_60cm_69cm_OTHER',
'stratextsflag_60cm_69cm_nan',
'stratextsflag_70cm_79cm_NO',
'stratextsflag_70cm_79cm_OTHER',
'stratextsflag_70cm_79cm_nan',
'stratextsflag_80cm_89cm_NO',
'stratextsflag_80cm_89cm_OTHER',
'stratextsflag_80cm_89cm_nan',
'stratextsflag_90cm_99cm_NO',
'stratextsflag_90cm_99cm_OTHER',
'stratextsflag_90cm_99cm_nan',
'moistprepstate_0cm_9cm_Air-dry',
'moistprepstate_0cm_9cm_OTHER',
'moistprepstate_0cm_9cm_nan',
'moistprepstate_100cm_109cm_Air-dry',
'moistprepstate_100cm_109cm_OTHER',
'moistprepstate_100cm_109cm_nan',
'moistprepstate_10cm_19cm_Air-dry',
'moistprepstate_10cm_19cm_OTHER',
'moistprepstate_10cm_19cm_nan',
'moistprepstate_110cm_119cm_Air-dry',
'moistprepstate_110cm_119cm_OTHER',
'moistprepstate_110cm_119cm_nan',
'moistprepstate_120+cm_Air-dry',
'moistprepstate_120+cm_OTHER',
'moistprepstate_120+cm_nan',
'moistprepstate_20cm_29cm_Air-dry',
'moistprepstate_20cm_29cm_OTHER',
'moistprepstate_20cm_29cm_nan',
'moistprepstate_30cm_39cm_Air-dry',
'moistprepstate_30cm_39cm_OTHER',
'moistprepstate_30cm_39cm_nan',
'moistprepstate_40cm_49cm_Air-dry',
'moistprepstate_40cm_49cm_OTHER',
'moistprepstate_40cm_49cm_nan',
'moistprepstate_50cm_59cm_Air-dry',
'moistprepstate_50cm_59cm_OTHER',
'moistprepstate_50cm_59cm_nan',
'moistprepstate_60cm_69cm_Air-dry',
'moistprepstate_60cm_69cm_OTHER',
'moistprepstate_60cm_69cm_nan',
'moistprepstate_70cm_79cm_Air-dry',
'moistprepstate_70cm_79cm_OTHER',
'moistprepstate_70cm_79cm_nan',
'moistprepstate_80cm_89cm_Air-dry',
'moistprepstate_80cm_89cm_OTHER',
'moistprepstate_80cm_89cm_nan',
'moistprepstate_90cm_99cm_Air-dry',
'moistprepstate_90cm_99cm_OTHER',
'moistprepstate_90cm_99cm_nan',
'texcl_0cm_9cm_Clay',
'texcl_0cm_9cm_Clay loam',
'texcl_0cm_9cm_Coarse sandy loam',
'texcl_0cm_9cm_Fine sand',
'texcl_0cm_9cm_Fine sandy loam',
'texcl_0cm_9cm_Loam',
'texcl_0cm_9cm_Loamy fine sand',
'texcl_0cm_9cm_Loamy sand',
'texcl_0cm_9cm_OTHER',
'texcl_0cm_9cm_Sand',
'texcl_0cm_9cm_Sandy clay loam',
'texcl_0cm_9cm_Sandy loam',
'texcl_0cm_9cm_Silt',
'texcl_0cm_9cm_Silt loam',
'texcl_0cm_9cm_Silty clay',
'texcl_0cm_9cm_Silty clay loam',
'texcl_0cm_9cm_Very fine sandy loam',
'texcl_0cm_9cm_nan',
'texcl_100cm_109cm_Clay',
'texcl_100cm_109cm_Clay loam',
'texcl_100cm_109cm_Coarse sand',
'texcl_100cm_109cm_Coarse sandy loam',
'texcl_100cm_109cm_Fine sand',
'texcl_100cm_109cm_Fine sandy loam',
'texcl_100cm_109cm_Loam',
'texcl_100cm_109cm_Loamy coarse sand',
'texcl_100cm_109cm_Loamy fine sand',
'texcl_100cm_109cm_Loamy sand',
'texcl_100cm_109cm_OTHER',
'texcl_100cm_109cm_Sand',
'texcl_100cm_109cm_Sandy clay loam',
'texcl_100cm_109cm_Sandy loam',
'texcl_100cm_109cm_Silt loam',
'texcl_100cm_109cm_Silty clay',
'texcl_100cm_109cm_Silty clay loam',
'texcl_100cm_109cm_Very fine sandy loam',
'texcl_100cm_109cm_nan',
'texcl_10cm_19cm_Clay',
'texcl_10cm_19cm_Clay loam',
'texcl_10cm_19cm_Coarse sandy loam',
'texcl_10cm_19cm_Fine sand',
'texcl_10cm_19cm_Fine sandy loam',
'texcl_10cm_19cm_Loam',
'texcl_10cm_19cm_Loamy coarse sand',
'texcl_10cm_19cm_Loamy fine sand',
'texcl_10cm_19cm_Loamy sand',
'texcl_10cm_19cm_OTHER',
'texcl_10cm_19cm_Sand',
'texcl_10cm_19cm_Sandy clay loam',
'texcl_10cm_19cm_Sandy loam',
'texcl_10cm_19cm_Silt',
'texcl_10cm_19cm_Silt loam',
'texcl_10cm_19cm_Silty clay',
'texcl_10cm_19cm_Silty clay loam',
'texcl_10cm_19cm_Very fine sandy loam',
'texcl_10cm_19cm_nan',
'texcl_110cm_119cm_Clay',
'texcl_110cm_119cm_Clay loam',
'texcl_110cm_119cm_Coarse sand',
'texcl_110cm_119cm_Coarse sandy loam',
'texcl_110cm_119cm_Fine sand',
'texcl_110cm_119cm_Fine sandy loam',
'texcl_110cm_119cm_Loam',
'texcl_110cm_119cm_Loamy coarse sand',
'texcl_110cm_119cm_Loamy fine sand',
'texcl_110cm_119cm_Loamy sand',
'texcl_110cm_119cm_OTHER',
'texcl_110cm_119cm_Sand',
'texcl_110cm_119cm_Sandy clay loam',
'texcl_110cm_119cm_Sandy loam',
'texcl_110cm_119cm_Silt loam',
'texcl_110cm_119cm_Silty clay',
'texcl_110cm_119cm_Silty clay loam',
'texcl_110cm_119cm_Very fine sandy loam',
'texcl_110cm_119cm_nan',
'texcl_120+cm_Clay',
'texcl_120+cm_Clay loam',
'texcl_120+cm_Coarse sand',
'texcl_120+cm_Coarse sandy loam',
'texcl_120+cm_Fine sand',
'texcl_120+cm_Fine sandy loam',
'texcl_120+cm_Loam',
'texcl_120+cm_Loamy coarse sand',
'texcl_120+cm_Loamy fine sand',
'texcl_120+cm_Loamy sand',
'texcl_120+cm_OTHER',
'texcl_120+cm_Sand',
'texcl_120+cm_Sandy clay loam',
'texcl_120+cm_Sandy loam',
'texcl_120+cm_Silt loam',
'texcl_120+cm_Silty clay',
'texcl_120+cm_Silty clay loam',
'texcl_120+cm_Very fine sandy loam',
'texcl_120+cm_nan',
'texcl_20cm_29cm_Clay',
'texcl_20cm_29cm_Clay loam',
'texcl_20cm_29cm_Coarse sandy loam',
'texcl_20cm_29cm_Fine sand',
'texcl_20cm_29cm_Fine sandy loam',
'texcl_20cm_29cm_Loam',
'texcl_20cm_29cm_Loamy coarse sand',
'texcl_20cm_29cm_Loamy fine sand',
'texcl_20cm_29cm_Loamy sand',
'texcl_20cm_29cm_OTHER',
'texcl_20cm_29cm_Sand',
'texcl_20cm_29cm_Sandy clay loam',
'texcl_20cm_29cm_Sandy loam',
'texcl_20cm_29cm_Silt loam',
'texcl_20cm_29cm_Silty clay',
'texcl_20cm_29cm_Silty clay loam',
'texcl_20cm_29cm_Very fine sandy loam',
'texcl_20cm_29cm_nan',
'texcl_30cm_39cm_Clay',
'texcl_30cm_39cm_Clay loam',
'texcl_30cm_39cm_Coarse sandy loam',
'texcl_30cm_39cm_Fine sand',
'texcl_30cm_39cm_Fine sandy loam',
'texcl_30cm_39cm_Loam',
'texcl_30cm_39cm_Loamy coarse sand',
'texcl_30cm_39cm_Loamy fine sand',
'texcl_30cm_39cm_Loamy sand',
'texcl_30cm_39cm_OTHER',
'texcl_30cm_39cm_Sand',
'texcl_30cm_39cm_Sandy clay loam',
'texcl_30cm_39cm_Sandy loam',
'texcl_30cm_39cm_Silt loam',
'texcl_30cm_39cm_Silty clay',
'texcl_30cm_39cm_Silty clay loam',
'texcl_30cm_39cm_Very fine sandy loam',
'texcl_30cm_39cm_nan',
'texcl_40cm_49cm_Clay',
'texcl_40cm_49cm_Clay loam',
'texcl_40cm_49cm_Coarse sand',
'texcl_40cm_49cm_Coarse sandy loam',
'texcl_40cm_49cm_Fine sand',
'texcl_40cm_49cm_Fine sandy loam',
'texcl_40cm_49cm_Loam',
'texcl_40cm_49cm_Loamy coarse sand',
'texcl_40cm_49cm_Loamy fine sand',
'texcl_40cm_49cm_Loamy sand',
'texcl_40cm_49cm_OTHER',
'texcl_40cm_49cm_Sand',
'texcl_40cm_49cm_Sandy clay loam',
'texcl_40cm_49cm_Sandy loam',
'texcl_40cm_49cm_Silt loam',
'texcl_40cm_49cm_Silty clay',
'texcl_40cm_49cm_Silty clay loam',
'texcl_40cm_49cm_Very fine sandy loam',
'texcl_40cm_49cm_nan',
'texcl_50cm_59cm_Clay',
'texcl_50cm_59cm_Clay loam',
'texcl_50cm_59cm_Coarse sand',
'texcl_50cm_59cm_Coarse sandy loam',
'texcl_50cm_59cm_Fine sand',
'texcl_50cm_59cm_Fine sandy loam',
'texcl_50cm_59cm_Loam',
'texcl_50cm_59cm_Loamy coarse sand',
'texcl_50cm_59cm_Loamy fine sand',
'texcl_50cm_59cm_Loamy sand',
'texcl_50cm_59cm_OTHER',
'texcl_50cm_59cm_Sand',
'texcl_50cm_59cm_Sandy clay loam',
'texcl_50cm_59cm_Sandy loam',
'texcl_50cm_59cm_Silt loam',
'texcl_50cm_59cm_Silty clay',
'texcl_50cm_59cm_Silty clay loam',
'texcl_50cm_59cm_Very fine sandy loam',
'texcl_50cm_59cm_nan',
'texcl_60cm_69cm_Clay',
'texcl_60cm_69cm_Clay loam',
'texcl_60cm_69cm_Coarse sand',
'texcl_60cm_69cm_Coarse sandy loam',
'texcl_60cm_69cm_Fine sand',
'texcl_60cm_69cm_Fine sandy loam',
'texcl_60cm_69cm_Loam',
'texcl_60cm_69cm_Loamy coarse sand',
'texcl_60cm_69cm_Loamy fine sand',
'texcl_60cm_69cm_Loamy sand',
'texcl_60cm_69cm_OTHER',
'texcl_60cm_69cm_Sand',
'texcl_60cm_69cm_Sandy clay loam',
'texcl_60cm_69cm_Sandy loam',
'texcl_60cm_69cm_Silt loam',
'texcl_60cm_69cm_Silty clay',
'texcl_60cm_69cm_Silty clay loam',
'texcl_60cm_69cm_Very fine sandy loam',
'texcl_60cm_69cm_nan',
'texcl_70cm_79cm_Clay',
'texcl_70cm_79cm_Clay loam',
'texcl_70cm_79cm_Coarse sand',
'texcl_70cm_79cm_Coarse sandy loam',
'texcl_70cm_79cm_Fine sand',
'texcl_70cm_79cm_Fine sandy loam',
'texcl_70cm_79cm_Loam',
'texcl_70cm_79cm_Loamy coarse sand',
'texcl_70cm_79cm_Loamy fine sand',
'texcl_70cm_79cm_Loamy sand',
'texcl_70cm_79cm_OTHER',
'texcl_70cm_79cm_Sand',
'texcl_70cm_79cm_Sandy clay loam',
'texcl_70cm_79cm_Sandy loam',
'texcl_70cm_79cm_Silt loam',
'texcl_70cm_79cm_Silty clay',
'texcl_70cm_79cm_Silty clay loam',
'texcl_70cm_79cm_Very fine sandy loam',
'texcl_70cm_79cm_nan',
'texcl_80cm_89cm_Clay',
'texcl_80cm_89cm_Clay loam',
'texcl_80cm_89cm_Coarse sand',
'texcl_80cm_89cm_Coarse sandy loam',
'texcl_80cm_89cm_Fine sand',
'texcl_80cm_89cm_Fine sandy loam',
'texcl_80cm_89cm_Loam',
'texcl_80cm_89cm_Loamy coarse sand',
'texcl_80cm_89cm_Loamy fine sand',
'texcl_80cm_89cm_Loamy sand',
'texcl_80cm_89cm_OTHER',
'texcl_80cm_89cm_Sand',
'texcl_80cm_89cm_Sandy clay loam',
'texcl_80cm_89cm_Sandy loam',
'texcl_80cm_89cm_Silt loam',
'texcl_80cm_89cm_Silty clay',
'texcl_80cm_89cm_Silty clay loam',
'texcl_80cm_89cm_Very fine sandy loam',
'texcl_80cm_89cm_nan',
'texcl_90cm_99cm_Clay',
'texcl_90cm_99cm_Clay loam',
'texcl_90cm_99cm_Coarse sand',
'texcl_90cm_99cm_Coarse sandy loam',
'texcl_90cm_99cm_Fine sand',
'texcl_90cm_99cm_Fine sandy loam',
'texcl_90cm_99cm_Loam',
'texcl_90cm_99cm_Loamy coarse sand',
'texcl_90cm_99cm_Loamy fine sand',
'texcl_90cm_99cm_Loamy sand',
'texcl_90cm_99cm_OTHER',
'texcl_90cm_99cm_Sand',
'texcl_90cm_99cm_Sandy clay loam',
'texcl_90cm_99cm_Sandy loam',
'texcl_90cm_99cm_Silt loam',
'texcl_90cm_99cm_Silty clay',
'texcl_90cm_99cm_Silty clay loam',
'texcl_90cm_99cm_Very fine sandy loam',
'texcl_90cm_99cm_nan',
'Feature_Type_Anthropogenic Feature',
'Feature_Type_Landform',
'Feature_Type_Landscape',
'Feature_Type_Microfeature',
'Feature_alluvial fan',
'Feature_coastal plain',
'Feature_drainageway',
'Feature_flood plain',
'Feature_foothills',
'Feature_ground moraine',
'Feature_hill',
'Feature_hills',
'Feature_hillslope',
'Feature_interfluve',
'Feature_intermontane basin',
'Feature_lake plain',
'Feature_mountain',
'Feature_mountain slope',
'Feature_mountains',
'Feature_other',
'Feature_outwash plain',
'Feature_piedmont',
'Feature_plain',
'Feature_plains',
'Feature_plateau',
'Feature_ridge',
'Feature_river valley',
'Feature_stream terrace',
'Feature_terrace',
'Feature_till plain',
'Feature_upland',
'Feature_valley']

In [93]:
# numerical variables in a list
num_var_list = [
'plsssectio',
'utmnorthin',
'utmeasting',
'elev',
'slope',
'aspect',
'benchmarks',
'latstddeci',
'longstddec',
'commphasei',
'pedodermco_site_siteobs',
'drainedfla',
'beddingfla',
'plantation',
'soilreplic',
'pedodermco_pedon',
'pmorder',
'psctopdepth',
'pscbotdepth',
'noncarbclaywtavg',
'claytotwtavg',
'le0to100',
'wf0175wtavgpsc',
'volfractgt2wtavg',
'cec7clayratiowtavg',
'hzdept_0cm_9cm',
'hzdept_100cm_109cm',
'hzdept_10cm_19cm',
'hzdept_110cm_119cm',
'hzdept_120+cm',
'hzdept_20cm_29cm',
'hzdept_30cm_39cm',
'hzdept_40cm_49cm',
'hzdept_50cm_59cm',
'hzdept_60cm_69cm',
'hzdept_70cm_79cm',
'hzdept_80cm_89cm',
'hzdept_90cm_99cm',
'hzdepb_0cm_9cm',
'hzdepb_100cm_109cm',
'hzdepb_10cm_19cm',
'hzdepb_110cm_119cm',
'hzdepb_120+cm',
'hzdepb_20cm_29cm',
'hzdepb_30cm_39cm',
'hzdepb_40cm_49cm',
'hzdepb_50cm_59cm',
'hzdepb_60cm_69cm',
'hzdepb_70cm_79cm',
'hzdepb_80cm_89cm',
'hzdepb_90cm_99cm',
'ph1to1h2o_0cm_9cm',
'ph1to1h2o_100cm_109cm',
'ph1to1h2o_10cm_19cm',
'ph1to1h2o_110cm_119cm',
'ph1to1h2o_120+cm',
'ph1to1h2o_20cm_29cm',
'ph1to1h2o_30cm_39cm',
'ph1to1h2o_40cm_49cm',
'ph1to1h2o_50cm_59cm',
'ph1to1h2o_60cm_69cm',
'ph1to1h2o_70cm_79cm',
'ph1to1h2o_80cm_89cm',
'ph1to1h2o_90cm_99cm',
'ph01mcacl2_0cm_9cm',
'ph01mcacl2_100cm_109cm',
'ph01mcacl2_10cm_19cm',
'ph01mcacl2_110cm_119cm',
'ph01mcacl2_120+cm',
'ph01mcacl2_20cm_29cm',
'ph01mcacl2_30cm_39cm',
'ph01mcacl2_40cm_49cm',
'ph01mcacl2_50cm_59cm',
'ph01mcacl2_60cm_69cm',
'ph01mcacl2_70cm_79cm',
'ph01mcacl2_80cm_89cm',
'ph01mcacl2_90cm_99cm',
'sandvcmeasured_0cm_9cm',
'sandvcmeasured_100cm_109cm',
'sandvcmeasured_10cm_19cm',
'sandvcmeasured_110cm_119cm',
'sandvcmeasured_120+cm',
'sandvcmeasured_20cm_29cm',
'sandvcmeasured_30cm_39cm',
'sandvcmeasured_40cm_49cm',
'sandvcmeasured_50cm_59cm',
'sandvcmeasured_60cm_69cm',
'sandvcmeasured_70cm_79cm',
'sandvcmeasured_80cm_89cm',
'sandvcmeasured_90cm_99cm',
'sandcomeasured_0cm_9cm',
'sandcomeasured_100cm_109cm',
'sandcomeasured_10cm_19cm',
'sandcomeasured_110cm_119cm',
'sandcomeasured_120+cm',
'sandcomeasured_20cm_29cm',
'sandcomeasured_30cm_39cm',
'sandcomeasured_40cm_49cm',
'sandcomeasured_50cm_59cm',
'sandcomeasured_60cm_69cm',
'sandcomeasured_70cm_79cm',
'sandcomeasured_80cm_89cm',
'sandcomeasured_90cm_99cm',
'sandmedmeasured_0cm_9cm',
'sandmedmeasured_100cm_109cm',
'sandmedmeasured_10cm_19cm',
'sandmedmeasured_110cm_119cm',
'sandmedmeasured_120+cm',
'sandmedmeasured_20cm_29cm',
'sandmedmeasured_30cm_39cm',
'sandmedmeasured_40cm_49cm',
'sandmedmeasured_50cm_59cm',
'sandmedmeasured_60cm_69cm',
'sandmedmeasured_70cm_79cm',
'sandmedmeasured_80cm_89cm',
'sandmedmeasured_90cm_99cm',
'sandfinemeasured_0cm_9cm',
'sandfinemeasured_100cm_109cm',
'sandfinemeasured_10cm_19cm',
'sandfinemeasured_110cm_119cm',
'sandfinemeasured_120+cm',
'sandfinemeasured_20cm_29cm',
'sandfinemeasured_30cm_39cm',
'sandfinemeasured_40cm_49cm',
'sandfinemeasured_50cm_59cm',
'sandfinemeasured_60cm_69cm',
'sandfinemeasured_70cm_79cm',
'sandfinemeasured_80cm_89cm',
'sandfinemeasured_90cm_99cm',
'sandvfmeasured_0cm_9cm',
'sandvfmeasured_100cm_109cm',
'sandvfmeasured_10cm_19cm',
'sandvfmeasured_110cm_119cm',
'sandvfmeasured_120+cm',
'sandvfmeasured_20cm_29cm',
'sandvfmeasured_30cm_39cm',
'sandvfmeasured_40cm_49cm',
'sandvfmeasured_50cm_59cm',
'sandvfmeasured_60cm_69cm',
'sandvfmeasured_70cm_79cm',
'sandvfmeasured_80cm_89cm',
'sandvfmeasured_90cm_99cm',
'sandtotmeasured_0cm_9cm',
'sandtotmeasured_100cm_109cm',
'sandtotmeasured_10cm_19cm',
'sandtotmeasured_110cm_119cm',
'sandtotmeasured_120+cm',
'sandtotmeasured_20cm_29cm',
'sandtotmeasured_30cm_39cm',
'sandtotmeasured_40cm_49cm',
'sandtotmeasured_50cm_59cm',
'sandtotmeasured_60cm_69cm',
'sandtotmeasured_70cm_79cm',
'sandtotmeasured_80cm_89cm',
'sandtotmeasured_90cm_99cm',
'siltcomeasured_0cm_9cm',
'siltcomeasured_100cm_109cm',
'siltcomeasured_10cm_19cm',
'siltcomeasured_110cm_119cm',
'siltcomeasured_120+cm',
'siltcomeasured_20cm_29cm',
'siltcomeasured_30cm_39cm',
'siltcomeasured_40cm_49cm',
'siltcomeasured_50cm_59cm',
'siltcomeasured_60cm_69cm',
'siltcomeasured_70cm_79cm',
'siltcomeasured_80cm_89cm',
'siltcomeasured_90cm_99cm',
'siltfinemeasured_0cm_9cm',
'siltfinemeasured_100cm_109cm',
'siltfinemeasured_10cm_19cm',
'siltfinemeasured_110cm_119cm',
'siltfinemeasured_120+cm',
'siltfinemeasured_20cm_29cm',
'siltfinemeasured_30cm_39cm',
'siltfinemeasured_40cm_49cm',
'siltfinemeasured_50cm_59cm',
'siltfinemeasured_60cm_69cm',
'siltfinemeasured_70cm_79cm',
'siltfinemeasured_80cm_89cm',
'siltfinemeasured_90cm_99cm',
'silttotmeasured_0cm_9cm',
'silttotmeasured_100cm_109cm',
'silttotmeasured_10cm_19cm',
'silttotmeasured_110cm_119cm',
'silttotmeasured_120+cm',
'silttotmeasured_20cm_29cm',
'silttotmeasured_30cm_39cm',
'silttotmeasured_40cm_49cm',
'silttotmeasured_50cm_59cm',
'silttotmeasured_60cm_69cm',
'silttotmeasured_70cm_79cm',
'silttotmeasured_80cm_89cm',
'silttotmeasured_90cm_99cm',
'claytotmeasured_0cm_9cm',
'claytotmeasured_100cm_109cm',
'claytotmeasured_10cm_19cm',
'claytotmeasured_110cm_119cm',
'claytotmeasured_120+cm',
'claytotmeasured_20cm_29cm',
'claytotmeasured_30cm_39cm',
'claytotmeasured_40cm_49cm',
'claytotmeasured_50cm_59cm',
'claytotmeasured_60cm_69cm',
'claytotmeasured_70cm_79cm',
'claytotmeasured_80cm_89cm',
'claytotmeasured_90cm_99cm',
'carbonorganicpctmeasured_0cm_9cm',
'carbonorganicpctmeasured_100cm_109cm',
'carbonorganicpctmeasured_10cm_19cm',
'carbonorganicpctmeasured_110cm_119cm',
'carbonorganicpctmeasured_120+cm',
'carbonorganicpctmeasured_20cm_29cm',
'carbonorganicpctmeasured_30cm_39cm',
'carbonorganicpctmeasured_40cm_49cm',
'carbonorganicpctmeasured_50cm_59cm',
'carbonorganicpctmeasured_60cm_69cm',
'carbonorganicpctmeasured_70cm_79cm',
'carbonorganicpctmeasured_80cm_89cm',
'carbonorganicpctmeasured_90cm_99cm',
'fragwt25_0cm_9cm',
'fragwt25_100cm_109cm',
'fragwt25_10cm_19cm',
'fragwt25_110cm_119cm',
'fragwt25_120+cm',
'fragwt25_20cm_29cm',
'fragwt25_30cm_39cm',
'fragwt25_40cm_49cm',
'fragwt25_50cm_59cm',
'fragwt25_60cm_69cm',
'fragwt25_70cm_79cm',
'fragwt25_80cm_89cm',
'fragwt25_90cm_99cm',
'fragwt520_0cm_9cm',
'fragwt520_100cm_109cm',
'fragwt520_10cm_19cm',
'fragwt520_110cm_119cm',
'fragwt520_120+cm',
'fragwt520_20cm_29cm',
'fragwt520_30cm_39cm',
'fragwt520_40cm_49cm',
'fragwt520_50cm_59cm',
'fragwt520_60cm_69cm',
'fragwt520_70cm_79cm',
'fragwt520_80cm_89cm',
'fragwt520_90cm_99cm',
'fragwt2075_0cm_9cm',
'fragwt2075_100cm_109cm',
'fragwt2075_10cm_19cm',
'fragwt2075_110cm_119cm',
'fragwt2075_120+cm',
'fragwt2075_20cm_29cm',
'fragwt2075_30cm_39cm',
'fragwt2075_40cm_49cm',
'fragwt2075_50cm_59cm',
'fragwt2075_60cm_69cm',
'fragwt2075_70cm_79cm',
'fragwt2075_80cm_89cm',
'fragwt2075_90cm_99cm',
'fragwt275_0cm_9cm',
'fragwt275_100cm_109cm',
'fragwt275_10cm_19cm',
'fragwt275_110cm_119cm',
'fragwt275_120+cm',
'fragwt275_20cm_29cm',
'fragwt275_30cm_39cm',
'fragwt275_40cm_49cm',
'fragwt275_50cm_59cm',
'fragwt275_60cm_69cm',
'fragwt275_70cm_79cm',
'fragwt275_80cm_89cm',
'fragwt275_90cm_99cm',
'wtpct0175_0cm_9cm',
'wtpct0175_100cm_109cm',
'wtpct0175_10cm_19cm',
'wtpct0175_110cm_119cm',
'wtpct0175_120+cm',
'wtpct0175_20cm_29cm',
'wtpct0175_30cm_39cm',
'wtpct0175_40cm_49cm',
'wtpct0175_50cm_59cm',
'wtpct0175_60cm_69cm',
'wtpct0175_70cm_79cm',
'wtpct0175_80cm_89cm',
'wtpct0175_90cm_99cm',
'wtpctgt2ws_0cm_9cm',
'wtpctgt2ws_100cm_109cm',
'wtpctgt2ws_10cm_19cm',
'wtpctgt2ws_110cm_119cm',
'wtpctgt2ws_120+cm',
'wtpctgt2ws_20cm_29cm',
'wtpctgt2ws_30cm_39cm',
'wtpctgt2ws_40cm_49cm',
'wtpctgt2ws_50cm_59cm',
'wtpctgt2ws_60cm_69cm',
'wtpctgt2ws_70cm_79cm',
'wtpctgt2ws_80cm_89cm',
'wtpctgt2ws_90cm_99cm',
'esp_0cm_9cm',
'esp_100cm_109cm',
'esp_10cm_19cm',
'esp_110cm_119cm',
'esp_120+cm',
'esp_20cm_29cm',
'esp_30cm_39cm',
'esp_40cm_49cm',
'esp_50cm_59cm',
'esp_60cm_69cm',
'esp_70cm_79cm',
'esp_80cm_89cm',
'esp_90cm_99cm',
'cecsumcations_0cm_9cm',
'cecsumcations_100cm_109cm',
'cecsumcations_10cm_19cm',
'cecsumcations_110cm_119cm',
'cecsumcations_120+cm',
'cecsumcations_20cm_29cm',
'cecsumcations_30cm_39cm',
'cecsumcations_40cm_49cm',
'cecsumcations_50cm_59cm',
'cecsumcations_60cm_69cm',
'cecsumcations_70cm_79cm',
'cecsumcations_80cm_89cm',
'cecsumcations_90cm_99cm',
'cec7_0cm_9cm',
'cec7_100cm_109cm',
'cec7_10cm_19cm',
'cec7_110cm_119cm',
'cec7_120+cm',
'cec7_20cm_29cm',
'cec7_30cm_39cm',
'cec7_40cm_49cm',
'cec7_50cm_59cm',
'cec7_60cm_69cm',
'cec7_70cm_79cm',
'cec7_80cm_89cm',
'cec7_90cm_99cm',
'sumbases_0cm_9cm',
'sumbases_100cm_109cm',
'sumbases_10cm_19cm',
'sumbases_110cm_119cm',
'sumbases_120+cm',
'sumbases_20cm_29cm',
'sumbases_30cm_39cm',
'sumbases_40cm_49cm',
'sumbases_50cm_59cm',
'sumbases_60cm_69cm',
'sumbases_70cm_79cm',
'sumbases_80cm_89cm',
'sumbases_90cm_99cm',
'basesatsumcations_0cm_9cm',
'basesatsumcations_100cm_109cm',
'basesatsumcations_10cm_19cm',
'basesatsumcations_110cm_119cm',
'basesatsumcations_120+cm',
'basesatsumcations_20cm_29cm',
'basesatsumcations_30cm_39cm',
'basesatsumcations_40cm_49cm',
'basesatsumcations_50cm_59cm',
'basesatsumcations_60cm_69cm',
'basesatsumcations_70cm_79cm',
'basesatsumcations_80cm_89cm',
'basesatsumcations_90cm_99cm',
'basesatnh4oac_0cm_9cm',
'basesatnh4oac_100cm_109cm',
'basesatnh4oac_10cm_19cm',
'basesatnh4oac_110cm_119cm',
'basesatnh4oac_120+cm',
'basesatnh4oac_20cm_29cm',
'basesatnh4oac_30cm_39cm',
'basesatnh4oac_40cm_49cm',
'basesatnh4oac_50cm_59cm',
'basesatnh4oac_60cm_69cm',
'basesatnh4oac_70cm_79cm',
'basesatnh4oac_80cm_89cm',
'basesatnh4oac_90cm_99cm',
'caco3equivmeasured_0cm_9cm',
'caco3equivmeasured_100cm_109cm',
'caco3equivmeasured_10cm_19cm',
'caco3equivmeasured_110cm_119cm',
'caco3equivmeasured_120+cm',
'caco3equivmeasured_20cm_29cm',
'caco3equivmeasured_30cm_39cm',
'caco3equivmeasured_40cm_49cm',
'caco3equivmeasured_50cm_59cm',
'caco3equivmeasured_60cm_69cm',
'caco3equivmeasured_70cm_79cm',
'caco3equivmeasured_80cm_89cm',
'caco3equivmeasured_90cm_99cm',
'extracid_0cm_9cm',
'extracid_100cm_109cm',
'extracid_10cm_19cm',
'extracid_110cm_119cm',
'extracid_120+cm',
'extracid_20cm_29cm',
'extracid_30cm_39cm',
'extracid_40cm_49cm',
'extracid_50cm_59cm',
'extracid_60cm_69cm',
'extracid_70cm_79cm',
'extracid_80cm_89cm',
'extracid_90cm_99cm',
'wfifteenbarmeasured_0cm_9cm',
'wfifteenbarmeasured_100cm_109cm',
'wfifteenbarmeasured_10cm_19cm',
'wfifteenbarmeasured_110cm_119cm',
'wfifteenbarmeasured_120+cm',
'wfifteenbarmeasured_20cm_29cm',
'wfifteenbarmeasured_30cm_39cm',
'wfifteenbarmeasured_40cm_49cm',
'wfifteenbarmeasured_50cm_59cm',
'wfifteenbarmeasured_60cm_69cm',
'wfifteenbarmeasured_70cm_79cm',
'wfifteenbarmeasured_80cm_89cm',
'wfifteenbarmeasured_90cm_99cm',
'wfifteenbartoclay_0cm_9cm',
'wfifteenbartoclay_100cm_109cm',
'wfifteenbartoclay_10cm_19cm',
'wfifteenbartoclay_110cm_119cm',
'wfifteenbartoclay_120+cm',
'wfifteenbartoclay_20cm_29cm',
'wfifteenbartoclay_30cm_39cm',
'wfifteenbartoclay_40cm_49cm',
'wfifteenbartoclay_50cm_59cm',
'wfifteenbartoclay_60cm_69cm',
'wfifteenbartoclay_70cm_79cm',
'wfifteenbartoclay_80cm_89cm',
'wfifteenbartoclay_90cm_99cm',
'adod_0cm_9cm',
'adod_100cm_109cm',
'adod_10cm_19cm',
'adod_110cm_119cm',
'adod_120+cm',
'adod_20cm_29cm',
'adod_30cm_39cm',
'adod_40cm_49cm',
'adod_50cm_59cm',
'adod_60cm_69cm',
'adod_70cm_79cm',
'adod_80cm_89cm',
'adod_90cm_99cm']

In [94]:
print('categorical variables', len(cat_var_list))
print('numerical variables', len(num_var_list))
print('ids to drop', len(drop_list))

categorical variables 1129
numerical variables 441
ids to drop 17


In [95]:
# perform roll up
ssp_sitepm_ncss_geomorph_agg = aggregate_to_site(ssp_sitepm_ncss_geomorph_v2,num_var_list,cat_var_list)

ssp_sitepm_ncss_geomorph_agg.shape

(546262, 1571)

In [96]:
ssp_sitepm_ncss_geomorph_agg.head()

Unnamed: 0,siteiid,plsssectio,utmnorthin,utmeasting,elev,slope,aspect,benchmarks,latstddeci,longstddec,commphasei,pedodermco_site_siteobs,drainedfla,beddingfla,plantation,soilreplic,pedodermco_pedon,pmorder,psctopdepth,pscbotdepth,noncarbclaywtavg,claytotwtavg,le0to100,wf0175wtavgpsc,volfractgt2wtavg,cec7clayratiowtavg,hzdept_0cm_9cm,hzdept_100cm_109cm,hzdept_10cm_19cm,hzdept_110cm_119cm,hzdept_120+cm,hzdept_20cm_29cm,hzdept_30cm_39cm,hzdept_40cm_49cm,hzdept_50cm_59cm,hzdept_60cm_69cm,hzdept_70cm_79cm,hzdept_80cm_89cm,hzdept_90cm_99cm,hzdepb_0cm_9cm,hzdepb_100cm_109cm,hzdepb_10cm_19cm,hzdepb_110cm_119cm,hzdepb_120+cm,hzdepb_20cm_29cm,hzdepb_30cm_39cm,hzdepb_40cm_49cm,hzdepb_50cm_59cm,hzdepb_60cm_69cm,hzdepb_70cm_79cm,hzdepb_80cm_89cm,hzdepb_90cm_99cm,ph1to1h2o_0cm_9cm,ph1to1h2o_100cm_109cm,ph1to1h2o_10cm_19cm,ph1to1h2o_110cm_119cm,ph1to1h2o_120+cm,ph1to1h2o_20cm_29cm,ph1to1h2o_30cm_39cm,ph1to1h2o_40cm_49cm,ph1to1h2o_50cm_59cm,ph1to1h2o_60cm_69cm,ph1to1h2o_70cm_79cm,ph1to1h2o_80cm_89cm,ph1to1h2o_90cm_99cm,ph01mcacl2_0cm_9cm,ph01mcacl2_100cm_109cm,ph01mcacl2_10cm_19cm,ph01mcacl2_110cm_119cm,ph01mcacl2_120+cm,ph01mcacl2_20cm_29cm,ph01mcacl2_30cm_39cm,ph01mcacl2_40cm_49cm,ph01mcacl2_50cm_59cm,ph01mcacl2_60cm_69cm,ph01mcacl2_70cm_79cm,ph01mcacl2_80cm_89cm,ph01mcacl2_90cm_99cm,sandvcmeasured_0cm_9cm,sandvcmeasured_100cm_109cm,sandvcmeasured_10cm_19cm,sandvcmeasured_110cm_119cm,sandvcmeasured_120+cm,sandvcmeasured_20cm_29cm,sandvcmeasured_30cm_39cm,sandvcmeasured_40cm_49cm,sandvcmeasured_50cm_59cm,sandvcmeasured_60cm_69cm,sandvcmeasured_70cm_79cm,sandvcmeasured_80cm_89cm,sandvcmeasured_90cm_99cm,sandcomeasured_0cm_9cm,sandcomeasured_100cm_109cm,sandcomeasured_10cm_19cm,sandcomeasured_110cm_119cm,sandcomeasured_120+cm,sandcomeasured_20cm_29cm,sandcomeasured_30cm_39cm,sandcomeasured_40cm_49cm,sandcomeasured_50cm_59cm,sandcomeasured_60cm_69cm,sandcomeasured_70cm_79cm,sandcomeasured_80cm_89cm,sandcomeasured_90cm_99cm,sandmedmeasured_0cm_9cm,sandmedmeasured_100cm_109cm,sandmedmeasured_10cm_19cm,sandmedmeasured_110cm_119cm,sandmedmeasured_120+cm,sandmedmeasured_20cm_29cm,sandmedmeasured_30cm_39cm,sandmedmeasured_40cm_49cm,sandmedmeasured_50cm_59cm,sandmedmeasured_60cm_69cm,sandmedmeasured_70cm_79cm,sandmedmeasured_80cm_89cm,sandmedmeasured_90cm_99cm,sandfinemeasured_0cm_9cm,sandfinemeasured_100cm_109cm,sandfinemeasured_10cm_19cm,sandfinemeasured_110cm_119cm,sandfinemeasured_120+cm,sandfinemeasured_20cm_29cm,sandfinemeasured_30cm_39cm,sandfinemeasured_40cm_49cm,sandfinemeasured_50cm_59cm,sandfinemeasured_60cm_69cm,sandfinemeasured_70cm_79cm,sandfinemeasured_80cm_89cm,sandfinemeasured_90cm_99cm,sandvfmeasured_0cm_9cm,sandvfmeasured_100cm_109cm,sandvfmeasured_10cm_19cm,sandvfmeasured_110cm_119cm,sandvfmeasured_120+cm,sandvfmeasured_20cm_29cm,sandvfmeasured_30cm_39cm,sandvfmeasured_40cm_49cm,sandvfmeasured_50cm_59cm,sandvfmeasured_60cm_69cm,sandvfmeasured_70cm_79cm,sandvfmeasured_80cm_89cm,sandvfmeasured_90cm_99cm,sandtotmeasured_0cm_9cm,sandtotmeasured_100cm_109cm,sandtotmeasured_10cm_19cm,sandtotmeasured_110cm_119cm,sandtotmeasured_120+cm,sandtotmeasured_20cm_29cm,sandtotmeasured_30cm_39cm,sandtotmeasured_40cm_49cm,sandtotmeasured_50cm_59cm,sandtotmeasured_60cm_69cm,sandtotmeasured_70cm_79cm,sandtotmeasured_80cm_89cm,sandtotmeasured_90cm_99cm,siltcomeasured_0cm_9cm,siltcomeasured_100cm_109cm,siltcomeasured_10cm_19cm,siltcomeasured_110cm_119cm,siltcomeasured_120+cm,siltcomeasured_20cm_29cm,siltcomeasured_30cm_39cm,siltcomeasured_40cm_49cm,siltcomeasured_50cm_59cm,siltcomeasured_60cm_69cm,siltcomeasured_70cm_79cm,siltcomeasured_80cm_89cm,siltcomeasured_90cm_99cm,siltfinemeasured_0cm_9cm,siltfinemeasured_100cm_109cm,siltfinemeasured_10cm_19cm,siltfinemeasured_110cm_119cm,siltfinemeasured_120+cm,siltfinemeasured_20cm_29cm,siltfinemeasured_30cm_39cm,siltfinemeasured_40cm_49cm,siltfinemeasured_50cm_59cm,siltfinemeasured_60cm_69cm,siltfinemeasured_70cm_79cm,siltfinemeasured_80cm_89cm,siltfinemeasured_90cm_99cm,silttotmeasured_0cm_9cm,silttotmeasured_100cm_109cm,silttotmeasured_10cm_19cm,silttotmeasured_110cm_119cm,silttotmeasured_120+cm,silttotmeasured_20cm_29cm,silttotmeasured_30cm_39cm,silttotmeasured_40cm_49cm,silttotmeasured_50cm_59cm,silttotmeasured_60cm_69cm,silttotmeasured_70cm_79cm,silttotmeasured_80cm_89cm,silttotmeasured_90cm_99cm,claytotmeasured_0cm_9cm,claytotmeasured_100cm_109cm,claytotmeasured_10cm_19cm,claytotmeasured_110cm_119cm,claytotmeasured_120+cm,claytotmeasured_20cm_29cm,claytotmeasured_30cm_39cm,claytotmeasured_40cm_49cm,claytotmeasured_50cm_59cm,claytotmeasured_60cm_69cm,claytotmeasured_70cm_79cm,claytotmeasured_80cm_89cm,claytotmeasured_90cm_99cm,carbonorganicpctmeasured_0cm_9cm,carbonorganicpctmeasured_100cm_109cm,carbonorganicpctmeasured_10cm_19cm,carbonorganicpctmeasured_110cm_119cm,carbonorganicpctmeasured_120+cm,carbonorganicpctmeasured_20cm_29cm,carbonorganicpctmeasured_30cm_39cm,carbonorganicpctmeasured_40cm_49cm,carbonorganicpctmeasured_50cm_59cm,carbonorganicpctmeasured_60cm_69cm,carbonorganicpctmeasured_70cm_79cm,carbonorganicpctmeasured_80cm_89cm,carbonorganicpctmeasured_90cm_99cm,fragwt25_0cm_9cm,fragwt25_100cm_109cm,fragwt25_10cm_19cm,fragwt25_110cm_119cm,fragwt25_120+cm,fragwt25_20cm_29cm,fragwt25_30cm_39cm,fragwt25_40cm_49cm,fragwt25_50cm_59cm,fragwt25_60cm_69cm,fragwt25_70cm_79cm,fragwt25_80cm_89cm,fragwt25_90cm_99cm,fragwt520_0cm_9cm,fragwt520_100cm_109cm,fragwt520_10cm_19cm,fragwt520_110cm_119cm,fragwt520_120+cm,fragwt520_20cm_29cm,fragwt520_30cm_39cm,fragwt520_40cm_49cm,fragwt520_50cm_59cm,fragwt520_60cm_69cm,fragwt520_70cm_79cm,fragwt520_80cm_89cm,fragwt520_90cm_99cm,fragwt2075_0cm_9cm,fragwt2075_100cm_109cm,fragwt2075_10cm_19cm,...,texcl_100cm_109cm_Loamy fine sand,texcl_100cm_109cm_Loamy sand,texcl_100cm_109cm_OTHER,texcl_100cm_109cm_Sand,texcl_100cm_109cm_Sandy clay loam,texcl_100cm_109cm_Sandy loam,texcl_100cm_109cm_Silt loam,texcl_100cm_109cm_Silty clay,texcl_100cm_109cm_Silty clay loam,texcl_100cm_109cm_Very fine sandy loam,texcl_100cm_109cm_nan,texcl_10cm_19cm_Clay,texcl_10cm_19cm_Clay loam,texcl_10cm_19cm_Coarse sandy loam,texcl_10cm_19cm_Fine sand,texcl_10cm_19cm_Fine sandy loam,texcl_10cm_19cm_Loam,texcl_10cm_19cm_Loamy coarse sand,texcl_10cm_19cm_Loamy fine sand,texcl_10cm_19cm_Loamy sand,texcl_10cm_19cm_OTHER,texcl_10cm_19cm_Sand,texcl_10cm_19cm_Sandy clay loam,texcl_10cm_19cm_Sandy loam,texcl_10cm_19cm_Silt,texcl_10cm_19cm_Silt loam,texcl_10cm_19cm_Silty clay,texcl_10cm_19cm_Silty clay loam,texcl_10cm_19cm_Very fine sandy loam,texcl_10cm_19cm_nan,texcl_110cm_119cm_Clay,texcl_110cm_119cm_Clay loam,texcl_110cm_119cm_Coarse sand,texcl_110cm_119cm_Coarse sandy loam,texcl_110cm_119cm_Fine sand,texcl_110cm_119cm_Fine sandy loam,texcl_110cm_119cm_Loam,texcl_110cm_119cm_Loamy coarse sand,texcl_110cm_119cm_Loamy fine sand,texcl_110cm_119cm_Loamy sand,texcl_110cm_119cm_OTHER,texcl_110cm_119cm_Sand,texcl_110cm_119cm_Sandy clay loam,texcl_110cm_119cm_Sandy loam,texcl_110cm_119cm_Silt loam,texcl_110cm_119cm_Silty clay,texcl_110cm_119cm_Silty clay loam,texcl_110cm_119cm_Very fine sandy loam,texcl_110cm_119cm_nan,texcl_120+cm_Clay,texcl_120+cm_Clay loam,texcl_120+cm_Coarse sand,texcl_120+cm_Coarse sandy loam,texcl_120+cm_Fine sand,texcl_120+cm_Fine sandy loam,texcl_120+cm_Loam,texcl_120+cm_Loamy coarse sand,texcl_120+cm_Loamy fine sand,texcl_120+cm_Loamy sand,texcl_120+cm_OTHER,texcl_120+cm_Sand,texcl_120+cm_Sandy clay loam,texcl_120+cm_Sandy loam,texcl_120+cm_Silt loam,texcl_120+cm_Silty clay,texcl_120+cm_Silty clay loam,texcl_120+cm_Very fine sandy loam,texcl_120+cm_nan,texcl_20cm_29cm_Clay,texcl_20cm_29cm_Clay loam,texcl_20cm_29cm_Coarse sandy loam,texcl_20cm_29cm_Fine sand,texcl_20cm_29cm_Fine sandy loam,texcl_20cm_29cm_Loam,texcl_20cm_29cm_Loamy coarse sand,texcl_20cm_29cm_Loamy fine sand,texcl_20cm_29cm_Loamy sand,texcl_20cm_29cm_OTHER,texcl_20cm_29cm_Sand,texcl_20cm_29cm_Sandy clay loam,texcl_20cm_29cm_Sandy loam,texcl_20cm_29cm_Silt loam,texcl_20cm_29cm_Silty clay,texcl_20cm_29cm_Silty clay loam,texcl_20cm_29cm_Very fine sandy loam,texcl_20cm_29cm_nan,texcl_30cm_39cm_Clay,texcl_30cm_39cm_Clay loam,texcl_30cm_39cm_Coarse sandy loam,texcl_30cm_39cm_Fine sand,texcl_30cm_39cm_Fine sandy loam,texcl_30cm_39cm_Loam,texcl_30cm_39cm_Loamy coarse sand,texcl_30cm_39cm_Loamy fine sand,texcl_30cm_39cm_Loamy sand,texcl_30cm_39cm_OTHER,texcl_30cm_39cm_Sand,texcl_30cm_39cm_Sandy clay loam,texcl_30cm_39cm_Sandy loam,texcl_30cm_39cm_Silt loam,texcl_30cm_39cm_Silty clay,texcl_30cm_39cm_Silty clay loam,texcl_30cm_39cm_Very fine sandy loam,texcl_30cm_39cm_nan,texcl_40cm_49cm_Clay,texcl_40cm_49cm_Clay loam,texcl_40cm_49cm_Coarse sand,texcl_40cm_49cm_Coarse sandy loam,texcl_40cm_49cm_Fine sand,texcl_40cm_49cm_Fine sandy loam,texcl_40cm_49cm_Loam,texcl_40cm_49cm_Loamy coarse sand,texcl_40cm_49cm_Loamy fine sand,texcl_40cm_49cm_Loamy sand,texcl_40cm_49cm_OTHER,texcl_40cm_49cm_Sand,texcl_40cm_49cm_Sandy clay loam,texcl_40cm_49cm_Sandy loam,texcl_40cm_49cm_Silt loam,texcl_40cm_49cm_Silty clay,texcl_40cm_49cm_Silty clay loam,texcl_40cm_49cm_Very fine sandy loam,texcl_40cm_49cm_nan,texcl_50cm_59cm_Clay,texcl_50cm_59cm_Clay loam,texcl_50cm_59cm_Coarse sand,texcl_50cm_59cm_Coarse sandy loam,texcl_50cm_59cm_Fine sand,texcl_50cm_59cm_Fine sandy loam,texcl_50cm_59cm_Loam,texcl_50cm_59cm_Loamy coarse sand,texcl_50cm_59cm_Loamy fine sand,texcl_50cm_59cm_Loamy sand,texcl_50cm_59cm_OTHER,texcl_50cm_59cm_Sand,texcl_50cm_59cm_Sandy clay loam,texcl_50cm_59cm_Sandy loam,texcl_50cm_59cm_Silt loam,texcl_50cm_59cm_Silty clay,texcl_50cm_59cm_Silty clay loam,texcl_50cm_59cm_Very fine sandy loam,texcl_50cm_59cm_nan,texcl_60cm_69cm_Clay,texcl_60cm_69cm_Clay loam,texcl_60cm_69cm_Coarse sand,texcl_60cm_69cm_Coarse sandy loam,texcl_60cm_69cm_Fine sand,texcl_60cm_69cm_Fine sandy loam,texcl_60cm_69cm_Loam,texcl_60cm_69cm_Loamy coarse sand,texcl_60cm_69cm_Loamy fine sand,texcl_60cm_69cm_Loamy sand,texcl_60cm_69cm_OTHER,texcl_60cm_69cm_Sand,texcl_60cm_69cm_Sandy clay loam,texcl_60cm_69cm_Sandy loam,texcl_60cm_69cm_Silt loam,texcl_60cm_69cm_Silty clay,texcl_60cm_69cm_Silty clay loam,texcl_60cm_69cm_Very fine sandy loam,texcl_60cm_69cm_nan,texcl_70cm_79cm_Clay,texcl_70cm_79cm_Clay loam,texcl_70cm_79cm_Coarse sand,texcl_70cm_79cm_Coarse sandy loam,texcl_70cm_79cm_Fine sand,texcl_70cm_79cm_Fine sandy loam,texcl_70cm_79cm_Loam,texcl_70cm_79cm_Loamy coarse sand,texcl_70cm_79cm_Loamy fine sand,texcl_70cm_79cm_Loamy sand,texcl_70cm_79cm_OTHER,texcl_70cm_79cm_Sand,texcl_70cm_79cm_Sandy clay loam,texcl_70cm_79cm_Sandy loam,texcl_70cm_79cm_Silt loam,texcl_70cm_79cm_Silty clay,texcl_70cm_79cm_Silty clay loam,texcl_70cm_79cm_Very fine sandy loam,texcl_70cm_79cm_nan,texcl_80cm_89cm_Clay,texcl_80cm_89cm_Clay loam,texcl_80cm_89cm_Coarse sand,texcl_80cm_89cm_Coarse sandy loam,texcl_80cm_89cm_Fine sand,texcl_80cm_89cm_Fine sandy loam,texcl_80cm_89cm_Loam,texcl_80cm_89cm_Loamy coarse sand,texcl_80cm_89cm_Loamy fine sand,texcl_80cm_89cm_Loamy sand,texcl_80cm_89cm_OTHER,texcl_80cm_89cm_Sand,texcl_80cm_89cm_Sandy clay loam,texcl_80cm_89cm_Sandy loam,texcl_80cm_89cm_Silt loam,texcl_80cm_89cm_Silty clay,texcl_80cm_89cm_Silty clay loam,texcl_80cm_89cm_Very fine sandy loam,texcl_80cm_89cm_nan,texcl_90cm_99cm_Clay,texcl_90cm_99cm_Clay loam,texcl_90cm_99cm_Coarse sand,texcl_90cm_99cm_Coarse sandy loam,texcl_90cm_99cm_Fine sand,texcl_90cm_99cm_Fine sandy loam,texcl_90cm_99cm_Loam,texcl_90cm_99cm_Loamy coarse sand,texcl_90cm_99cm_Loamy fine sand,texcl_90cm_99cm_Loamy sand,texcl_90cm_99cm_OTHER,texcl_90cm_99cm_Sand,texcl_90cm_99cm_Sandy clay loam,texcl_90cm_99cm_Sandy loam,texcl_90cm_99cm_Silt loam,texcl_90cm_99cm_Silty clay,texcl_90cm_99cm_Silty clay loam,texcl_90cm_99cm_Very fine sandy loam,texcl_90cm_99cm_nan,Feature_Type_Anthropogenic Feature,Feature_Type_Landform,Feature_Type_Landscape,Feature_Type_Microfeature,Feature_alluvial fan,Feature_coastal plain,Feature_drainageway,Feature_flood plain,Feature_foothills,Feature_ground moraine,Feature_hill,Feature_hills,Feature_hillslope,Feature_interfluve,Feature_intermontane basin,Feature_lake plain,Feature_mountain,Feature_mountain slope,Feature_mountains,Feature_other,Feature_outwash plain,Feature_piedmont,Feature_plain,Feature_plains,Feature_plateau,Feature_ridge,Feature_river valley,Feature_stream terrace,Feature_terrace,Feature_till plain,Feature_upland,Feature_valley
0,10000,16.0,0.0,0.0,49.0,9.0,0.0,0.0,45.561737,-123.794006,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
1,100002,0.0,4009566.0,297348.0,3250.0,2.0,0.0,0.0,36.20972,-101.2543,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1000086,0.0,4430694.0,505322.0,2468.0,0.0,0.0,0.0,40.02645,-110.9376,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1000087,0.0,4430908.0,505439.0,2466.0,0.0,0.0,0.0,40.02837,-110.9363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1000088,0.0,4431183.0,501752.0,2724.0,0.0,0.0,0.0,40.03087,-110.9795,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### + climate data

In [97]:
ssp_sitepm_ncss_geomorph_agg_prism = pd.merge(ssp_sitepm_ncss_geomorph_agg, 
                                         site_prism,
                                         left_on=['siteiid'],
                                         right_on=['siteiid'],
                                         how='left')


### + satellite data

In [98]:
ssp_sitepm_ncss_geomorph_agg_prism_satellite = pd.merge(ssp_sitepm_ncss_geomorph_agg_prism, 
                                         satellite,
                                         left_on=['siteiid'],
                                         right_on=['siteiid'],
                                         how='left')

In [99]:
ssp_sitepm_ncss_geomorph_agg_prism_satellite.shape

(546262, 1670)

## Checkpoint - download data
site, siteobs, pedon, sitepm, ncss layer+ncss pedon, geomorph, climate, satellite

In [None]:
ssp_sitepm_ncss_geomorph_agg_prism_satellite.to_csv('Saved Datasets/ssp_sitepm_ncss_geomorph_agg_prism_satellite.csv', index=False)

### End of Part 2 - Saving ssp, sitepm, NCSS, Geomorph aggregated, prism and satellite data

## phorizon

#### subseting and cleaning, fill missing

In [100]:
def handle_missing_values(phorizon_df_in):
    """    
    Fill in Missing Values
    Separate dataframe into three subsets/parts based on top depth
    - df 1: top depth is NaN
    - df 2: top depth > bottom depth
    - df 3: bottom fepth is NaN
    - df 4: normal values that don't need to be filled/changed
    """
    
    #### Split to separate dataframes and make edits
    # seubset data when top depth is null dataframe
    df1 = phorizon_df_in.loc[phorizon_df_in['hzdept'].isnull()] #1433
    # fill in top depth as 10 less than the bottom depth
    df1['hzdept'] = df1['hzdept'].fillna(df1['hzdepb']-10)
    
    # top depth is greater than bottom depth dataframe
    df2 = phorizon_df_in[phorizon_df_in['hzdept'] > phorizon_df_in['hzdepb']]
    # flip variable names for hzdept and hzdepb for the rows there bottom depth < top depth
    df2.rename(columns={'hzdept': 'hzdepb', 'hzdepb': 'hzdept'}, inplace=True)
    
    # bottom depth is null dataframe
    df3 = phorizon_df_in.loc[phorizon_df_in['hzdepb'].isnull()] #68015
    # fill in bottom depth as 10 more than the top depth
    df3['hzdepb'] = df3['hzdepb'].fillna(df3['hzdept']+10)
    
    # "normal" dataset where no changes need to be applied
    df4 = phorizon_df_in[~phorizon_df_in['hzdept'].isna()]
    df4 = df4[df4['hzdept'] <= df4['hzdepb']]
    df4 = df4[~df4['hzdepb'].isna()]
    
    #### Append dataframes back together + re-index
    # append df1, df2, df3, and df4 back together by row
    phorizon_df_in = df1.append([df2, df3, 
                              df4]).sort_values(by=['peiidref', 'hzdept']).reset_index()
    
    return (phorizon_df_in)

In [101]:
def data_cleaning(phorizon_df_org_in,drop_var_list):
    print('Start cleaning for phorizon_df_org : ',phorizon_df_org_in.shape)
    phorizon_df = phorizon_df_org.drop(columns=drop_var_list)
    print('After dropping the variables: ',phorizon_df.shape)
    # drop rows that have NaN in both hzdept and hzdepb
    phorizon_df = phorizon_df[~phorizon_df[['hzdept', 'hzdepb']].isnull().all(axis=1)]
    print('After dropping rows that have NaN in both hzdept and hzdepb: ',phorizon_df.shape)
    
    ### missing values for hzdept
    phorizon_df = handle_missing_values(phorizon_df)
    print('After filling in missing values for hzdept : ',phorizon_df.shape)
    
    # Change negative values for hzdept to 0
    print('There are {} rows with negative values for hzdept, which we set to 0'.format(
        phorizon_df[phorizon_df['hzdept'] < 0].shape[0]))
    phorizon_df['hzdept'] = phorizon_df['hzdept'].clip(lower=0)
    
    
    return (phorizon_df)

In [102]:
drop_var_list = ['OID',
'horzlatareapct_h',
'horzlatareapct_l',
'obsinfiltrationrate',
'dipyridylpct',
'dipyridylloc',
'horzlatareapct_r',
'carbdevstagecf',
'horzvoltotpct_h',
'horzvoltotpct_l',
'ksatrepnum',
'aashtocl',
'carbdevstagefe',
'reactadipyridyl',
'ksatstddev',
'rupresplate',
'mneffagent',
'horzvoltotpct_r',
'penetorient',
'toughclass',
'penetrres',
'mneffclass',
'soilodorintensity',
'claycarbest',
'desgnmasterprime',
'unifiedcl',
'soilodor',
'hzthk_l',
'hzthk_h',
'rupresblkcem',
'ksatpedon',
'mannerfailure',
'efflocation',
'excavdifcl',
'dspcomplayerid',
'horzpermclass',
'silttotest',
'desgndisc',
'sandtotest',
'obssoimoiststat',
'effagent',
'phdetermeth',
'rupresblkdry',
'plasticity',
'stickiness',
'hzname_s']

In [103]:
phorizon_df_clean = data_cleaning(phorizon_df_org,drop_var_list)

Start cleaning for phorizon_df_org :  (2480781, 70)
After dropping the variables:  (2480781, 24)
After dropping rows that have NaN in both hzdept and hzdepb:  (2466750, 24)
After filling in missing values for hzdept :  (2466750, 25)
There are 57 rows with negative values for hzdept, which we set to 0


In [104]:
phorizon_df_clean.head()

Unnamed: 0,index,peiidref,seqnum,hzdept,hzdepb,hzthk_r,obsmethod,hzname,desgnmaster,desgnvert,texture,texture_s,stratextsflag,claytotest,fragvoltot,horcolorvflag,rupresblkmst,phfield,effclass,rmonosulfidep,bounddistinct,boundtopo,recwlupdated,recuseriidref,phiid
0,7295,9142,,0.0,2.0,2.0,Small Pit,Oi,O,,SPM,1.0,0,,0.0,0,,,,0,Abrupt,Smooth,4/19/2016 17:07:22,1237.0,43531
1,7943,9142,,2.0,10.0,8.0,Small Pit,A1,A,1.0,ASHY-SL,1.0,0,,7.0,0,Very friable,6.8,,0,Clear,Wavy,4/19/2016 17:07:21,1237.0,43530
2,7917,9142,,10.0,40.0,30.0,Small Pit,A2,A,2.0,ASHY-LS,1.0,0,,7.0,0,Very friable,7.0,,0,Clear,Wavy,4/19/2016 17:07:22,1237.0,43529
3,7944,9142,,40.0,68.0,28.0,Small Pit,Bw,B,,ASHY-LS,1.0,0,,0.0,0,Very friable,7.0,,0,Clear,Wavy,4/19/2016 17:07:21,1237.0,43528
4,7963,9142,,68.0,99.0,31.0,Small Pit,C1,C,1.0,ASHY-LS,1.0,0,,0.0,0,Loose,7.0,,0,Gradual,Wavy,4/19/2016 17:07:21,1237.0,43527


####  Binning 

In [105]:
# Create labels and bins
labels = ['0cm_9cm', '10cm_19cm', '20cm_29cm', '30cm_39cm', '40cm_49cm',
         '50cm_59cm', '60cm_69cm', '70cm_79cm', '80cm_89cm', '90cm_99cm',
         '100cm_109cm', '110cm_119cm', '120+cm']
cut_bins = [-11, 9, 19, 29, 39, 49, 59, 69, 79, 89, 99, 109, 119, 10000]


In [106]:
def binning_func(phorizon_df,cut_bins,labels):
    phorizon_df_in = phorizon_df.copy()
    ### binning
    phorizon_df_in['hzdept_bin'] = pd.cut(phorizon_df_in['hzdept'], cut_bins, labels=labels)
    
    return (phorizon_df_in)

In [107]:
phorizon_df_binned = binning_func(phorizon_df_clean,cut_bins,labels)
phorizon_df_binned.shape

(2466750, 26)

In [108]:
phorizon_df_binned.head()

Unnamed: 0,index,peiidref,seqnum,hzdept,hzdepb,hzthk_r,obsmethod,hzname,desgnmaster,desgnvert,texture,texture_s,stratextsflag,claytotest,fragvoltot,horcolorvflag,rupresblkmst,phfield,effclass,rmonosulfidep,bounddistinct,boundtopo,recwlupdated,recuseriidref,phiid,hzdept_bin
0,7295,9142,,0.0,2.0,2.0,Small Pit,Oi,O,,SPM,1.0,0,,0.0,0,,,,0,Abrupt,Smooth,4/19/2016 17:07:22,1237.0,43531,0cm_9cm
1,7943,9142,,2.0,10.0,8.0,Small Pit,A1,A,1.0,ASHY-SL,1.0,0,,7.0,0,Very friable,6.8,,0,Clear,Wavy,4/19/2016 17:07:21,1237.0,43530,0cm_9cm
2,7917,9142,,10.0,40.0,30.0,Small Pit,A2,A,2.0,ASHY-LS,1.0,0,,7.0,0,Very friable,7.0,,0,Clear,Wavy,4/19/2016 17:07:22,1237.0,43529,10cm_19cm
3,7944,9142,,40.0,68.0,28.0,Small Pit,Bw,B,,ASHY-LS,1.0,0,,0.0,0,Very friable,7.0,,0,Clear,Wavy,4/19/2016 17:07:21,1237.0,43528,40cm_49cm
4,7963,9142,,68.0,99.0,31.0,Small Pit,C1,C,1.0,ASHY-LS,1.0,0,,0.0,0,Loose,7.0,,0,Gradual,Wavy,4/19/2016 17:07:21,1237.0,43527,60cm_69cm


#### Inserting missing Bins

In [109]:
# this is a function that creates a dummy dataframe with all pedons and 
# for each peiid have all labels = ['0cm_9cm', '10cm_19cm', '20cm_29cm', '30cm_39cm', '40cm_49cm',
#         '50cm_59cm', '60cm_69cm', '70cm_79cm', '80cm_89cm', '90cm_99cm',
#         '100cm_109cm', '110cm_119cm', '120+cm']
# and define a lower_band [0,10,20,30,40,50,
#                60,70,80,90,100,110,120
def create_dummy_df(peiid_list_in,labels_in):
    dummy_df = pd.DataFrame()
    dummy_df['peiidref'] = pd.Series(peiid_list_in).repeat(len(labels_in)).reset_index(drop=True)
    dummy_df['bins'] = pd.Series(labels_in*len(peiid_list_in))
    dummy_df['lower_band'] = pd.Series([0,10,20,30,40,50,
                                              60,70,80,90,100,110,120]*len(peiid_list_in))
    return(dummy_df)    

In [110]:
peiid_list = phorizon_df_clean.peiidref.unique().tolist()
pedon_dummy_df = create_dummy_df(peiid_list,labels)
pedon_dummy_df.head(15)

Unnamed: 0,peiidref,bins,lower_band
0,9142,0cm_9cm,0
1,9142,10cm_19cm,10
2,9142,20cm_29cm,20
3,9142,30cm_39cm,30
4,9142,40cm_49cm,40
5,9142,50cm_59cm,50
6,9142,60cm_69cm,60
7,9142,70cm_79cm,70
8,9142,80cm_89cm,80
9,9142,90cm_99cm,90


In [111]:
def insert_all_bin_cols(phorizon_df_binned_in,pedon_dummy_df,dummy_bottom_bin_df):
    phorizon_df_binned_in['lower_band']=phorizon_df_binned_in.hzdept_bin.apply(lambda s:re.findall(r"(\d+)\+*cm", str(s))[0])
    phorizon_df_binned_in['lower_band'] = phorizon_df_binned_in['lower_band'].astype(int)
    
    
    # merge dummy_bottom_bin into main dataframe
    phorizon_df_binned_in = pd.merge(phorizon_df_binned_in, 
                                     dummy_bottom_bin_df,
                                     left_on=['hzdepb'],
                                     right_on=['hzdepb'],
                                     how='left')
    phorizon_df_binned_in['max_bin'] = phorizon_df_binned_in.groupby('peiidref')\
                        ['max_depth_bin'].transform('max')
    phorizon_df_binned_in.drop(columns=['lower_band'],inplace=True)
    
    x = pd.merge(phorizon_df_binned_in,
             pedon_dummy_df,
             left_on=['peiidref','hzdept_bin'],
             right_on=['peiidref','bins'],
             how='right')
    print('forward fill')
    x['max_bin'] = x.groupby('peiidref')['max_bin'].fillna(method='ffill')
    print('backward fill')
    x['max_bin'] = x.groupby('peiidref')['max_bin'].fillna(method='bfill')
    x['hzdept_bin']=x['bins']
    
    x.sort_values(by=['peiidref','lower_band'],inplace=True)
    return(x)

In [112]:
dummy_bottom_bin_df = pd.read_csv('Input Files/dummy_bottom_bin.csv')
dummy_bottom_bin_df.head()

Unnamed: 0,hzdepb,max_depth_bin
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


In [113]:
phorizon_df_binned_complete = insert_all_bin_cols(phorizon_df_binned,pedon_dummy_df,dummy_bottom_bin_df)
phorizon_df_binned_complete.sort_values(by=['peiidref','lower_band'],inplace=True)
print(phorizon_df_binned_complete.shape)

forward fill
backward fill
(6561998, 30)


In [114]:
phorizon_df_binned_complete.head()

Unnamed: 0,index,peiidref,seqnum,hzdept,hzdepb,hzthk_r,obsmethod,hzname,desgnmaster,desgnvert,texture,texture_s,stratextsflag,claytotest,fragvoltot,horcolorvflag,rupresblkmst,phfield,effclass,rmonosulfidep,bounddistinct,boundtopo,recwlupdated,recuseriidref,phiid,hzdept_bin,max_depth_bin,max_bin,bins,lower_band
0,7295.0,9142,,0.0,2.0,2.0,Small Pit,Oi,O,,SPM,1.0,0.0,,0.0,0.0,,,,0.0,Abrupt,Smooth,4/19/2016 17:07:22,1237.0,43531.0,0cm_9cm,0.0,120.0,0cm_9cm,0
1,7943.0,9142,,2.0,10.0,8.0,Small Pit,A1,A,1.0,ASHY-SL,1.0,0.0,,7.0,0.0,Very friable,6.8,,0.0,Clear,Wavy,4/19/2016 17:07:21,1237.0,43530.0,0cm_9cm,10.0,120.0,0cm_9cm,0
2,7917.0,9142,,10.0,40.0,30.0,Small Pit,A2,A,2.0,ASHY-LS,1.0,0.0,,7.0,0.0,Very friable,7.0,,0.0,Clear,Wavy,4/19/2016 17:07:22,1237.0,43529.0,10cm_19cm,40.0,120.0,10cm_19cm,10
3,,9142,,,,,,,,,,,,,,,,,,,,,,,,20cm_29cm,,120.0,20cm_29cm,20
4,,9142,,,,,,,,,,,,,,,,,,,,,,,,30cm_39cm,,120.0,30cm_39cm,30


#### drop dupplicated bins for each pedon

In [115]:
def find_weight(df):
        
    """
        This function:
        finds the weight according to depth for each row.
    """
    df_in = df.copy()
    df_in['depth'] = df_in['hzdepb'] - df_in['hzdept']
    df_in.loc[df_in['depth']==0,
                'depth'] = df_in.loc[df_in['depth']==0,'depth']+0.001
    df_in['total_depth'] = df_in.groupby(['peiidref','hzdept_bin'])['depth'].transform('sum')
    df_in['weight'] = df_in['depth']/df_in['total_depth']
    
    return df_in['weight']

In [116]:
def find_weighted_average(df,var_name,kind):
    """
    Based on the kind of variable (logarithmic, numerical, Categorical )
    The function find the weighted average to handle duplicates
    
    
    """
    df_in = df.copy()
 
    if kind == 'num':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['weighted_var'] = dropped_na_df['weight']*dropped_na_df[var_name]
        dropped_na_df['val'] = dropped_na_df.groupby(['peiidref',
                    'hzdept_bin'])['weighted_var'].transform('sum')
        dropped_na_df.drop_duplicates(subset=['peiidref','hzdept_bin','val'],inplace=True)


        na_df = df_in.loc[dropped_index,['peiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['peiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df[['peiidref','hzdept_bin','lower_band','val']],
                 na_df[['peiidref','hzdept_bin','lower_band']],
                 on=['peiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['peiidref','lower_band'],inplace=True)

    if kind == 'log':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['weighted_var'] = dropped_na_df['weight']*np.exp(dropped_na_df[var_name])
        dropped_na_df['val'] = dropped_na_df.groupby(['peiidref',
                    'hzdept_bin'])['weighted_var'].transform('sum')
        dropped_na_df['val'] = np.log(dropped_na_df['val'])
        dropped_na_df.drop_duplicates(subset=['peiidref','hzdept_bin','val'],inplace=True)


        na_df = df_in.loc[dropped_index,['peiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['peiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df[['peiidref','hzdept_bin','lower_band','val']],
                 na_df[['peiidref','hzdept_bin','lower_band']],
                 on=['peiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['peiidref','lower_band'],inplace=True)
        
    if kind == 'cat':
        
        dropped_na_df = df_in.dropna(subset=[var_name])
        dropped_index = list(set(df_in.index) - set(dropped_na_df.index))
        dropped_na_df['weight'] = find_weight(dropped_na_df)
        dropped_na_df['max_weight']=dropped_na_df.groupby(['peiidref','hzdept_bin'])['weight'].transform('max')
        
        dropped_na_df_new = dropped_na_df.loc[(dropped_na_df['weight'] == dropped_na_df['max_weight'])]
        dropped_na_df_new['val'] = dropped_na_df_new[var_name]
        dropped_na_df_new.drop_duplicates(subset=['peiidref','hzdept_bin'],inplace=True)
        
        na_df = df_in.loc[dropped_index,['peiidref','hzdept_bin','lower_band']]
        na_df.drop_duplicates(subset=['peiidref','hzdept_bin'],inplace=True)

        merged = pd.merge(dropped_na_df_new[['peiidref','hzdept_bin','lower_band','val']],
                 na_df[['peiidref','hzdept_bin','lower_band']],
                 on=['peiidref','hzdept_bin','lower_band'],how='outer')
        merged.sort_values(by=['peiidref','lower_band'],inplace=True) 
    #return(merged[['peiidref','hzdept_bin','lower_band','val']])     
    return(list(merged['val']))

In [117]:
def handle_duplicated_func(duplicated_phorizon_df,approach) : 
    
    """
    To handle duplicates either drop all and keep the sahllowest depth
    or compute their weighted average
    
    """
    
    duplicated_phorizon_df_in = duplicated_phorizon_df.copy()
   
    if approach == 'drop':
        #### it is sorted by hzdept so drop duplicates will keep the shallowest
        duplicated_phorizon_df_in.sort_values(by=['peiidref','lower_band'],inplace=True)
        phorizon_df_in = duplicated_phorizon_df_in.drop_duplicates(subset=['peiidref','hzdept_bin']).copy()
         
        return(phorizon_df_in)
    if approach == 'weighted_avg':
        
        num_var_list = ['hzdept', 'hzdepb', 'hzthk_r', 'desgnvert',
                   'stratextsflag', 'claytotest', 'fragvoltot', 'horcolorvflag','rmonosulfidep']
        log_var_list = ['phfield']
        cat_var_list = ['obsmethod','hzname','desgnmaster','texture',
                   'rupresblkmst', 'effclass', 'bounddistinct','boundtopo']
        
        temp_df = duplicated_phorizon_df_in[['peiidref','hzdept_bin','lower_band']].drop_duplicates()
        phorizon_df_in = temp_df.copy()
        phorizon_df_in.sort_values(by=['peiidref','lower_band'],inplace=True) 
        
        phorizon_df_in.reset_index(drop=True,inplace=True)
        
        print('numerical variables ------')
        for var_name in num_var_list:
            print('start calculating weighted average for {} '.format(var_name))
            li = find_weighted_average(duplicated_phorizon_df_in,var_name,'num')
            phorizon_df_in[var_name] = pd.Series(li)
        print('logarithmic variables ------') 
        for var_name in log_var_list:
            
            print('start calculating weighted average for {} '.format(var_name))
            li = find_weighted_average(duplicated_phorizon_df_in,var_name,'log')

            phorizon_df_in[var_name] = pd.Series(li)
        print('categorical variables ------')
#         phorizon_df_in.reset_index(drop=True,inplace=True)
        for var_name in cat_var_list:
            #var_name = 'boundtopo'
            print('start calculating weighted average for {} '.format(var_name))
            li = find_weighted_average(duplicated_phorizon_df_in,var_name,'cat')
#             print('li',li[0:5])
#             print('phorizon_df_in',phorizon_df_in.head())
#             print('len(li)',len(li))
#             print('shape phorizon_df_in',phorizon_df_in.shape)
            phorizon_df_in[var_name] = pd.Series(li)   
        return (phorizon_df_in)



In [118]:
# Handle Duplicated Bins
# drop : drop duplicated bin
phorizon_df_binned_unduplicated = handle_duplicated_func(phorizon_df_binned_complete,'weighted_avg')


numerical variables ------
start calculating weighted average for hzdept 
start calculating weighted average for hzdepb 
start calculating weighted average for hzthk_r 
start calculating weighted average for desgnvert 
start calculating weighted average for stratextsflag 
start calculating weighted average for claytotest 
start calculating weighted average for fragvoltot 
start calculating weighted average for horcolorvflag 
start calculating weighted average for rmonosulfidep 
logarithmic variables ------
start calculating weighted average for phfield 
categorical variables ------
start calculating weighted average for obsmethod 
start calculating weighted average for hzname 
start calculating weighted average for desgnmaster 
start calculating weighted average for texture 
start calculating weighted average for rupresblkmst 
start calculating weighted average for effclass 
start calculating weighted average for bounddistinct 
start calculating weighted average for boundtopo 


In [119]:
max_bin_df = phorizon_df_binned_complete[['peiidref','hzdept_bin',
                                          'max_bin']].drop_duplicates()

In [120]:
phorizon_df_binned_b4_pevot = pd.merge(phorizon_df_binned_unduplicated,
                                             max_bin_df,
                                on=['peiidref','hzdept_bin'],how='inner')

In [121]:
### fill bins with previous values
def fill_bins_func(df):
    df_in = df.copy()
    # break into variable "family" of dataframes
    labels_list = set(df_in)-set(['peiidref','hzdept_bin','max_bin','lower_band'])
    
    for label in labels_list:
        print(label)

        #### check when switching to new pedon and that first row of pedon is Null(maybe groupby)
        df_in[label] = df_in.groupby('peiidref')[label].fillna(method='ffill')
        df_in.loc[df_in['lower_band']>df_in['max_bin'],label]=np.nan
    
    return(df_in)

In [122]:
phorizon_df_binned_filled = fill_bins_func(phorizon_df_binned_b4_pevot)

hzdepb
desgnvert
hzdept
obsmethod
texture
effclass
claytotest
horcolorvflag
bounddistinct
desgnmaster


KeyboardInterrupt: 

In [None]:
phorizon_df_binned_b4_pevot_final = phorizon_df_binned_filled.drop(
    columns=['max_bin','hzdept_bin'])

In [None]:
### pivot the table so we can keep each varible for each of the depthes(bins) as a variable
def pivot_table_func(df_in):
    phorizon_df_pivoted = df_in.pivot(index='peiidref', columns='lower_band')
    # fix headers
    phorizon_df_pivoted.columns = ["_".join((str(i), str(j))) for i,j in phorizon_df_pivoted.columns]
    phorizon_df_pivoted.reset_index(inplace=True)
    
    return(phorizon_df_pivoted)

In [None]:
phorizon_pivoted_df = pivot_table_func(phorizon_df_binned_b4_pevot_final)
phorizon_pivoted_df.shape

In [None]:
phorizon_pivoted_df.head()

## Join site to phorizon pedon level

In [None]:
site_var_list = ['siteobsiid','peiidref','siteiid']
site_pedon_df = ssp_final
site_pedon_df['peiidref'] = site_pedon_df['peiid'].copy()
site_pedon_df[site_var_list].head()

In [None]:
### the pedon level data phorizon need to be joined to site level data
site_phorizon_pivoted_df = pd.merge(phorizon_pivoted_df,site_pedon_df[site_var_list],
         on='peiidref',how='left')

In [None]:
site_phorizon_pivoted_df.peiidref.nunique(),site_phorizon_pivoted_df.siteobsiid.nunique()

In [None]:
x = site_phorizon_pivoted_df.groupby('siteobsiid')['peiidref'].nunique().reset_index()
x.sort_values(by='peiidref',ascending=False)

In [None]:
site_with_multiple_pedon=x.loc[x.peiidref>1,
    'siteobsiid'].nunique()/site_phorizon_pivoted_df.siteobsiid.nunique()
print('{} percent of sites have more one pedon and need to be rolled up'.format(
                        site_with_multiple_pedon*100))

### One hot encode the categorical variables

In [None]:
#Method to roll up table specific features, by grouping categories that account for less than 1% of the data
#Inputs: dataframe and cat_Columnumn

def categorical_GetDummies(df, cat_Column):
    
    print("One-Hot Encoded Product-specific feature:", cat_Column)
    
    #Get the normalized value counts of each category
    valueCounts = df[cat_Column].value_counts(normalize=True).reset_index()
    
    #Figure out the categories to keep and the ones to group using the 99% rule
    valuesToKeep = valueCounts[valueCounts[cat_Column] >= 0.01]
    valuesToGroup = valueCounts[valueCounts[cat_Column] < 0.01]
    
    #Reset the values to group to 'OTHER'
    df.loc[df[cat_Column].isin(valuesToGroup['index']), cat_Column] = 'OTHER'
    
    #One-hot encode the variables
    df = pd.get_dummies(df, columns = [cat_Column], dummy_na=True)
    
    return df

In [None]:
cat_var_list = ['obsmethod','hzname','desgnmaster','texture',
       'rupresblkmst', 'effclass', 'bounddistinct','boundtopo']
CAT_BINNED_LIST = site_phorizon_pivoted_df.filter(regex='|'.join(cat_var_list)).columns.unique().tolist()

df_transformed = site_phorizon_pivoted_df.copy()
for cat_column in CAT_BINNED_LIST:
    df_transformed = categorical_GetDummies(df_transformed, cat_column)

In [None]:
df_transformed.head()

### Roll up to site level

In [None]:
def create_agg_dic(NUM_LST,CAT_LST):
    AGG_DICT = {}
    
    for var in NUM_LST:
        AGG_DICT[var] = 'median'
    for var in CAT_LST:
        AGG_DICT[var] = 'max'

    return(AGG_DICT)

In [None]:
def aggregate_to_site(df,num_var_list,cat_var_list):
    df_in = df.copy()
    AGG_DICT = create_agg_dic(num_var_list,cat_var_list)
    df_agg = df_in.groupby('siteiid').agg(AGG_DICT).reset_index()
    
    return df_agg   

In [None]:
num_var_list = ['hzdept', 'hzdepb', 'hzthk_r', 'desgnvert','phfield',
               'stratextsflag', 'claytotest', 'fragvoltot', 'horcolorvflag','rmonosulfidep']
cat_var_list = ['obsmethod','hzname','desgnmaster','texture',
       'rupresblkmst', 'effclass', 'bounddistinct','boundtopo']
all_num_var = df_transformed.filter(regex='|'.join(num_var_list)).columns.unique().tolist()
all_cat_var = df_transformed.filter(regex='|'.join(cat_var_list)).columns.unique().tolist()
all_cat_minus_common = list(set(all_cat_var) - set(all_num_var).intersection(set(all_cat_var)))

site_phorizon_pivoted_df_agg = aggregate_to_site(df_transformed,all_num_var,all_cat_minus_common)

In [None]:
(site_phorizon_pivoted_df_agg.shape,
site_phorizon_pivoted_df_agg.siteiid.nunique())

## Checkpoint - download data

In [None]:
site_phorizon_pivoted_df_agg.to_csv('Saved Datasets/site_phorizon_pivoted_df_agg.csv', index=False)

### End of part 3 - save site, phorizon (pivoted) agg

## Target Variable

### FIRST APPROACH

#### point_to_mukey_df

In [None]:
point_to_mukey_df_original = pd.read_csv('Input Files/point_to_mukey-coecococlass.csv')
point_to_mukey_df_original.shape

In [None]:
point_to_mukey_df_original.head()

In [None]:
lat_long_df = point_to_mukey_df_original[['siteiid', 'latstddeci',
 'longstddec']].drop_duplicates()

In [None]:
def ecosite_cond_dunc(df_in):
    ### we filter out ecoclassid that don't follow the format 
    ###    1. size less than 11
    ###    2. not starting with letters
    ###    3. doesn't contain any digits
    ###    4. it is alphanumeric string
    
    df = df_in.copy()
    df['number_of_char'] = df['ecoclassid'].str.len()
    df['starts_with_char'] = df['ecoclassid'].str[0].str.isalpha()
    df['contain_digits'] = df['ecoclassid'].apply(lambda x: any(map(str.isdigit, str(x))))
    df['is_alphanumeric'] =  df['ecoclassid'].apply(lambda x: bool(re.match('^[a-zA-Z0-9]+$', str(x))))

    conditions = ((df['number_of_char']>=11)&
                (df['starts_with_char']==True)&
                (df['contain_digits']==True)&
                (df['is_alphanumeric']==True))
    return df[conditions]

In [None]:
print(point_to_mukey_df_original.shape)
point_to_mukey_df=ecosite_cond_dunc(point_to_mukey_df_original)
print(point_to_mukey_df.shape)

In [None]:
print('point_to_mukey_df dataframe:\n number of records = {}\n number of unique ecosites = {},\n number of unique siteiid = {},\n number of unique siteobsiid = {}\n number of unique pedons ={}'.\
      format(point_to_mukey_df.shape[0],
             point_to_mukey_df['ecoclassid'].nunique(),
             point_to_mukey_df['siteiid'].nunique(),
             point_to_mukey_df['siteobsiid'].nunique(),
             point_to_mukey_df['peiid'].nunique()))     

In [None]:
#### direct connection of site pedon and ecosite 
print('number of pedons from phorizon is ',len(pedons_from_phorizon_data))
print('matched pedons in point_to_mukey and phorizon',
      point_to_mukey_df.loc[point_to_mukey_df['peiid'].isin(pedons_from_phorizon_data),'peiid'].nunique())
print('----------------------------------------------------')
print('number of siteobs from siteobs is ',len(siteobs_from_siteobs_data))
print('matched sites in point_to_mukey and siteobs',
      point_to_mukey_df.loc[point_to_mukey_df['siteobsiid'].isin(siteobs_from_siteobs_data),'siteobsiid'].nunique())


In [None]:
list(point_to_mukey_df.columns)

In [None]:
point_to_mukey_df.siteiid.nunique()

In [None]:
point_to_mukey_df.head()

#### Join feature and ecosites


In [None]:
site_to_siteob_df = site_pedon_df[['siteiid','siteobsiid']].drop_duplicates().copy()
site_to_siteob_df.shape
site_to_siteob_df.dropna(inplace=True)
site_to_siteob_df.siteiid = site_to_siteob_df.siteiid.astype(int)
site_to_siteob_df.siteobsiid = site_to_siteob_df.siteobsiid.astype(int)

In [None]:
site_to_siteob_df.shape

In [None]:
site_to_siteob_df.head()

In [None]:
site_pedon_df_ids = site_map_ids

point_to_mukey_df_ids = point_to_mukey_df[['ecoclassid','ecoclassname','ecoclasstypename','peiid','siteobsiid']]

In [None]:
site_pedon_df_ids.head()

In [None]:
site_pedon_df_ids.dtypes

In [None]:
point_to_mukey_df_ids.head()

In [None]:
point_to_mukey_df_ids['peiid'] = point_to_mukey_df_ids['peiid'].astype(int)
point_to_mukey_df_ids['siteobsiid'] = point_to_mukey_df_ids['siteobsiid'].astype(int)
site_pedon_df_ids['siteobsiid'] = site_pedon_df_ids['siteobsiid'].astype(int)
site_pedon_df_ids['siteiid'] = site_pedon_df_ids['siteiid'].astype(int)
site_pedon_df_ids['peiid'] = site_pedon_df_ids['peiid'].astype(int)
site_pedon_df_ids['siteiidref'] = site_pedon_df_ids['siteiidref'].astype(int)

In [None]:
feature_target_df = pd.merge(site_pedon_df_ids,
                             point_to_mukey_df_ids,
                             on=['peiid','siteobsiid'],
                             how='left')

In [None]:
feature_target_df.shape

In [None]:
feature_target_df.isna().sum()

In [None]:
print("There are {} sites out of {} sites that don't have matched ecosites".\
      format(feature_target_df.loc[feature_target_df.ecoclassid.isna(),'siteobsiid'].nunique(),
           feature_target_df.siteobsiid.nunique()))


#### MLRA

In [None]:
conda install xlrd

In [None]:
mlra_data = pd.read_excel('Input Files/mlrav42_whole.xls')
mlra_data.shape

In [None]:
mlra_data.head()

In [None]:
site_mlra_direct_original_df = pd.read_csv('Input Files/Site_to_MLRA_20210226.csv')
                                  #index_col=False,usecols=[1,2])
site_mlra_direct_df = site_mlra_direct_original_df[['siteobsiid','MLRA_ID']
                                                  ].drop_duplicates()

In [None]:
site_mlra_direct_df.drop_duplicates(subset=['siteobsiid','MLRA_ID'],inplace=True)
site_mlra_direct_df.rename(columns={'MLRA_ID':'mlra_id'},inplace=True)
site_mlra_direct_df = pd.merge(site_mlra_direct_df,mlra_data,
         on = 'mlra_id', how = 'left')
site_mlra_direct_df.shape

In [None]:
site_mlra_direct_df.head()

In [None]:
site_mlra_direct_df = pd.merge(site_mlra_direct_df,site_to_siteob_df,on='siteobsiid',how='right')
         
site_mlra_direct_df.shape

In [None]:
site_mlra_direct_df.isna().sum()

In [None]:
mlra_subset = ['siteobsiid', 'mlra_id', 'mlra_reg_symbol',
               'mlra_name', 'lrr_symbol', 'lrr_name']

#### join feature_eco with MLRA

In [None]:
approach1_df = pd.merge(feature_target_df,site_mlra_direct_df[mlra_subset],
         on='siteobsiid',how='left')
feature_target_df.shape,site_mlra_direct_df.shape,approach1_df.shape

In [None]:
approach1_df.head()

### SECOND APPROACH

In [None]:
sitehistory_df = pd.read_csv('Input Files/siteecositehistory.csv')
sitehistory_df.shape

In [None]:
ecologicalsite_df = pd.read_csv('Input Files/ecologicalsite.csv',
                                index_col=0)
ecologicalsite_df.shape

In [None]:
ecologicalsite_df.head()

In [None]:
### sitehistory_df and site_mla_df

### ecositeiidref in history
### Rec.ID in

site_mla_history_df = pd.merge(sitehistory_df,ecologicalsite_df,
         left_on='ecositeiidref',right_on='Rec.ID',how='inner')

sitehistory_df.shape,ecologicalsite_df.shape,site_mla_history_df.shape

In [None]:
#### we handle the following data isssue:
#### Data Issue: in 367 out of 35K cases there are multiple ecosites for one site id
### it is handled by choosing the most recent
ecohistoryid_list = sitehistory_df.groupby('siteiidref').agg({
    'siteecositehistoryiid':max})['siteecositehistoryiid'].unique()
print(ecohistoryid_list[0:10])
site_mla_history_df_unduplicated=site_mla_history_df.loc[site_mla_history_df.siteecositehistoryiid.isin(
                                                            ecohistoryid_list)]
site_mla_history_df_unduplicated.shape

In [None]:
site_mla_history_df_joined = pd.merge(site_mla_history_df_unduplicated,
                                      site_mlra_direct_df,how='inner', 
                                      left_on= 'siteiidref',right_on='siteiid')

In [None]:
site_mla_history_df_joined.shape

In [None]:
subset_col_list = ['siteiidref',
                   'ecositeiidref',
                   'Ecological.Site.ID',
                   'Ecological.Site.Name',
                   'Ecological.Site.Origin',
                   'Ecological.Site.Type',
                   'Ecological.Site.LRU',
                   'Ecological.Site.Number',
                   'Ecological.Site.State',
                   'Ecological.Site.Primary.Name',
                   'mlra_reg_symbol',
                   'mlra_id',
                   'mlra_name',
                   'lrr_symbol',
                   'lrr_name']

site_mla_history_df_final = site_mla_history_df_joined[subset_col_list]

site_mla_history_df_final[['ecositeiidref','siteiidref','Ecological.Site.ID']].head()

In [None]:
site_mla_history_df_final.head()

In [None]:
feature_target_df2 = pd.merge(site_pedon_df_ids,
        site_mla_history_df_final[['siteiidref','Ecological.Site.ID','Ecological.Site.Name','Ecological.Site.Type']],
        on = 'siteiidref',how ='left' )

In [None]:
feature_target_df2.rename(columns={'Ecological.Site.ID':'ecoclassid',
                                   'Ecological.Site.Name':'ecoclassname',
                                   'Ecological.Site.Type':'ecoclasstypename'},inplace=True)
feature_target_df2.drop_duplicates(inplace=True)

In [None]:
site_pedon_df.shape,feature_target_df.shape,feature_target_df2.shape

### Consolidated feature_target_df

In [None]:
match_second_approach_peiid_list = feature_target_df2.loc[feature_target_df2.ecoclassid.notna(),
                                    'peiid'].unique().tolist()

In [None]:
# fill the null values in feature_target_df with unnull values from feature_target_df2
condition = ((feature_target_df['peiid'].isin(match_second_approach_peiid_list))&
                      (feature_target_df['ecoclassid'].isna()))


feature_target_df.loc[condition,'ecoclassname']=feature_target_df2.loc[
    feature_target_df2['peiid'].isin(feature_target_df.loc[condition,'peiid'].unique().tolist()),
    'ecoclassname'].tolist()
feature_target_df.loc[condition,'ecoclasstypename']=feature_target_df2.loc[
    feature_target_df2['peiid'].isin(feature_target_df.loc[condition,'peiid'].unique().tolist()),
    'ecoclasstypename'].tolist()
feature_target_df.loc[condition,'ecoclassid']=feature_target_df2.loc[
    feature_target_df2['peiid'].isin(feature_target_df.loc[condition,'peiid'].unique().tolist()),
    'ecoclassid'].tolist()

In [None]:
feature_target_df.isna().sum()

In [None]:
feature_target_mlra = pd.merge(feature_target_df,site_mlra_direct_df[mlra_subset],
            on ='siteobsiid',how='left')

In [None]:
feature_target_mlra.head()

### Apply Filter

In [None]:
feature_target_mlra.loc[(feature_target_mlra.ecoclasstypename!='Forage Suitability Groups')]
feature_target_mlra.shape,feature_target_mlra.siteobsiid.nunique()

In [None]:
print("There are {} sites out of {} sites that don't have matched mlra".\
      format(feature_target_mlra.loc[((feature_target_mlra['mlra_id'].isna())&
                                     (feature_target_df.ecoclassid.notna())),'siteobsiid'].nunique(),
           feature_target_mlra.siteobsiid.nunique()))


In [None]:
### check if siteobs and target are one to one so one site doesn't have 
### multiple ecosite (since it is prdon level)

In [None]:
def check_duplicate_ecosites(duplicate_ecosite_df_in):
    #df = point_to_mukey_df.groupby(['ecoclassid','siteobsiid']).agg({'ecoclassid':'nunique'})
    df = duplicate_ecosite_df_in.groupby(['siteobsiid']).agg({'ecoclassid':'nunique'})
    df.columns= ['nunique_'+x for x in df.columns.ravel()]
    df.reset_index(inplace=True)
    print('There are {} sites out of {} with more than one ecosites'.format(df.loc[df['nunique_ecoclassid']>1].shape[0],
                                                            df.shape[0]))
    return(df.loc[df['nunique_ecoclassid']>1],
            df.loc[df['nunique_ecoclassid']>1,'siteobsiid'].unique().tolist())

In [None]:
duplicate_eco_df,sites_with_dup_ecosites = check_duplicate_ecosites(feature_target_mlra)

In [None]:
#pulling this 25K sites data
duplicate_ecosites_df = feature_target_mlra.loc[feature_target_mlra.siteobsiid.isin(sites_with_dup_ecosites),
    ['ecoclassid','ecoclassname','ecoclasstypename','siteobsiid',
    'mlra_reg_symbol','mlra_id','mlra_name','lrr_symbol','lrr_name']].sort_values(by=['siteobsiid','ecoclassid'])
duplicate_ecosites_df.drop_duplicates(inplace=True)
duplicate_ecosites_df.shape,duplicate_ecosites_df.siteobsiid.nunique()


In [None]:
duplicate_ecosites_df_1st_filter = duplicate_ecosites_df.loc[(duplicate_ecosites_df.ecoclasstypename!='Forage Suitability Groups')]
duplicate_ecosites_df_1st_filter.shape,duplicate_ecosites_df_1st_filter.siteobsiid.nunique()

In [None]:
duplicate_eco_df,_ = check_duplicate_ecosites(duplicate_ecosites_df_1st_filter)
### these will be dropped

In [None]:
## second filter 
duplicate_ecosites_df_1st_filter.dropna(subset= ['ecoclassid'],inplace=True)
duplicate_ecosites_df_2nd_filter= duplicate_ecosites_df_1st_filter[~duplicate_ecosites_df_1st_filter.siteobsiid.isin(
    duplicate_eco_df.siteobsiid.unique().tolist())]

In [None]:
duplicate_ecosites_df_2nd_filter.shape

In [None]:
duplicate_eco_df,_ = check_duplicate_ecosites(duplicate_ecosites_df_2nd_filter)

In [None]:
feature_target_mlra.loc[feature_target_mlra.siteobsiid.isin(sites_with_dup_ecosites)].shape

In [None]:
siteobs_list = feature_target_mlra.siteobsiid.unique().tolist()
unique_ecosite_siteobs_list = list(set(siteobs_list) - set(sites_with_dup_ecosites))
len(siteobs_list),len(unique_ecosite_siteobs_list),len(sites_with_dup_ecosites)

In [None]:
var_list = ['siteobsiid','ecoclassid','ecoclassname','ecoclasstypename'
            ,'mlra_reg_symbol','mlra_id','mlra_name',
            'lrr_symbol','lrr_name'
           ]
unique_ecosite_df = feature_target_mlra.loc[feature_target_mlra.siteobsiid.isin(unique_ecosite_siteobs_list),
                        var_list].drop_duplicates()
unique_ecosite_df.dropna(subset=[x for x in var_list if x!='siteobsiid'],inplace=True)
deduplicated_ecosite_df = duplicate_ecosites_df_2nd_filter[var_list]
final_target_df  = pd.concat([deduplicated_ecosite_df,unique_ecosite_df])
unique_ecosite_df.shape,deduplicated_ecosite_df.shape,final_target_df.shape

In [None]:
unique_ecosite_df.head()

In [None]:
final_target_df['site_count'] = final_target_df.groupby('siteobsiid')['siteobsiid'].transform('count')

In [None]:
final_target_df.loc[final_target_df['site_count']>1]

In [None]:
## third filter
## choosing the first instance

final_target_df = final_target_df.groupby('siteobsiid').nth(0).reset_index()
final_target_df.shape, final_target_df.siteobsiid.nunique()

In [None]:

final_target_df.shape,final_target_df.siteobsiid.nunique()

In [None]:
final_target_df.head()

In [None]:
final_target_df.isna().sum()

In [None]:
site_mlra_direct_df.head()

In [None]:
target_dataframe = pd.merge(site_mlra_direct_df[['siteobsiid','mlra_id',
                                                 'mlra_reg_symbol','mlra_name','lrr_symbol','lrr_name']],
         final_target_df,
         on =['siteobsiid','mlra_id','mlra_reg_symbol','mlra_name','lrr_symbol','lrr_name'],
         how = 'left')

In [None]:
target_dataframe.isna().mean()

In [None]:
target_dataframe.shape

In [None]:
target_dataframe.info()

In [None]:
target_dataframe.dtypes

In [None]:
site_map_ids[['siteobsiid','siteiid']].dtypes

In [None]:
site_map_ids['siteobsiid'] = site_map_ids['siteobsiid'].astype(int)

target_dataframe_site = pd.merge(target_dataframe,
        site_map_ids[['siteobsiid','siteiid']].drop_duplicates(),
        how='inner',on='siteobsiid')

In [None]:
target_dataframe_site.shape

In [None]:
target_dataframe_site.head()

In [None]:
target_dataframe_site.drop(columns=['siteobsiid','site_count'],inplace=True)

In [None]:
#### after applying the first filter it means dropping the non values of mlra and just keeping the ecosites 
#### with matched MLRA we have only 5% of sites with duplicated values : 8768/166025
#### how about we just pick one ?! or drop since the number looks insignificant

In [None]:
target_dataframe_site.info()

In [None]:
target_dataframe_site.ecoclassname.nunique()

In [None]:
target_dataframe_site.head()

In [None]:
target_dataframe_site['ecoclassname'].value_counts().to_frame()

## Checkpoint - download data

In [None]:
target_dataframe_site['siteiid'] = target_dataframe_site['siteiid'].astype(int)
target_dataframe_site.to_csv('Saved Datasets/target_dataframe_site.csv', index=False)

## End of Part 4 - save target dataframe

## Join all data together (Feature and Target)

In [None]:
## Option 1: continue running code from above
phorizon_data = site_phorizon_pivoted_df_agg
sitepm_geomorph_ncss_climate_satellite_data = ssp_sitepm_ncss_geomorph_agg_prism_satellite
phorizon_data.shape , sitepm_geomorph_ncss_climate_satellite_data.shape

### Part 5

In [3]:
#Option 2: skip running the above code and read in saved datasets from the above checkpoints
target_dataframe_site = pd.read_csv('Saved Datasets/target_dataframe_site.csv')
phorizon_data = pd.read_csv('Saved Datasets/site_phorizon_pivoted_df_agg.csv')
sitepm_geomorph_ncss_climate_satellite_data = pd.read_csv('Saved Datasets/ssp_sitepm_ncss_geomorph_agg_prism_satellite.csv')
print(phorizon_data.shape , sitepm_geomorph_ncss_climate_satellite_data.shape)

#create a mapping for pedon (peiid, peiidref,siteiid,siteiidref,siteobsiid)
ssp_final = pd.read_csv('Saved Datasets/ssp_final.csv')
ssp_final['peiidref'] = ssp_final['peiid'].copy()
site_var_list = ['siteobsiid','peiidref','peiid','siteiid','siteiidref']
site_map_ids = ssp_final[site_var_list]

(468105, 1171) (546262, 1670)


Option 2: skip running the above code and read in saved datasets from the above checkpoints
target_dataframe_site = pd.read_csv('Saved Datasets/target_dataframe_site.csv')
phorizon_data = pd.read_csv('Saved Datasets/site_phorizon_pivoted_df_agg.csv')
sitepm_geomorph_ncss_climate_satellite_data = pd.read_csv('Saved Datasets/ssp_sitepm_ncss_geomorph_agg_prism_satellite.csv')
print(phorizon_data.shape , sitepm_geomorph_ncss_climate_satellite_data.shape)

#create a mapping for pedon (peiid, peiidref,siteiid,siteiidref,siteobsiid)
ssp_final = pd.read_csv('Saved Datasets/ssp_final.csv')
site_var_list = ['siteobsiid','peiidref','peiid','siteiid','siteiidref']
site_map_ids = ssp_final[site_var_list]

In [4]:
#join feature data together
lab_soil_data = sitepm_geomorph_ncss_climate_satellite_data.drop_duplicates(subset=['siteiid'])

feature_data = pd.merge(lab_soil_data,phorizon_data,
         how='left', on='siteiid')
feature_data.shape

(546262, 2840)

In [5]:
## read target data (MLRA , ecoclass information)
target_dataframe_site.shape

(547494, 9)

In [6]:
# change feature_data siteiid to int in order to merge
feature_data['siteiid'] = feature_data['siteiid'].astype(int)

### join features and target and create the final model data
modeling_data = pd.merge(feature_data,target_dataframe_site,
         how='left', on='siteiid')
modeling_data.shape

(547494, 2848)

In [None]:
### modeling data description
modeling_describe_df = modeling_data.describe(include='all')
des = modeling_describe_df.T
des['missing_percent'] = modeling_data.isna().mean()

### Drop Variable

In [None]:
var_to_drop_df = pd.read_csv('Input Files/var_to_drop.csv')

In [None]:
list_to_drop = var_to_drop_df['var_name'].tolist()
list_to_drop

In [None]:
modeling_data.columns

In [None]:
print(modeling_data.shape)
modeling_data = modeling_data.drop(columns=list_to_drop)
print(modeling_data.shape)

In [None]:
#save the merged dataframe as it is memory intensive and read from saved csv
modeling_data.to_csv('Saved Datasets/modeling_data_after_merge_feature_target.csv', index = False)

### count instead of dummy

In [None]:
variable_with_pmorigin = modeling_data.columns[modeling_data.columns.str.contains('pmorigin')]
variable_with_pmmodifier = modeling_data.columns[modeling_data.columns.str.contains('pmmodifier')]
variable_with_pmkind = modeling_data.columns[modeling_data.columns.str.contains('pmkind')]

In [None]:
class count_keywords_cls():

    def __init__(self,dummy_var,var_name):
        self.dummy_vars = dummy_var
        self.var_name = var_name
        
    def subset_dummy_data(self,modeling_data):
        """
        input : modeling data
        output: subset of modeling with column names renamed
        
        This function subset the model data for variables we sent.
        Since these are the one-hot encoded variables their naming convention is varname_category e.g. pmkind_ash
        we only keep the category name in column
        """
        self.dummy_vars = list(set(self.dummy_vars)-{self.var_name+'_nan', self.var_name+'_OTHER'})
        print('model df = ',modeling_data.shape)
        dummy_df = modeling_data[self.dummy_vars + ['siteiid']].copy()
        
#       Note: due to nature of our data set it is possible for each siteid (row) to have multiple categories
#       so we add a suffix of ' '  for when we do a .dot product, it could be considered as a delimiter so
#       we can distingush the cateories

        dummy_df = dummy_df.add_suffix(' ')
        dummy_df.columns = dummy_df.columns.str.replace(self.var_name+'_','')
        print('dummy df shape after subset = ', dummy_df.shape)
        self.dummy_df = dummy_df
        
    def reconstruct_var(self):
        """
        takes the subseted function and reconstruct the categries from one-hot encoding by dot product of 
        columns with value 1 and column name.

        
        """

        cols_without_siteid = list(set(self.dummy_df.columns)-{'siteiid '})
        ### To perevent memory compromise : subset to prevent kernel restart
        self.dummy_df.loc[0:100000,self.var_name] = self.dummy_df.loc[0:100000,cols_without_siteid].dot(self.dummy_df[cols_without_siteid].columns)
        print('dummy df shape after 1 dot = ', self.dummy_df.shape)
        self.dummy_df.loc[100000:200000,self.var_name] = self.dummy_df.loc[100000:200000,cols_without_siteid].dot(self.dummy_df[cols_without_siteid].columns)
        print('dummy df shape after 2 dot = ', self.dummy_df.shape)
        self.dummy_df.loc[200000:300000,self.var_name] = self.dummy_df.loc[200000:300000,cols_without_siteid].dot(self.dummy_df[cols_without_siteid].columns)
        print('dummy df shape after 3 dot = ', self.dummy_df.shape)
        self.dummy_df.loc[300000:400000,self.var_name] = self.dummy_df.loc[300000:400000,cols_without_siteid].dot(self.dummy_df[cols_without_siteid].columns)
        print('dummy df shape after 4 dot = ', self.dummy_df.shape)
        self.dummy_df.loc[400000:,self.var_name] = self.dummy_df.loc[400000:,cols_without_siteid].dot(self.dummy_df[cols_without_siteid].columns)
        print('dummy df shape after 5 dot = ', self.dummy_df.shape)
        
    def cleaning_tokenizing(self):
        """
        This function lemmuniza and lower case the categories
        """
    
        self.dummy_df[self.var_name+'_lower'] = self.dummy_df[self.var_name].str.lower().astype(str)
        self.dummy_df[self.var_name+'_lemm_lower']= self.dummy_df[self.var_name+'_lower'].apply(lambda x:word_lemm_obj.lemmatize(x))
        dummy_df_final= self.dummy_df[['siteiid ',self.var_name+'_lemm_lower']]
        return(dummy_df_final)
    
    def count_keywords_func(self,dummy_df_final):
        """
        This function tokenize the 
        """
       
        
        ### data cleaning, replace - with ' '
        dummy_df_final[self.var_name+'_lemm_lower'] = dummy_df_final[self.var_name+'_lemm_lower'].apply(lambda x:x.replace('-',' '))
        print('getting list of keywords')
        KEYWORDS_LST = np.unique(nltk.word_tokenize(
            ' '.join(dummy_df_final[self.var_name+'_lemm_lower'].tolist())))
        
        filterd_keywords = [keyword for keyword in KEYWORDS_LST if 
                            ((keyword not in punctuation) & 
                             (not keyword.isdigit()) & 
                             (keyword not in(stop_words)))
                           ]
        print(' number of keywords is : ',len(filterd_keywords))
        print(' var name = ',self.var_name)
        for kw in filterd_keywords:
            print ('counting keywords for : ', kw)
            dummy_df_final[self.var_name+'_'+kw] = dummy_df_final[self.var_name+'_lemm_lower'].str.count(kw)
        dummy_df_final.drop(columns= [self.var_name+'_lemm_lower'],inplace=True)
        dummy_df_final.columns = dummy_df_final.columns.str.strip()
        
        return dummy_df_final
    
    def replace_in_model_df(self,count_var_df,modeling_data):
        modeling_data_in = modeling_data.copy()
        modeling_data_in.drop(columns= self.dummy_vars,inplace=True)
        new_model_data = pd.merge(modeling_data_in,count_var_df,on='siteiid',how='left')
        return (new_model_data)

In [None]:
count_keywords_pmmodifier = count_keywords_cls(variable_with_pmmodifier,'pmmodifier')
count_keywords_pmmodifier.subset_dummy_data(modeling_data)
count_keywords_pmmodifier .reconstruct_var()
tokenized_df_pmmodifier = count_keywords_pmmodifier.cleaning_tokenizing()
count_df_pmmodifier = count_keywords_pmmodifier.count_keywords_func(tokenized_df_pmmodifier)
print('dropping duplicated siteiid')
count_df_pmmodifier.drop_duplicates(subset=['siteiid'],inplace=True)
print('merge')
new_model_data_2 = count_keywords_pmmodifier.replace_in_model_df(count_df_pmmodifier,modeling_data)
new_model_data_2.shape

In [None]:
count_keywords_pmkind = count_keywords_cls(variable_with_pmkind,'pmkind')
count_keywords_pmkind.subset_dummy_data(new_model_data_2)
count_keywords_pmkind .reconstruct_var()
tokenized_df_pmkind = count_keywords_pmkind.cleaning_tokenizing()
count_df_pmkind = count_keywords_pmkind.count_keywords_func(tokenized_df_pmkind)
count_df_pmkind.drop_duplicates(subset=['siteiid'],inplace=True)
new_model_data_3 = count_keywords_pmkind.replace_in_model_df(count_df_pmkind,new_model_data_2)
new_model_data_3.shape

In [None]:
count_keywords_pmorigin = count_keywords_cls(variable_with_pmorigin,'pmorigin')
count_keywords_pmorigin.subset_dummy_data(new_model_data_3)
count_keywords_pmorigin.reconstruct_var()
tokenized_df_pmorigin = count_keywords_pmorigin.cleaning_tokenizing()
count_df_pmorigin = count_keywords_pmorigin.count_keywords_func(tokenized_df_pmorigin)
count_df_pmorigin.drop_duplicates(subset=['siteiid'],inplace=True)
new_model_data_4 = count_keywords_pmorigin.replace_in_model_df(count_df_pmorigin,new_model_data_3)
new_model_data_4.shape

In [None]:
modeling_data = new_model_data_4

### Discretization

In [None]:
def discretize_claytotest(var_name,modeling_data):
    
    """
    Discretize to create 4 new flags: clay<=10 (=1 if >=0 and <=10, else 0); 
    clay10to20 (=1 if >10 and <=20, else 0); clay20to30 (=1 if >20 and <=30, else 0); 
    clay>30 (=1 if >30, else 0); missing values =0 for all new flags 
    
    """
    modeling_data[var_name+'<10'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if ((x <= 10)&(x >= 0)) else 0)
    modeling_data[var_name+'_10to20'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if ((x <= 20)&(x > 10)) else 0)
    modeling_data[var_name+'_20to30'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if ((x <= 30)&(x > 20)) else 0)
    modeling_data[var_name+'>30'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if (x > 30) else 0)
    modeling_data.drop(columns=[var_name],inplace=True)

In [None]:
claytotes_vars = modeling_data.columns[modeling_data.columns.str.contains('claytotest_')]
for claytotest_var in claytotes_vars:
    print(claytotest_var)
    discretize_claytotest(claytotest_var,modeling_data)

In [None]:
def discritize_fragvoltot(var_name,modeling_data):
    
    """
    Discretize to create 2 new flags: frag<=10 (=1 if >=0 and <=10, else 0); 
    frag>10 (=1 if >10; else 0); 
    missing values =0 for all new flags
    
    """
    modeling_data[var_name+'<10'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if ((x <= 10)&(x >= 0)) else 0)
    modeling_data[var_name+'>10'] = modeling_data[var_name].apply(lambda x: 
                                                            1 if (x > 10) else 0)
    modeling_data.drop(columns=[var_name],inplace=True)

In [None]:
fragvoltot_vars = modeling_data.columns[modeling_data.columns.str.contains('fragvoltot')]
for fragvoltot_var in fragvoltot_vars:
    print(fragvoltot_var)
    discritize_fragvoltot(fragvoltot_var,modeling_data)

In [None]:
def discritize_phfield(var_name,modeling_data):
    
    """
    Discretize using scale to right to create 11 new flags; 
    missing values are 0 for all flags
    
    """
    
    modeling_data[var_name+'_strong_acidic'] = modeling_data[var_name].apply(lambda x: 
                                         1 if (x <= 5.5) else 0)
    modeling_data[var_name+'_moderate_acidic'] = modeling_data[var_name].apply(lambda x: 
                                    1 if ((x <= 6)&(x >= 5.6)) else 0)
    modeling_data[var_name+'_slight_acidic'] = modeling_data[var_name].apply(lambda x: 
                                    1 if ((x <= 6.5)&(x >= 6.1)) else 0)
    modeling_data[var_name+'_neutral'] = modeling_data[var_name].apply(lambda x: 
                                    1 if ((x <= 7.3)&(x >= 6.6)) else 0)
    modeling_data[var_name+'_slight_alkaline'] = modeling_data[var_name].apply(lambda x: 
                                    1 if ((x <= 7.8)&(x >= 7.4)) else 0)
    modeling_data[var_name+'_moderate_alkaline'] = modeling_data[var_name].apply(lambda x: 
                                    1 if ((x <= 8.4)&(x >= 7.9)) else 0)
    modeling_data[var_name+'_strong_alkaline'] = modeling_data[var_name].apply(lambda x: 
                                    1 if (x >= 8.5) else 0)    
    modeling_data.drop(columns=[var_name],inplace=True)

In [None]:
phfild_vars = modeling_data.columns[modeling_data.columns.str.contains('phfield')]
for phfild_var in phfild_vars:
    print(phfild_var)
    discritize_phfield(phfild_var,modeling_data)

### Replace lat, long , elev with new data

In [None]:
lat_long_elev_data = pd.read_csv('Input Files/siteiid_lat_long_elev.csv')

In [None]:
geographic_loc_info = ['latstddeci','latstddecimaldegrees','longstddec','longstddecimaldegrees','elev']
modeling_data.drop(columns = geographic_loc_info,inplace=True)  

In [None]:
geo_df = lat_long_elev_data[['siteiid','latstddeci','longstddec','mn75_grd']].drop_duplicates()
geo_df.rename(columns={'mn75_grd':'elev'},inplace=True)
geo_df.shape, geo_df['siteiid'].nunique()

In [None]:
modeling_data['siteiid'] = modeling_data['siteiid'].astype(int) #make data type int to match geo_df siteiid then change back to string later

modeling_data_replace_geo = pd.merge(modeling_data,
         geo_df,
         on='siteiid',how='left')

In [None]:
modeling_data = modeling_data_replace_geo.copy()

### Fill in Null values

#### Fill with zero

In [None]:
var_to_fill_zero_df = pd.read_csv('Input Files/var_to_fill_zero_data.csv')
missing_var_list = var_to_fill_zero_df['var_name'].tolist()

In [None]:
modeling_data[missing_var_list] = modeling_data[
    missing_var_list].apply(lambda x: x.fillna(0))

#### Fill with median

In [None]:
var_to_fill_with_median = ['hzdept_0','hzdept_10','hzdept_20','hzdept_30','hzdept_40','hzdept_50','hzdept_60',
 'hzdept_70','hzdept_80','hzdept_90','hzdept_100','hzdept_110','hzdept_120',
'hzdepb_0','hzdepb_10','hzdepb_20','hzdepb_30','hzdepb_40','hzdepb_50','hzdepb_60',
'hzdepb_70','hzdepb_80','hzdepb_90','hzdepb_100','hzdepb_110','hzdepb_120']

In [None]:
modeling_data[var_to_fill_with_median] = modeling_data[
    var_to_fill_with_median].apply(lambda x: x.fillna(x.median()))

### Create Flag

In [None]:
#Feature engineer flag noncarbclay (=0 if Nan; 1 otherwise)

flag_var = ['noncarbclaywtavg','claytotwtavg',
            'cec7clayratiowtavg','pmorder','psctopdepth']


In [None]:
for var in flag_var:
    modeling_data.loc[modeling_data[var].notna(),var] = 1
    modeling_data.loc[modeling_data[var].isna(),var] = 0

### Group together

In [None]:
# Let's group these together into a new flag 
# statextsflag_0to30 (=1 if _0,10,20, or 30 are 1, else 0)

stratextsflag_vars = ['stratextsflag_0','stratextsflag_10',
                      'stratextsflag_20','stratextsflag_30']

modeling_data[stratextsflag_vars] = modeling_data[
    stratextsflag_vars].apply(lambda x: x.fillna(0))

modeling_data['statextsflag_0to30'] = modeling_data[stratextsflag_vars].any(axis=1).map(
    {True:1,False:0})

modeling_data.drop(columns=stratextsflag_vars,inplace=True)

In [None]:
# Group _0 to _70 into a new flag horcolorvflag_0to70 
#                       (=1 if any of _0 to _70 =1; else 0); 
# this will convert missing to 0 as well

horcolorvflag_vars = ['horcolorvflag_0', 'horcolorvflag_10', 'horcolorvflag_20', 
                      'horcolorvflag_30', 'horcolorvflag_40', 'horcolorvflag_50',
                      'horcolorvflag_60', 'horcolorvflag_70']

modeling_data[horcolorvflag_vars] = modeling_data[
    horcolorvflag_vars].apply(lambda x: x.fillna(0))

modeling_data['horcolorvflag_0to70'] = modeling_data[horcolorvflag_vars].any(axis=1).map(
    {True:1,False:0})

modeling_data.drop(columns=horcolorvflag_vars,inplace=True)



### OTHER CLEANING

In [None]:
## drop count of Nan variables
nan_vars = modeling_data.columns[modeling_data.columns.str.endswith('_nan')]
# drop variables that have _OTHER and are less than 0.01 of column
other_vars = [
'obsmethod_0_OTHER',
'obsmethod_100_OTHER',
'obsmethod_70_OTHER',
'obsmethod_30_OTHER',
'obsmethod_80_OTHER',
'obsmethod_120_OTHER',
'obsmethod_110_OTHER',
'obsmethod_20_OTHER',
'obsmethod_90_OTHER',
'obsmethod_40_OTHER',
'obsmethod_50_OTHER'
]

modeling_data.drop(columns=nan_vars,inplace=True)
modeling_data.drop(columns=other_vars,inplace=True)

In [None]:
## fill NAN values with zeros for the following features
fill_with_zero_feat = [
    'Feature_Type_Anthropogenic Feature', 'Feature_Type_Landform', 'Feature_Type_Landscape', 
    'Feature_Type_Microfeature', 'Feature_alluvial fan', 'Feature_coastal plain', 'Feature_drainageway', 
    'Feature_flood plain', 'Feature_foothills', 'Feature_ground moraine', 'Feature_hill', 'Feature_hills', 
    'Feature_hillslope', 'Feature_interfluve', 'Feature_intermontane basin', 'Feature_lake plain', 
    'Feature_mountain', 'Feature_mountain slope', 'Feature_mountains', 'Feature_other', 'Feature_outwash plain',
    'Feature_piedmont', 'Feature_plain', 'Feature_plains', 'Feature_plateau', 'Feature_ridge', 
    'Feature_river valley', 'Feature_stream terrace', 'Feature_terrace', 'Feature_till plain', 
    'Feature_upland', 'Feature_valley', 'obsmethod_0_Bucket Auger', 'obsmethod_50_Large Pit or Quarry', 
    'obsmethod_110_Push Tube', 'obsmethod_10_Shovel Slice', 'obsmethod_20_Push Tube', 'obsmethod_30_Small Pit', 
    'obsmethod_20_Small Pit', 'obsmethod_80_Bucket Auger', 'hzname_20_OTHER', 'hzname_70_OTHER',
    'obsmethod_60_Trench', 'hzname_10_OTHER', 'obsmethod_120_Bucket Auger', 'obsmethod_40_Small Pit',
    'obsmethod_0_Cut', 'desgnmaster_50_OTHER', 'obsmethod_10_Small Pit', 'obsmethod_20_Cut', 
    'obsmethod_10_Push Tube', 'obsmethod_70_Cut', 'obsmethod_50_Push Tube', 'obsmethod_30_Shovel Slice', 
    'obsmethod_90_Screw Auger', 'obsmethod_50_Small Pit', 'obsmethod_10_Large Pit or Quarry', 'obsmethod_50_Cut',
    'hzname_0_OTHER', 'obsmethod_120_Cut', 'obsmethod_110_Trench', 'obsmethod_90_Push Tube', 
    'obsmethod_20_Large Pit or Quarry', 'obsmethod_40_Shovel Slice', 'obsmethod_10_Trench', 'hzname_90_OTHER', 
    'obsmethod_120_Trench', 'obsmethod_100_Small Pit', 'obsmethod_0_Small Pit', 'obsmethod_0_Shovel Slice', 
    'obsmethod_120_Push Tube', 'obsmethod_30_Large Pit or Quarry', 'obsmethod_40_Cut', 
    'obsmethod_120_Large Pit or Quarry', 'obsmethod_30_Bucket Auger', 'obsmethod_60_Push Tube', 
    'obsmethod_20_Shovel Slice', 'obsmethod_30_Push Tube', 'obsmethod_70_Trench', 'obsmethod_0_Trench',
    'obsmethod_60_Cut', 'obsmethod_40_Large Pit or Quarry', 'hzname_40_OTHER', 'obsmethod_80_Trench',
    'hzname_60_OTHER', 'obsmethod_100_Trench', 'hzname_120_OTHER', 'obsmethod_100_Cut', 'obsmethod_80_Small Pit', 
    'desgnmaster_0_OTHER', 'obsmethod_110_Bucket Auger', 'obsmethod_60_Small Pit', 'obsmethod_90_Trench', 
    'obsmethod_90_Cut', 'desgnmaster_110_OTHER', 'desgnmaster_80_OTHER', 'obsmethod_90_Bucket Auger', 
    'desgnmaster_100_OTHER', 'obsmethod_30_Trench', 'obsmethod_80_Large Pit or Quarry', 
    'desgnmaster_10_OTHER', 'obsmethod_10_Bucket Auger', 'obsmethod_40_Trench',
    'obsmethod_110_Large Pit or Quarry', 'obsmethod_60_Bucket Auger', 'hzname_80_OTHER', 'hzname_50_OTHER', 
    'obsmethod_50_Bucket Auger', 'obsmethod_70_Large Pit or Quarry', 'obsmethod_70_Push Tube', 
    'desgnmaster_40_OTHER', 'obsmethod_70_Small Pit', 'obsmethod_110_Small Pit', 'obsmethod_10_Cut',
    'obsmethod_80_Cut', 'obsmethod_60_Large Pit or Quarry', 'desgnmaster_90_OTHER', 'obsmethod_110_Cut', 
    'obsmethod_20_Bucket Auger', 'desgnmaster_20_OTHER', 'obsmethod_100_Bucket Auger', 'obsmethod_80_Screw Auger',
    'obsmethod_20_Trench', 'obsmethod_100_Large Pit or Quarry', 'obsmethod_50_Shovel Slice',
    'desgnmaster_70_OTHER', 'obsmethod_10_OTHER', 'obsmethod_60_OTHER', 'obsmethod_80_Push Tube',
    'obsmethod_0_Large Pit or Quarry', 'desgnmaster_30_OTHER', 'obsmethod_40_Bucket Auger', 
    'obsmethod_90_Large Pit or Quarry', 'hzname_100_OTHER', 'obsmethod_50_Trench', 'hzname_110_OTHER', 
    'obsmethod_120_Small Pit', 'obsmethod_70_Bucket Auger', 'obsmethod_90_Small Pit', 'obsmethod_0_Push Tube',
    'obsmethod_100_Screw Auger', 'obsmethod_100_Push Tube', 'desgnmaster_60_OTHER', 'hzname_30_OTHER', 
    'obsmethod_30_Cut', 'obsmethod_40_Push Tube']


modeling_data[fill_with_zero_feat] = modeling_data[
    fill_with_zero_feat].apply(lambda x: x.fillna(0))

### Join with Veg

#### Create reference table for plant/trees
Need this table in order to join the vegplot and plant datasets

In [None]:
pv_1 = windbreakrowdata[['plantiidref', 'vegplotiidref']]
pv_2 = plottreesiteindexsummary[['plantiidref', 'vegplotiidref']]
pv_3 = plottreeinventory[['plantiidref', 'vegplotiidref']]
pv_4 = plotplantinventory[['plantiidref', 'vegplotiidref']]

frames = [pv_1, pv_2, pv_3, pv_4]
pv_table = pd.concat(frames)

# drop duplicates
pv_table_final = pv_table.drop_duplicates()

pv_table_final.head()

#### cleanse vegplot

In [None]:
# preview raw data
vegplot.head()

In [None]:
# missing values
missing_values(vegplot)

In [None]:
# keep variables and drop the rest
vegplot_v2 = vegplot[['vegplotiid',
                      'soilprofileindicator',
                      'alkalineaffected',
                      'understorydescindicator',
                      'mensurationdataindicator',
                      'siteobsiidref']]

vegplot_final = vegplot_v2.drop_duplicates()

#### plant

In [None]:
# preview raw plant dataset
plant.head()

In [None]:
# missing values
missing_values(plant)

In [None]:
# drop variables that are more than 70% missing and/or are not useful for analysis
plant_v2 = plant.drop(columns = ['obterm',
                                 'plantdbiidref',
                                 'grpiidref', 
                                 'objwlupdated',
                                 'objuseriidref',
                                 'recwlupdated',
                                 'recuseriidref',
                                 'plantsubspecies',
                                 'plantvariety'])

In [None]:
plant_final = plant_v2.drop_duplicates()
plant_final.shape

#### pv + vegplot

In [None]:
pv_vegplot = pd.merge(pv_table_final, 
                    vegplot_final, 
                    how='left', 
                    left_on=['vegplotiidref'], 
                    right_on=['vegplotiid'],
                    suffixes=('_pv','_vegplot'))

pv_vegplot.shape

#### +plant

In [None]:
pv_vegplot_plant = pd.merge(pv_vegplot, 
                    plant_final, 
                    how='inner', 
                    left_on=['plantiidref'], 
                    right_on=['plantiid'],
                    suffixes=('_pv','_plant'))

pv_vegplot_plant.shape

In [None]:
pv_vegplot_plant = pv_vegplot_plant.drop(columns = ['plantiidref',
                                                    'vegplotiidref',
                                                    'vegplotiid',
                                                    'plantiid'])

In [None]:
pv_vegplot_plant = pv_vegplot_plant.drop_duplicates()
pv_vegplot_plant.shape

In [None]:
pv_vegplot_plant.head()

In [None]:
# fill missing plantsciname with plantnatvernm
pv_vegplot_plant['plantsciname'].fillna(pv_vegplot_plant['plantnatvernm'], inplace=True)


##### One hot encode

In [None]:
# one hot encode - get variables
plantsciname = pv_vegplot_plant['plantsciname'].value_counts().to_frame()
features = plantsciname[plantsciname['plantsciname'] > 5000].reset_index()
features = features.drop(columns = 'plantsciname')
features = features.rename(columns={"index": "Features"})
features

In [None]:
pv_vegplot_plant_v2 = pv_vegplot_plant.drop(columns = ['plantsym', 
                                                       'plantnatvernm',
                                                       'plantgenus',
                                                       'plantspecies'])

pv_vegplot_plant_v3 = pd.merge(pv_vegplot_plant_v2, 
                               features, 
                               how='left', 
                               left_on=['plantsciname'], 
                               right_on=['Features'])
pv_vegplot_plant_v3



In [None]:
# fill in NaN in column Features with "Other"
values = {'Features': 'Other'}
pv_vegplot_plant_v3 = pv_vegplot_plant_v3.fillna(value=values)
pv_vegplot_plant_v3

In [None]:
# One Hot Encoding
pv_vegplot_plant_v4 = pv_vegplot_plant_v3[['siteobsiidref']].join(pd.get_dummies(pv_vegplot_plant_v3['Features']).add_prefix('PlantName_')).groupby('siteobsiidref').max().reset_index()
pv_vegplot_plant_v4

In [None]:
pv_vegplot_plant_v4['siteobsiidref'] = pv_vegplot_plant_v4['siteobsiidref'].astype('str')

##### join siteid

In [None]:
site_ids = site_map_ids

In [None]:
site_ids.drop(columns = ['peiidref', 'peiid', 'siteiidref'], inplace=True)
site_ids

In [None]:
# change to integer in order to join
pv_vegplot_plant_v4['siteobsiidref'] = pv_vegplot_plant_v4['siteobsiidref'].astype(int)

plantname = pd.merge(pv_vegplot_plant_v4,
                           site_ids,
                           how='inner',
                           left_on=['siteobsiidref'], 
                           right_on=['siteobsiid'])
plantname.shape

##### collapse on siteiid

In [None]:
plantname_final = plantname.groupby('siteiid').max().reset_index()

In [None]:
plantname_final['siteiid'] = plantname_final['siteiid'].astype(int)

In [None]:
plantname_final.shape

In [None]:
plantname_final.head()

In [None]:
plantname_final.drop(columns='siteobsiid',inplace=True )

In [None]:
modeling_data_veg = modeling_data.merge(plantname_final, how='left', on = 'siteiid')

In [None]:
modeling_data.shape,modeling_data_veg.shape

In [None]:
fill_veg_vars = list(set(list(plantname_final))-{'siteiid'})
modeling_data_veg[fill_veg_vars] = modeling_data_veg[
    fill_veg_vars].apply(lambda x: x.fillna(0))

modeling_data = modeling_data_veg.copy()

### Drop Vars

In [None]:
### drop texcl variables because the information is already repeating in texture varibles with less missing %
variable_with_texcl = modeling_data.columns[modeling_data.columns.str.startswith('texcl')]
modeling_data.drop(columns=variable_with_texcl,inplace=True)

In [None]:
### drop these extra variables
drop_vars = [          
'rupresblkmst_80_Extremely firm','rupresblkmst_90_Extremely firm','rupresblkmst_100_Extremely firm',
'rupresblkmst_110_Extremely firm','rupresblkmst_120_Extremely firm',
'earthcov_1_Marshland',
'PlantName_Abies grandis',
'PlantName_Larix occidentalis',
'PlantName_Pseudotsuga menziesii var. glauca',
'PlantName_Tsuga heterophylla',
'pmorigin_andesite',
'pmorigin_gneiss',
'pmorigin_granitoid',
'pmorigin_metasedimentary',
'pmorigin_mudstone',
'pmorigin_tuff',
'siteobsiidref']
modeling_data.drop(columns=drop_vars,inplace=True)
modeling_data.shape

In [None]:
#drop_less_frequent (less than 1%):
less_freq =   ['pmgroupnam_till',
    'earthcov_1_Other tree cover', 'earthcov_1_Savanna rangeland', 'Feature_Type_Anthropogenic Feature',
    'Feature_Type_Microfeature',
    'horcolorvflag_80', 'horcolorvflag_90', 'horcolorvflag_100', 'horcolorvflag_110', 'horcolorvflag_120',
    'obsmethod_50_Large Pit or Quarry',
    'hzname_60_2Bt2', 'hzname_40_E',
    'obsmethod_10_Shovel Slice', 'desgnmaster_60_O', 'hzname_0_Oa', 'rupresblkmst_50_Extremely firm', 
    'obsmethod_60_Trench', 'hzname_120_Bk', 'obsmethod_0_Cut', 'desgnmaster_90_O', 'obsmethod_20_Cut',
     'hzname_110_2C2', 'obsmethod_70_Cut', 'effclass_60_Very slightly effervescent', 'obsmethod_30_Shovel Slice', 
     'obsmethod_90_Screw Auger', 'obsmethod_10_Large Pit or Quarry', 'desgnmaster_30_AB', 
     'effclass_30_Very slightly effervescent', 'hzname_50_2Bt2', 'desgnmaster_60_E', 
     'effclass_50_Very slightly effervescent', 'obsmethod_50_Cut', 'obsmethod_120_Cut', 'obsmethod_110_Trench', 
     'obsmethod_20_Large Pit or Quarry', 'obsmethod_40_Shovel Slice', 'obsmethod_10_Trench', 'hzname_30_BC',
     'obsmethod_120_Trench', 'obsmethod_0_Shovel Slice', 'effclass_110_Very slightly effervescent', 
     'obsmethod_30_Large Pit or Quarry', 'desgnmaster_80_O', 'obsmethod_40_Cut', 'obsmethod_120_Large Pit or Quarry',
     'effclass_40_Very slightly effervescent', 'rupresblkmst_60_Extremely firm', 'desgnmaster_50_O', 'hzname_40_Bt3',
     'hzname_10_R', 'hzname_30_BA', 'hzname_70_Btk', 'obsmethod_20_Shovel Slice', 'obsmethod_70_Trench',
     'obsmethod_0_Trench', 'obsmethod_60_Cut', 'obsmethod_40_Large Pit or Quarry', 'hzname_40_Btk', 
     'desgnmaster_40_O', 'obsmethod_80_Trench', 'rupresblkmst_0_Very firm', 'desgnmaster_70_E', 'desgnmaster_20_BE', 
     'obsmethod_100_Trench', 'hzname_80_Bw', 'obsmethod_100_Cut', 'hzname_30_Cr', 
     'effclass_80_Very slightly effervescent', 'hzname_110_Cg', 'hzname_80_C3', 'hzname_30_AB', 
     'obsmethod_90_Trench', 'obsmethod_90_Cut', 'desgnmaster_10_R', 'obsmethod_30_Trench', 'hzname_50_A', 
     'boundtopo_0_Irregular', 'effclass_120_Very slightly effervescent', 'obsmethod_80_Large Pit or Quarry', 
     'effclass_90_Very slightly effervescent', 'obsmethod_40_Trench', 'hzname_0_C1',
     'obsmethod_110_Large Pit or Quarry', 'hzname_70_C3', 'bounddistinct_40_Diffuse', 
     'obsmethod_70_Large Pit or Quarry', 'desgnmaster_120_A', 'obsmethod_10_Cut', 'obsmethod_80_Cut', 
     'obsmethod_60_Large Pit or Quarry', 'obsmethod_110_Cut', 'effclass_100_Very slightly effervescent', 
     'obsmethod_80_Screw Auger', 'obsmethod_20_Trench', 'obsmethod_100_Large Pit or Quarry',
     'obsmethod_50_Shovel Slice', 'obsmethod_10_OTHER', 'desgnmaster_30_BA', 'obsmethod_60_OTHER', 
     'obsmethod_0_Large Pit or Quarry', 'hzname_50_Btk', 'bounddistinct_30_Diffuse', 'desgnmaster_110_A', 
     'hzname_120_Bt3', 'hzname_80_2Bt3', 'obsmethod_90_Large Pit or Quarry', 'obsmethod_50_Trench',
     'rupresblkmst_70_Extremely firm', 'bounddistinct_20_Diffuse', 'desgnmaster_70_O', 'hzname_50_A2', 
     'hzname_100_Bt', 'hzname_60_Btk', 'obsmethod_100_Screw Auger', 'hzname_120_Bk2', 
     'effclass_70_Very slightly effervescent', 'obsmethod_30_Cut', 'pmkind_glaciolacustrine',
     'pmorigin_cherty', 'pmorigin_dolomite', 'pmorigin_quartzite', 'pmorigin_schist', 'pmorigin_volcanic', 
     'phfield_0_strong_alkaline', 'PlantName_Acer saccharum', 'PlantName_Aristida', 
     'PlantName_Calamagrostis canadensis', 'PlantName_Calamagrostis rubescens', 'PlantName_Chrysothamnus', 
     'PlantName_Elymus elymoides', 'PlantName_Koeleria macrantha', 'PlantName_Lupinus', 
     'PlantName_Pascopyrum smithii', 'PlantName_Pinus contorta', 'PlantName_Pinus ponderosa', 
     'PlantName_Pinus strobus', 'PlantName_Poa', 'PlantName_Populus tremuloides', 
     'PlantName_Prosopis glandulosa var. torreyana', 'PlantName_Pseudoroegneria spicata', 
     'PlantName_Quercus alba', 'PlantName_Quercus rubra', 'PlantName_Sporobolus cryptandrus',
     'PlantName_Symphoricarpos albus', 
     'texture_60_VFSL', 'texture_0_GR-SIL', 'texture_90_LFS', 'texture_100_VFSL', 'texture_90_VFSL', 
     'texture_0_GRV-L', 'texture_120_LS', 'texture_30_GRV-L', 'texture_120_VFSL', 'texture_110_LFS',
     'texture_50_VFSL', 'texture_10_GRV-L', 'texture_40_VFSL', 'texture_100_LFS', 'texture_0_GR-SL',
     'texture_120_LFS', 'texture_20_GRV-L', 'texture_60_LFS', 'texture_70_LFS', 'texture_10_GR-SIL',
     'texture_80_LFS', 'texture_110_VFSL', 'texture_30_GR-L', 'texture_80_VFSL', 'texture_70_VFSL']

modeling_data.drop(columns=less_freq,inplace=True)
modeling_data.shape

### Drop Index variable 

In [None]:
## first we impute the NaN values with kmeans (based on location because climate is location and elev deopendent)
vars_to_impute = ['ppt01', 'ppt02', 'ppt03', 'ppt04', 'ppt05', 'ppt06', 'ppt07', 'ppt08', 'ppt09', 'ppt10', 'ppt11', 'ppt12', 'pptannual', 

'tdmean01', 'tdmean02', 'tdmean03', 'tdmean04', 'tdmean05', 'tdmean06', 'tdmean07', 'tdmean08', 'tdmean09', 'tdmean10', 'tdmean11', 'tdmean12', 'tdmeanannual',

'tmax01', 'tmax02', 'tmax03', 'tmax04', 'tmax05', 'tmax06', 'tmax07', 'tmax08', 'tmax09', 'tmax10', 'tmax11', 'tmax12', 'tmaxannual',

'tmean01', 'tmean02', 'tmean03', 'tmean04', 'tmean05', 'tmean06', 'tmean07', 'tmean08', 'tmean09', 'tmean10', 'tmean11', 'tmean12', 'tmeanannual',

'tmin01', 'tmin02', 'tmin03', 'tmin04', 'tmin05', 'tmin06', 'tmin07', 'tmin08', 'tmin09', 'tmin10', 'tmin11', 'tmin12', 'tminannual',
                  'vpdmaxannual',

'vpdmin01', 'vpdmin02', 'vpdmin03', 'vpdmin04', 'vpdmin05', 'vpdmin06', 'vpdmin07', 'vpdmin08', 'vpdmin09', 'vpdmin10', 'vpdmin11', 'vpdmin12', 'vpdminannual']

knn_regress_model = KNeighborsRegressor(n_neighbors=3)


features = ['latstddeci','longstddec','elev']
for var in vars_to_impute:
    print(var)
    target = var
    knn_regress_model.fit(X = modeling_data.loc[modeling_data[target].notna(),features], 
                          y = modeling_data.loc[modeling_data[target].notna(),target])
    
    
    modeling_data.loc[modeling_data[target].isnull(), target] = knn_regress_model.predict(
                                modeling_data[features])[modeling_data[target].isnull()]

### Here are new climate variables to replace monthly variables:
    tdm_nov_to_apr = mean (tdmean11, tdmean12, tdmean01, tdmean02, tdmean03, tdmean04)
    tdm_may_to_oct = mean(tdmean05 ... tdmean10)
    ppt_may_to_sep = mean(ppt05 ... ppt09)
    ppt_oct_to_apr = mean(ppt10,ppt11,ppt12,ppt01,ppt02,ppt03,ppt04)
    tmax_apr_to_sep = mean(tmax04 ... tmax09)
    tmax_oct_to_mar = mean(tmax10,tmax11,tmax12,tmax01,tmax02,tmax03)
    tmin_apr_to_oct = mean(tmin04 ... tmin10)
    tmin_nov_to_mar = mean(tmin11, tmin12, tmin01, tmin02, tmin03)
    tmean_apr_to_oct = mean(tmean04 ... tmean10)
    tmean_nov_to_mar = mean(tmean11,tmean12,tmean01,tmean02,tmean03)
    vpdmin_jun_to_oct = mean(vpd06 ... vpd10)
    just drop vpdmax01 ... vpdmax12 (index is basically the same as vpdmaxannual

In [None]:
modeling_data.drop(columns=['vpdmax01','vpdmax02','vpdmax03','vpdmax04',
                             'vpdmax05','vpdmax06','vpdmax07','vpdmax08',
                             'vpdmax09','vpdmax10','vpdmax11','vpdmax12'],inplace=True)

In [None]:
def replace_var_with_average(var_list,model_data):
    avg = model_data[var_list].mean(axis=1)
    model_data.drop(columns=var_list,inplace=True)
    return(avg)

In [None]:
modeling_data['tdm_nov_to_apr'] = replace_var_with_average(['tdmean11', 
                            'tdmean12', 'tdmean01', 'tdmean02', 'tdmean03', 'tdmean04'],modeling_data)
modeling_data['tdm_may_to_oct'] = replace_var_with_average(['tdmean05', 
                            'tdmean06', 'tdmean07', 'tdmean08', 'tdmean09', 'tdmean10'],modeling_data)

modeling_data['ppt_may_to_sep'] = replace_var_with_average(['ppt05','ppt06','ppt07','ppt08','ppt09'],modeling_data)
modeling_data['ppt_oct_to_apr'] = replace_var_with_average(['ppt10','ppt11','ppt12',
                                                             'ppt01','ppt02','ppt03','ppt04'],modeling_data)

modeling_data['tmax_apr_to_sep'] = replace_var_with_average(['tmax04','tmax05','tmax06','tmax07',
                                                              'tmax08','tmax09'],modeling_data)
modeling_data['tmax_oct_to_mar'] = replace_var_with_average(['tmax10','tmax11','tmax12','tmax01',
                                                              'tmax02','tmax03'], modeling_data)

modeling_data['tmin_apr_to_oct'] = replace_var_with_average(['tmin04','tmin05','tmin06','tmin07',
                                                              'tmin08','tmin09','tmin10'], modeling_data)
modeling_data['tmin_nov_to_mar'] = replace_var_with_average(['tmin11', 'tmin12', 'tmin01', 'tmin02',
                                                              'tmin03'], modeling_data)

modeling_data['tmean_apr_to_oct'] = replace_var_with_average(['tmean04','tmean05','tmean06','tmean07', 'tmean08','tmean09','tmean10'], modeling_data)
modeling_data['tmean_nov_to_mar'] = replace_var_with_average(['tmean11','tmean12','tmean01','tmean02',
                                                               'tmean03'], modeling_data)

modeling_data['vpdmin_jun_to_oct'] = replace_var_with_average(['vpdmin06','vpdmin07', 'vpdmin08',
                                                               'vpdmin09','vpdmin10'],modeling_data)

In [None]:
modeling_data.drop_duplicates(inplace=True)

In [None]:
modeling_data.shape, modeling_data.siteiid.nunique()

In [None]:
ClusterGroups = pd.read_csv('Input Files/ClusterGroups.csv')
ClusterGroups.shape

In [None]:
ClusterGroups.drop_duplicates(inplace=True)
ClusterGroups.shape

In [None]:
modeling_data_with_groups = pd.merge(modeling_data,ClusterGroups,on='siteiid',how='left')
modeling_data_with_groups.shape

### Topographic Data

In [None]:
VDPNED6_NEGNED6_df = topo_df_rest[['siteiid','VDPNED6','NEGNED6']].drop_duplicates()
VDPNED6_NEGNED6_df.shape

In [None]:
site_siteobs_df = site_map_ids[['siteobsiid','siteiid']].drop_duplicates()

site_siteobs_df['siteobsiid'] = site_siteobs_df['siteobsiid'].astype(int)

site_siteobs_df.shape

In [None]:
pedons_topo_df = pd.merge(pedons_topo_df[['siteobsiid',
                        'CRVNED6','DEMNED6c','DVMNED6',
                        'GESUSG6_NA','MRNNED6','POSNED6',
                        'SLPNED6','TPINED6']],site_siteobs_df,on='siteobsiid',how='inner')
pedons_topo_df.shape

In [None]:
pedons_topo_df = pedons_topo_df[['siteiid','CRVNED6','DEMNED6c','DVMNED6',
                        'GESUSG6_NA','MRNNED6','POSNED6',
                        'SLPNED6','TPINED6']].drop_duplicates()

pedons_topo_df['siteiid'] = pedons_topo_df['siteiid'].astype(int)

pedons_topo_df.shape

#### Fix GESUS variable with its classes

In [None]:
surfacegeo.head()

In [None]:
# pick out just the GESUS_variables and Value columns
surfacegeo_var = surfacegeo[['GESUS_variables', 'Value']]

In [None]:
# replace original GESUS variables with its text version
pedons_topo_df_V2 = pd.merge(pedons_topo_df,
                      surfacegeo_var,
                      how='left',
                      left_on=['GESUSG6_NA'],
                      right_on=['Value'])
pedons_topo_df_V2

In [None]:
# One Hot Encode GESUS_variables
GESUS_one_hot_encode = pedons_topo_df_V2[['siteiid']].join(pd.get_dummies(pedons_topo_df_V2['GESUS_variables'])).groupby('siteiid').max().reset_index()
GESUS_one_hot_encode

In [None]:
# join one hot encoded variables back to dataset
pedons_topo_df_final = pd.merge(pedons_topo_df_V2,
                      GESUS_one_hot_encode,
                      how='left',
                      left_on=['siteiid'],
                      right_on=['siteiid'])
pedons_topo_df_final

In [None]:
# Drop Value, GESUSG6_NA, GESUS_variables
pedons_topo_df_final = pedons_topo_df_final.drop(columns=['Value', 'GESUSG6_NA', 'GESUS_variables'])

#### merge data with topo data

In [None]:
modeling_data_with_topography = pd.merge(modeling_data_with_groups,pedons_topo_df_final,on='siteiid',how='left')
modeling_data_with_topography.shape


In [None]:
modeling_data_with_topography_cmp = pd.merge(modeling_data_with_topography,VDPNED6_NEGNED6_df,
                                              on='siteiid',how='left')
modeling_data_with_topography_cmp.shape

In [None]:
modeling_data_with_topography_cmp.head()

In [None]:
modeling_data_with_topography_cmp[['VDPNED6','NEGNED6']].isna().mean()

#### Fill in NaN for surface geo one hot encoded variables

In [None]:
surfacegeo_missing_var_list = [
'GESUS_alluvial_thick_sediments',
'GESUS_alluvial_thin_sediments',
'GESUS_coastal_zone_sendiments',
'GESUS_colluvial_alluvial_sediments',
'GESUS_colluvial_sediments_discontinuous',
'GESUS_colluvial_sediments_loess_residual_thin',
'GESUS_eolian_sediments_dunesand',
'GESUS_eolian_sediments_highplains',
'GESUS_eolian_sediments_loess',
'GESUS_glacial_till_sediments_clayey',
'GESUS_glacial_till_sediments_sandy',
'GESUS_glacial_till_sediments_silty',
'GESUS_glaciofluvial_icecontact_sediments',
'GESUS_organic_rich_muck',
'GESUS_other',
'GESUS_proglacial_sediments_coarse_grained',
'GESUS_proglacial_sediments_fine_grained',
'GESUS_residual_materials_alluvial_sediments',
'GESUS_residual_materials_bedrock',
'GESUS_residual_materials_carbonate_rocks',
'GESUS_residual_materials_fine_grained_sedimentary_rocks',
'GESUS_residual_materials_fine_igneous_metamorphic_rocks',
'GESUS_residual_materials_sedimentary_rocks',
'GESUS_water']
modeling_data_with_topography_cmp[surfacegeo_missing_var_list] = modeling_data_with_topography_cmp[
    surfacegeo_missing_var_list].apply(lambda x: x.fillna(0))

In [None]:
modeling_data = modeling_data_with_topography_cmp.copy()

### Drop Index variable 
some index variables we drop after applying pca on model data and keep the pca value instead

In [None]:
drop_pre_PCA_index_var = pd.read_excel('Input Files/VariablestoDropWhenUsingIndices.xlsx')

In [None]:
drop_pre_PCA_vars = drop_pre_PCA_index_var['ExcludeVariables'].unique().tolist()

In [None]:
shared_with_model = modeling_data.columns[modeling_data.columns.isin(drop_pre_PCA_vars)].tolist()

In [None]:
list(set(drop_pre_PCA_vars) - set(shared_with_model))

In [None]:
modeling_data.drop(columns= shared_with_model,inplace=True)
modeling_data.shape

In [None]:
pca_indexed_vars = pd.read_csv('Input Files/ModelDataIndexData.csv')
pca_indexed_vars.shape

In [None]:
pca_indexed_vars.drop_duplicates(inplace=True)
pca_indexed_vars.shape, pca_indexed_vars.siteiid.nunique()

In [None]:
drop_climate_vars = pca_indexed_vars.columns[pca_indexed_vars.columns.str.contains('|'.join(['Temp',
                                                                           'VPD',
                                                                           'Precipitation',
                                                                           'Dewpoint']))].tolist()
pca_indexed_vars.drop(columns= drop_climate_vars,inplace=True)

In [None]:
modeling_data.drop_duplicates(inplace=True)
modeling_data.shape, modeling_data.siteiid.nunique()

In [None]:
modeling_data_with_pca_indexed = pd.merge(modeling_data,pca_indexed_vars,on='siteiid',how='left' )
modeling_data_with_pca_indexed.shape

### KNN Impute

In [None]:
modeling_data = modeling_data_with_pca_indexed.copy()


### these are the variables we replace with knn 
vars_to_impute = ['NDVI_5Pct', 'NDVI_IQR90', 'NDVI_95Pct', 'SATVI_5Pct', 'SATVI_IQR90', 'SATVI_95Pct',
                  'CRVNED6','DEMNED6c','DVMNED6', 'MRNNED6', 'POSNED6', 'SLPNED6',
                  'TPINED6', 'VDPNED6', 'NEGNED6']


knn_regress_model = KNeighborsRegressor(n_neighbors=3)


features = ['latstddeci','longstddec','elev']
for var in vars_to_impute:
    print(var)
    target = var
    knn_regress_model.fit(X = modeling_data.loc[modeling_data[target].notna(),features], 
                          y = modeling_data.loc[modeling_data[target].notna(),target])
    
    
    modeling_data.loc[modeling_data[target].isnull(), target] = knn_regress_model.predict(
                                modeling_data[features])[modeling_data[target].isnull()]

### Last Cleaning/Variable Drop

In [None]:
# This is the modeling dataset that goes into the K-Means clustering algorithm
modeling_data.to_csv('modeling_data_afterKNN.csv',index=False)

In [None]:
modeling_data.shape

In [None]:
other_type_var = [x for x in list(modeling_data) if 'other' in x.lower()] + ['HorizonMasterOth0to10Index',
'HorizonMasterOth10to30Index',
'HorizonMasterOth40to70Index',
'HorizonMasterOth80to100Index',
'HorizonNameOth_30to120Index',
'HorizonTextureOTH_0to60Index',
'HorizonTextureOTH_60to120Index']

In [None]:
other_type_var

In [None]:
'pmgroupnam_OTHER',
'taxonname_OTHER',
'taxclname_OTHER',
'earthcov_1_Other grass/herbaceous cover',
'pmkind_OTHER',
'pmorigin_OTHER',
'Feature_other',
'PlantName_Other',
'HorizonMasterOth0to10Index',
'HorizonMasterOth10to30Index',
'HorizonMasterOth40to70Index',
'HorizonMasterOth80to100Index',
'HorizonNameOth_30to120Index',
'HorizonTextureOTH_0to60Index',
'HorizonTextureOTH_60to120Index',