In [1]:
# Import libraries and dependencies
import requests
import numpy as np
import pandas as pd
import urllib
import json
import random
import math



In [2]:
# Read appellation geo
appellations_geo_df: pd.DataFrame = pd.read_csv('./Database data/appellations_geo.csv')
#appellations_geo_df

### Set up URL for SoilGrids API and DataFrame to hold return

In [3]:
# Dummy call of API to get column names
appLon = appellations_geo_df.iloc[0,3]
appLat = appellations_geo_df.iloc[0, 2]
url = f'https://rest.soilgrids.org/soilgrids/v2.0/properties/query?lon={appLon}&lat={appLat}&property=bdod&property=cec&property=cfvo&property=clay&property=nitrogen&property=ocd&property=ocs&property=phh2o&property=sand&property=silt&property=soc&depth=0-5cm&depth=0-30cm&depth=5-15cm&depth=15-30cm&depth=30-60cm&depth=60-100cm&depth=100-200cm&value=Q0.5&value=Q0.05&value=Q0.95&value=mean&value=uncertainty'
appellation_sample = requests.get(url).json()
#appellation_sample

In [4]:
# Initialize list for column names and add appellation
soils_columns = []
soils_columns.append('appellation')
# Get column names and append to list
for i in range(len(appellation_sample['properties']['layers'])):
    for j in range(len(appellation_sample['properties']['layers'][i]['depths'])):
        soils_columns.append(f"{appellation_sample['properties']['layers'][i]['name']}_{appellation_sample['properties']['layers'][i]['depths'][j]['label']}")
soils_columns

# Initialize DataFrame with soil_columns as column names
soils_df: pd.DataFrame = pd.DataFrame(columns = soils_columns)

### Loop through API call for appellations and add to DataFram

In [5]:
# Copy appellation_geo_df to store new coordinates if original coordinates do not return API results
appellations_geo_df_REVISED = appellations_geo_df.copy()

# Loop through coordinates of appellations to create API call and add to soils_df
for appellation in range(len(appellations_geo_df)):
    appLon = appellations_geo_df.iloc[appellation,3]
    appLat = appellations_geo_df.iloc[appellation, 2]
    url = f'https://rest.soilgrids.org/soilgrids/v2.0/properties/query?lon={appLon}&lat={appLat}&property=bdod&property=cec&property=cfvo&property=clay&property=nitrogen&property=ocd&property=ocs&property=phh2o&property=sand&property=silt&property=soc&depth=0-5cm&depth=0-30cm&depth=5-15cm&depth=15-30cm&depth=30-60cm&depth=60-100cm&depth=100-200cm&value=Q0.5&value=Q0.05&value=Q0.95&value=mean&value=uncertainty'
    appellation_soil_api_response = requests.get(url).json()
    
    # If API return is null, change coordinates until a nearby point returns non-null results
    while not appellation_soil_api_response['properties']['layers'][i]['depths'][j]['values']['Q0.5']:
        random_multiplier = 1
        appLon = appLon + random.uniform(-.005, 0.005)*random_multiplier
        appLat = appLat + random.uniform(-.005, 0.005)*random_multiplier
        # Update coordinates in revised dataframe
        appellations_geo_df_REVISED.iloc[appellation, 3] = appLon
        appellations_geo_df_REVISED.iloc[appellation, 2] = appLat
        url = f'https://rest.soilgrids.org/soilgrids/v2.0/properties/query?lon={appLon}&lat={appLat}&property=bdod&property=cec&property=cfvo&property=clay&property=nitrogen&property=ocd&property=ocs&property=phh2o&property=sand&property=silt&property=soc&depth=0-5cm&depth=0-30cm&depth=5-15cm&depth=15-30cm&depth=30-60cm&depth=60-100cm&depth=100-200cm&value=Q0.5&value=Q0.05&value=Q0.95&value=mean&value=uncertainty'
        # Increment random_multiplier to help assure that random changes move iterations farther from original point
        random_multiplier += 0.1
        appellation_soil_api_response = requests.get(url).json()

    # Initiate list to hold data and append appellation name based on index number
    soils_row = []
    soils_row.append(appellations_geo_df.iloc[appellation,0])
    
    # Loop through API return and add selected data to soils_row
    for i in range(len(appellation_soil_api_response['properties']['layers'])):
        for j in range(len(appellation_soil_api_response['properties']['layers'][i]['depths'])):
            soils_row.append(appellation_soil_api_response['properties']['layers'][i]['depths'][j]['values']['Q0.5'])
    
    # Add soils_row as row to end of soils_df
    soils_df.loc[len(soils_df)] = soils_row
#soils_df

In [6]:
# Export "raw" soils_df to csv
soils_df.to_csv('./Database data/soils_df_without_averages.csv')

### Calculate columns and clean data

In [7]:
# Display column headers
soils_df.columns

Index(['appellation', 'bdod_0-5cm', 'bdod_5-15cm', 'bdod_15-30cm',
       'bdod_30-60cm', 'bdod_60-100cm', 'bdod_100-200cm', 'cec_0-5cm',
       'cec_5-15cm', 'cec_15-30cm', 'cec_30-60cm', 'cec_60-100cm',
       'cec_100-200cm', 'cfvo_0-5cm', 'cfvo_5-15cm', 'cfvo_15-30cm',
       'cfvo_30-60cm', 'cfvo_60-100cm', 'cfvo_100-200cm', 'clay_0-5cm',
       'clay_5-15cm', 'clay_15-30cm', 'clay_30-60cm', 'clay_60-100cm',
       'clay_100-200cm', 'nitrogen_0-5cm', 'nitrogen_5-15cm',
       'nitrogen_15-30cm', 'nitrogen_30-60cm', 'nitrogen_60-100cm',
       'nitrogen_100-200cm', 'ocd_0-5cm', 'ocd_5-15cm', 'ocd_15-30cm',
       'ocd_30-60cm', 'ocd_60-100cm', 'ocd_100-200cm', 'ocs_0-30cm',
       'phh2o_0-5cm', 'phh2o_5-15cm', 'phh2o_15-30cm', 'phh2o_30-60cm',
       'phh2o_60-100cm', 'phh2o_100-200cm', 'sand_0-5cm', 'sand_5-15cm',
       'sand_15-30cm', 'sand_30-60cm', 'sand_60-100cm', 'sand_100-200cm',
       'silt_0-5cm', 'silt_5-15cm', 'silt_15-30cm', 'silt_30-60cm',
       'silt_60-100cm', 's

In [8]:
# Create list of soil variables for use in selected calculations
soils_variables = []
for i in range(len(appellation_sample['properties']['layers'])):
    soils_variables.append(appellation_sample['properties']['layers'][i]['name'])
soils_variables

['bdod',
 'cec',
 'cfvo',
 'clay',
 'nitrogen',
 'ocd',
 'ocs',
 'phh2o',
 'sand',
 'silt',
 'soc']

In [9]:
# Edit list of soil variables
# Remove 'ocs' column as there is only one range for ocs
soils_variables.remove('ocs')
# Remove 'phh2o' column as average of pH is calculated 
soils_variables.remove('phh2o')
soils_variables

['bdod', 'cec', 'cfvo', 'clay', 'nitrogen', 'ocd', 'sand', 'silt', 'soc']

In [10]:
# Create list of depths for measurments up to 100cm for use with calculations
soils_range_0_100cm = []
for j in range(len(appellation_sample['properties']['layers'][i]['depths'])-1):
    soils_range_0_100cm.append(appellation_sample['properties']['layers'][i]['depths'][j]['label'])
soils_range_0_100cm

['0-5cm', '5-15cm', '15-30cm', '30-60cm', '60-100cm']

In [11]:
# Create list of weights for measurements for use with calculations
soils_range_weight = [0.05, 0.1, 0.15, 0.3, 0.4]

In [12]:
# Calculate average valuse for depths down to 100cm and populate new column
for i in range(len(soils_variables)):
    variable = soils_variables[i]
    variable_summator = 0.0
    for j in range(len(soils_range_0_100cm)):
        depth = soils_range_0_100cm[j]
        variable_summator = variable_summator + soils_df[f'{variable}_{depth}']*soils_range_weight[j]
        soils_df[f'{variable}_0-100cm'] = variable_summator
#soils_df

In [13]:
# Cast phh2o columns to float and convert to typical pH (e.g., divide by 10).
for depth in soils_range_0_100cm:
    soils_df[f'phh2o_{depth}'] = soils_df[f'phh2o_{depth}'].astype(float)*.1
# Convert phh2o_100-200cm to typical pH
soils_df['phh2o_100-200cm'] = soils_df['phh2o_100-200cm']*.1

In [14]:
# Take antilog of pH values and calculate weighted average to calculate pseudo-average
for i in range(len(soils_range_0_100cm)):
    pH_summator = 0.0
    depth = soils_range_0_100cm[i]
    pH_summator = pH_summator + np.power(10, soils_df[f'phh2o_{depth}'])*soils_range_weight[i]
    soils_df[f'phh2o_0-100cm'] = np.log10(pH_summator)
#soils_df

In [15]:
# Reorder columns
soils_df = soils_df[['appellation', 
    'bdod_0-100cm', 'bdod_0-5cm', 'bdod_5-15cm', 'bdod_15-30cm', 'bdod_30-60cm', 'bdod_60-100cm', 'bdod_100-200cm', 
    'cec_0-100cm', 'cec_0-5cm', 'cec_5-15cm', 'cec_15-30cm', 'cec_30-60cm', 'cec_60-100cm', 'cec_100-200cm', 
    'cfvo_0-100cm', 'cfvo_0-5cm', 'cfvo_5-15cm', 'cfvo_15-30cm', 'cfvo_30-60cm', 'cfvo_60-100cm', 'cfvo_100-200cm', 
    'clay_0-100cm', 'clay_0-5cm', 'clay_5-15cm', 'clay_15-30cm', 'clay_30-60cm', 'clay_60-100cm', 'clay_100-200cm', 
    'nitrogen_0-100cm', 'nitrogen_0-5cm', 'nitrogen_5-15cm', 'nitrogen_15-30cm', 'nitrogen_30-60cm', 'nitrogen_60-100cm', 'nitrogen_100-200cm',
    'ocd_0-100cm', 'ocd_0-5cm', 'ocd_5-15cm', 'ocd_15-30cm', 'ocd_30-60cm', 'ocd_60-100cm', 'ocd_100-200cm', 
    'ocs_0-30cm',
    'phh2o_0-100cm', 'phh2o_0-5cm', 'phh2o_5-15cm', 'phh2o_15-30cm', 'phh2o_30-60cm', 'phh2o_60-100cm', 'phh2o_100-200cm', 
    'sand_0-100cm', 'sand_0-5cm', 'sand_5-15cm', 'sand_15-30cm', 'sand_30-60cm', 'sand_60-100cm', 'sand_100-200cm',
    'silt_0-100cm', 'silt_0-5cm', 'silt_5-15cm', 'silt_15-30cm', 'silt_30-60cm', 'silt_60-100cm', 'silt_100-200cm', 
    'soc_0-100cm', 'soc_0-5cm', 'soc_5-15cm', 'soc_15-30cm', 'soc_30-60cm', 'soc_60-100cm', 'soc_100-200cm']]
#soils_df

In [16]:
# Export all data to csv
soils_df.to_csv('./Database data/soils_df_all_data.csv')

In [17]:
# Drop columns to keep only measurments for 0-100cm and 100-200cm
soils_df = soils_df[['appellation', 
    'bdod_0-100cm', 'bdod_100-200cm', 
    'cec_0-100cm', 'cec_100-200cm', 
    'cfvo_0-100cm', 'cfvo_100-200cm', 
    'clay_0-100cm', 'clay_100-200cm', 
    'nitrogen_0-100cm', 'nitrogen_100-200cm',
    'ocd_0-100cm', 'ocd_100-200cm', 
    'ocs_0-30cm',
    'phh2o_0-100cm', 'phh2o_100-200cm', 
    'sand_0-100cm', 'sand_100-200cm',
    'silt_0-100cm', 'silt_100-200cm', 
    'soc_0-100cm', 'soc_100-200cm']]
soils_df

Unnamed: 0,appellation,bdod_0-100cm,bdod_100-200cm,cec_0-100cm,cec_100-200cm,cfvo_0-100cm,cfvo_100-200cm,clay_0-100cm,clay_100-200cm,nitrogen_0-100cm,...,ocd_100-200cm,ocs_0-30cm,phh2o_0-100cm,phh2o_100-200cm,sand_0-100cm,sand_100-200cm,silt_0-100cm,silt_100-200cm,soc_0-100cm,soc_100-200cm
0,Alexander Valley,156.5,160,223.95,235,75.25,80,222.75,236,68.45,...,37,40,5.90206,6.3,328.25,313,368.4,313,74.65,21
1,Amador County,158.4,170,158.05,187,51.5,40,181.8,159,60.6,...,28,40,5.60206,6.1,431.35,477,358.8,336,74.9,19
2,Anderson Valley,156.35,160,185.9,194,33.9,30,263.4,238,65.5,...,32,36,5.80206,6.2,395.45,393,307.65,332,65.0,14
3,Atlas Peak,159.65,160,204.15,225,42.6,42,239.1,227,70.8,...,38,34,6.40206,7.0,276.7,331,406.8,370,68.6,16
4,Bennett Valley,149.8,158,201.0,240,74.0,70,200.95,189,75.3,...,34,34,5.70206,6.1,282.35,291,372.7,334,67.1,16
5,Calistoga,143.95,144,246.8,234,59.5,90,198.65,198,81.9,...,37,37,5.60206,6.0,418.2,450,301.35,270,76.05,24
6,Carneros,160.4,161,215.4,233,30.15,25,216.1,219,60.15,...,38,36,5.80206,6.4,317.75,340,408.6,401,57.9,13
7,Chalk Hill,156.5,160,227.95,218,68.0,70,241.7,272,69.3,...,37,41,5.40206,5.8,369.45,345,347.15,293,78.15,23
8,Chehalem Mountains,138.4,151,181.05,174,49.15,30,256.4,296,135.3,...,37,68,5.50206,5.7,154.5,185,588.05,520,141.25,37
9,Columbia Valley,144.7,146,130.7,113,0.0,0,49.8,32,54.6,...,27,23,7.80206,8.2,379.7,359,497.0,522,48.1,15


In [18]:
# Export final data to csv
soils_df.to_csv('./Database data/soils_df_FINAL.csv')

In [19]:
# Export revised geo data to csv for use in other applications
appellations_geo_df_REVISED.to_csv('./Database data/appellelations_geo_REVISED.csv')
appellations_geo_df_REVISED

Unnamed: 0,appellation,count,app_lat,app_lon
0,Alexander Valley,32,38.613,-122.7694
1,Amador County,4,38.2479,-120.6731
2,Anderson Valley,2,40.4432,-122.2828
3,Atlas Peak,6,38.3195,-122.2559
4,Bennett Valley,2,38.426167,-122.659994
5,Calistoga,4,38.576605,-122.583814
6,Carneros,199,38.2554,-122.3325
7,Chalk Hill,6,38.6038,-122.7772
8,Chehalem Mountains,1,45.3762,-123.0373
9,Columbia Valley,230,47.108766,-119.284602
