### Scratch book for initial look and working with json files for rock outcrops

Written: 10/03/2023
Last edited: 10/03/2023

In [148]:
# Any necessary package imports
import json

In [149]:
# set up and read in the json file
f = open('stewarts_mill.json')
outcrop_data = json.load(f)

In [150]:
# several very basic functions to extract information from the json and parse a bit

# get all of the unique likeness codes
def unique_likenesses(input_json):
    # set up an empty list to catch all the likeness codes
    all_likeness = []
    # loop through all strat columns
    for iter in input_json['strat_columns']:
        # check if we already have this likeness code
        if iter['likeness_code'] not in all_likeness:
            # if not, add it to the list
            all_likeness.append(iter['likeness_code'])

    # do the same for strat data and grid data
    for iter in input_json['strat_data']:
        if iter['likeness_code'] not in all_likeness:
            all_likeness.append(iter['likeness_code'])

    for iter in input_json['grid_data']:
        if iter['likeness_code'] not in all_likeness:
            all_likeness.append(iter['likeness_code'])

    # just return the list at the end
    return all_likeness
    
# get the indices of where an item is in a list of strings
def string_find(solo_string, string_list):
    # set up empty match list and counter for going through the list
    match_inds = []
    i = 0

    # loop through and add the index if a match
    while i < len(string_list):
        if solo_string == string_list[i]:
            match_inds.append(i)
        i += 1

    # return the final match indices
    return match_inds

In [151]:
# define some functions to test properties of the json

# we end up checking a lot of lengths against each other, so define a general length checking function
def length_checker(list_one, list_two, identifier, input_errors=[], comparison_terms='', print_flag=False):
    # make a list of errors to output at the end of the function, which is just the input to start
    output_errors = input_errors

    # start by checking the length
    if len(list_one) != len(list_two):
        # if not equal, make an error statement, add it to output_errors and print it
        error_statement = identifier + ' ' + comparison_terms + ' not equal length'
        output_errors.append(error_statement)
        print(error_statement)
    elif (len(list_one) == len(list_two)) and (print_flag):  
        # otherwise can print that they pass
        print(identifier + ' thicknesses and lithologies passed length test')    

    # return the errors
    return output_errors


# check if we have one bed thickness for every bed in the strat columns 
def test_cols(input_json):
    # set up an empty list of errors
    errors = []
    # loop through each strat column
    for sect in input_json['strat_columns']:
        #check lengths of thicknesses and lithologies
        errors.append(length_checker(sect['thicknesses'], sect['lithologies'], sect['likeness_code'], input_errors=errors, comparison_terms='thicknesses and lithologies'))

        # while already in the loop, to the same check for notes vs. thicknesses
        errors.append(length_checker(sect['thicknesses'], sect['notes'], sect['likeness_code'], input_errors=errors, comparison_terms='thicknesses and notes'))

    # return errors at the end if we want them
    return errors


# check that gridded data positions are all of the same length
def test_positions(input_json):
    # set up an empty list of errors
    errors = []
    # loop through each gridded data section
    for grid in input_json['grid_data']:
        # check that latitude and longitude are of the same length
        errors.append(length_checker(grid['latitude'], grid['longitude'], grid['likeness_code'], input_errors=errors, comparison_terms='latitude and longitude'))

        # check msl, hae, and gnss_height, but only do so if those entries are filled
        if grid['hae']:
            errors.append(length_checker(grid['latitude'], grid['hae'], grid['likeness_code'], input_errors=errors, comparison_terms='latitude and hae'))
        
        if grid['msl']:
            errors.append(length_checker(grid['latitude'], grid['msl'], grid['likeness_code'], input_errors=errors, comparison_terms='latitude and hae'))

        if grid['gnss_height']:
            errors.append(length_checker(grid['latitude'], grid['gnss_height'], grid['likeness_code'], input_errors=errors, comparison_terms='latitude and GNSS height'))

    # return errors at the end if we want them
    return errors

# check that for every grid point in each gridded dataset, there is one of every measurement
def test_grid_data(input_json):
    # set up an empty list of errors
    errors = []
    # loop through each gridded data section
    for grid in input_json['grid_data']:
        # subloop through all of the measurements in the grid
        for meas in grid['quantities']:
            # run the length checker on latitude (representing position) and the measurement values
            errors.append(length_checker(grid['latitude'], meas['values'], grid['likeness_code'], input_errors=errors, comparison_terms='latitude and ' + meas['measurement_name']))

    # return errors at the end if we want them
    return errors

# check if we have one measurement per stratigraphic height entry
def test_strat_data(input_json):
    # set up an empty list of errors
    errors = []
    # loop through each strat data entry
    for sect in input_json['strat_data']:
        # subloop through all of the measurements in the section
        for meas in sect['quantities']:
            # run the length checker on latitude (representing position) and the measurement values
            errors.append(length_checker(sect['heights_measured'], meas['values'], sect['likeness_code'], input_errors=errors, comparison_terms='heights measured and ' + meas['measurement_name']))

    # return errors at the end if we want them
    return errors

# check if the measurements for a given strat column fall within the height of the column
def test_strat_height(input_json):
    # set up an empty list of errors
    errors = []
    # and a list of all likeness codes in the strat_data entries
    data_likeness = [sub['likeness_code'] for sub in input_json['strat_data']]
    # loop through each strat column entry
    for col in input_json['strat_columns']:
        # we only need to do anything if this column's likeness code corresponds to one from strat_data
        if col['likeness_code'] in data_likeness:
            # get the total height
            total_height = sum(col['thicknesses'])
            # and the indices at which this likeness code matches to one from strat_data
            match_inds = string_find(col['likeness_code'], data_likeness)
            
            # lastly iterate through the matched_inds and check that the max height is less than the total thickness
            for ind in match_inds:
                if max(input_json['strat_data'][ind]['heights_measured']) > total_height:
                    # if it is, we need to append and print the error
                    error_statement = input_json['strat_data'][ind]['likeness_code'] + ' has a measurement height that exceeds total column thickness.'
                    errors.append(error_statement)
                    print(error_statement)

    # return errors at the end if we want them
    return errors

# run through all of the tests in one function
def test_all(input_json, print_flag=False):
    # set up an empty list of errors
    errors = []

    # and do all the tests, appending errors if there are any
    errors.append(test_cols(input_json))
    errors.append(test_positions(input_json))
    errors.append(test_grid_data(input_json))
    errors.append(test_strat_data(input_json))
    errors.append(test_strat_height(input_json))

    # print errors or passed message
    if print_flag:
        if not errors:
            print('JSON passes all tests!')
        else:
            print(errors)

    # and return errors
    return errors



In [152]:
# run through all tests
all_errors = test_all(outcrop_data, print_flag=True)


[[[...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], [[...], [...], [...], [...]], [[...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], [[...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], []]


In [147]:
print(all_errors)

[[[...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], [[...], [...], [...], [...]], [[...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], [[...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...], [...]], []]
