In [1]:
import json
import requests
import plotly.graph_objects as go

from glob import glob
from pandas import concat, DataFrame
from numpy import arange

# Settings

In [21]:
# Folder with the chunked batch files; the batch -> bounding-box map sits in the same folder,
# so its path is derived from the folder instead of repeating the literal.
dir_batches = 'output/json_5MB_chunks_2026-02-14/'
dir_file_mapping = dir_batches + 'map_batch_bboxes.json'

In [4]:
# Free-text query that is passed to the geocoding request as its "q" parameter.
user_input_address = 'Depot Boijmans Van Beuningen'

In [16]:
# Marker colour (hex) per `lithoklasse_material` value; looked up when the
# 3D scatter plot is assembled.
colors = {
    "NaN": "#f20808",  # previously tried: "#ffffff"
    "veen": "#64564c",
    "klei": "#b2a38d",
    "kleiig_zand": "#8a8783",
    "vervallen": "#ee82ee",
    "zand_fijn": "#000000",
    "zand_matig_grof": "#c5c5c5",
    "zand_grof": "#616160",
    "grind": "#ffff82",
    "schelpen": "#eb611e",
}

# Convert Address to Coordinates

In [6]:
# Geocode `user_input_address` through the Nominatim search endpoint.
# Whenever the lookup fails or returns no match, the fixed default coordinates
# below are kept, so the later cells always receive numeric values
# (a `None` coordinate would break their numeric comparisons).
DEFAULT_LATITUDE, DEFAULT_LONGITUDE = 51.9139529, 4.4711320
latitude, longitude = DEFAULT_LATITUDE, DEFAULT_LONGITUDE

try:
    geo = requests.get(
        url="https://nominatim.openstreetmap.org/search",
        headers={"User-Agent": "CaraLogic (contact: silvia@caralogic.com)"},
        params={"q": user_input_address, "format": "json", "limit": 1},
        timeout=10,  # without a timeout an unresponsive server blocks the cell forever
    )
    geo.raise_for_status()

    matches = geo.json()  # decode the response body once
    if len(matches) == 0:
        print(
            f"no data found for {user_input_address}, "
            f"using default coordinates {latitude} {longitude}"
        )
    else:
        location = matches[0]
        # Both values are converted before either name is rebound, so a malformed
        # entry leaves the defaults intact.
        latitude, longitude = float(location['lat']), float(location['lon'])
except (requests.RequestException, ValueError, KeyError, TypeError) as err:
    # Network/HTTP errors, an undecodable body, or an unexpected payload shape.
    # Unlike a bare `except:`, this does not swallow KeyboardInterrupt.
    latitude, longitude = DEFAULT_LATITUDE, DEFAULT_LONGITUDE
    print(
        f"Could not fetch coordinates for address {user_input_address} ({err}), "
        f"using default coordinates {latitude} {longitude}"
    )


latitude, longitude

(51.9138029, 4.4712212)

# Identify batches to Load

In [7]:
# Read the batch -> bounding-box mapping and turn it into a table with one row
# per batch ID and one column per bound.
with open(dir_file_mapping, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The top-level keys are used as row labels directly. The earlier approach
# relabelled rows with `MultiIndex.levels[0]`, which lists the unique level
# values and is not guaranteed to line up with the rows one-to-one.
# assumes every value in the mapping is a flat dict of bounds — TODO confirm
df_coords_in_batch = DataFrame.from_dict(data, orient='index').rename_axis("batchID")

df_coords_in_batch

Unnamed: 0_level_0,minLon,maxLon,minLat,maxLat
batchID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,3.985117,4.019859,51.916354,51.983193
1,4.019863,4.035307,51.92207,51.986955
2,4.035334,4.051472,51.925849,51.993445
3,4.051488,4.070367,51.929627,51.992596
4,4.070389,4.091834,51.932762,51.989182
5,4.09186,4.107678,51.924786,51.989567
6,4.107703,4.118501,51.935665,51.996882
7,4.118526,4.129867,51.93404,52.000571
8,4.129873,4.141555,51.932382,51.99882
9,4.14158,4.154074,51.929825,51.989958


In [22]:
# Margin added on every side of a batch bounding box before testing the point.
delta = 0.01

# A batch is kept when the target point lies strictly inside its padded box.
lat_inside = (
    (df_coords_in_batch.minLat - delta < latitude)
    & (df_coords_in_batch.maxLat + delta > latitude)
)
lon_inside = (
    (df_coords_in_batch.minLon - delta < longitude)
    & (df_coords_in_batch.maxLon + delta > longitude)
)
df_crop = df_coords_in_batch[lat_inside & lon_inside]


print(f'Batches required for user_input with delta in latitude and longitude of {delta}: \n{list(df_crop.index)}')

Batches required for user_input with delta in latitude and longitude of 0.01: 
['38', '39', '40', '41', '42']


# Load batches

In [23]:
# Every batch file in the folder, unfiltered (glob already yields a list).
# NOTE(review): the following cell rebinds this same name to the subset listed
# in df_crop; running this cell after that one makes the loader read every file.
ls_files_for_read = list(glob(dir_batches + 'litho_batch*.json'))

ls_files_for_read

['output/json_5MB_chunks_2026-02-14/litho_batch_39.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_54.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_42.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_15.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_35.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_23.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_19.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_6.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_7.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_18.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_22.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_34.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_14.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_43.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_55.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_38.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_33.json',
 'output/json_5M

In [9]:
# Keep only the batch files whose ID appears in df_crop's index. The ID is the
# text between the last "_" of the file name and the first "." that follows it.
ls_files_for_read = [
    batch_path
    for batch_path in glob(dir_batches + 'litho_batch*.json')
    if batch_path.split('/')[-1].rpartition('_')[2].partition('.')[0] in df_crop.index
]

ls_files_for_read

['output/json_5MB_chunks_2026-02-14/litho_batch_39.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_42.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_38.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_40.json',
 'output/json_5MB_chunks_2026-02-14/litho_batch_41.json']

In [24]:
def _records_from_batch(batch):
    """Flatten the decoded content of one batch file into a list of row dicts.

    Each element of ``batch`` is handled by its type:

    * a non-empty list whose first element is a dict contributes its elements as rows;
    * any other list contributes one ``{"value": item}`` row per item;
    * a dict is a single row;
    * anything else becomes a single ``{"value": element}`` row.
    """
    records = []
    for f_sub in batch:
        if isinstance(f_sub, list):
            if len(f_sub) > 0 and isinstance(f_sub[0], dict):
                records.extend(f_sub)
            else:
                records.extend({"value": item} for item in f_sub)
        elif isinstance(f_sub, dict):
            records.append(f_sub)
        else:
            records.append({"value": f_sub})
    return records


# Load every selected batch file into one frame. Rows are collected as plain
# dicts and converted once per file; building a DataFrame per element and
# concatenating them is far slower.
ls_df = []
for en, path in enumerate(ls_files_for_read):
    # Announce the file before reading it so the progress line reflects ongoing work.
    print(f"Reading file {en+1} from {len(ls_files_for_read)}...")
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    records = _records_from_batch(data)
    if records:  # a file without any rows contributes nothing
        ls_df.append(DataFrame(records))

if not ls_df:
    # concat() would fail with an opaque "No objects to concatenate" here.
    raise ValueError(
        "No records were loaded: ls_files_for_read is empty or the batch files contain no rows."
    )

# ignore_index avoids repeating the same row labels once per file.
df_import = concat(ls_df, ignore_index=True)

Reading file 1 from 57...
Reading file 2 from 57...
Reading file 3 from 57...
Reading file 4 from 57...
Reading file 5 from 57...
Reading file 6 from 57...
Reading file 7 from 57...
Reading file 8 from 57...
Reading file 9 from 57...
Reading file 10 from 57...
Reading file 11 from 57...
Reading file 12 from 57...
Reading file 13 from 57...
Reading file 14 from 57...
Reading file 15 from 57...
Reading file 16 from 57...
Reading file 17 from 57...
Reading file 18 from 57...
Reading file 19 from 57...
Reading file 20 from 57...
Reading file 21 from 57...
Reading file 22 from 57...
Reading file 23 from 57...
Reading file 24 from 57...
Reading file 25 from 57...
Reading file 26 from 57...
Reading file 27 from 57...
Reading file 28 from 57...
Reading file 29 from 57...
Reading file 30 from 57...
Reading file 31 from 57...
Reading file 32 from 57...
Reading file 33 from 57...
Reading file 34 from 57...
Reading file 35 from 57...
Reading file 36 from 57...
Reading file 37 from 57...
Reading fi

# Crop to a subset

In [25]:
# Half-width of the square window (in lat/lon units) kept around the target point.
delta = 0.005

# Rows survive when their lat/lon lie strictly within `delta` of the target.
near_lat = (df_import['lat'] - delta < latitude) & (df_import['lat'] + delta > latitude)
near_lon = (df_import['lon'] - delta < longitude) & (df_import['lon'] + delta > longitude)
df_voxel = df_import[near_lat & near_lon]

df_voxel.shape

(2287, 16)

In [27]:
# Distinct `lithoklasse` values present in the loaded data.
df_import['lithoklasse'].unique()

array([3, 1, 2, 7, 5, 6])

# Display Voxel

In [33]:
# Draw every row of df_voxel as a vertical stack of markers running from its
# z_bottom towards its z_top, coloured by its lithoklasse_material.
dz = 0.2  # vertical spacing between consecutive markers of one row
x_all, y_all, z_all, color_all = [], [], [], []

# Zip only the columns that are needed; iterrows() would build a full Series
# for every row and yield an index value that is never used.
voxel_rows = zip(
    df_voxel['lon'],
    df_voxel['lat'],
    df_voxel['z_bottom'],
    df_voxel['z_top'],
    df_voxel['lithoklasse_material'],
)
for voxel_lon, voxel_lat, z_bottom, z_top, material in voxel_rows:
    # NOTE(review): arange with a float step can differ by one sample at the
    # upper end, and the `+ dz` stop lets the last marker lie above z_top.
    z_points = arange(z_bottom, z_top + dz, dz)
    n_points = len(z_points)
    x_all.extend([voxel_lon] * n_points)
    y_all.extend([voxel_lat] * n_points)
    z_all.extend(z_points)
    # Materials missing from `colors` fall back to a fixed colour.
    color_all.extend([colors.get(material, "#06D9EC")] * n_points)

fig = go.Figure(
    data=[
        go.Scatter3d(
            x=x_all,
            y=y_all,
            z=z_all,
            mode='markers',
            marker=dict(size=6, color=color_all, opacity=0.8),
        )
    ]
)

fig.update_layout(
    scene=dict(
        xaxis_title='Longitude',
        yaxis_title='Latitude',
        zaxis_title='Depth (m)',
    ),
    width=800,
    height=800,
    title='Interpolated 3D Lithology Profile',
)

fig.show()
