In [37]:
import pandas as pd
import numpy as np
import requests
from tqdm.notebook import tqdm
tqdm.pandas()

### Connecting to CA Geologic map to identify the rock type that each well sits on

[Data from this map](https://maps-cadoc.opendata.arcgis.com/maps/9eba56d981df4f839769ce9a2adc01f4/explore)

In [30]:
def identify_geology(lat, lon, timeout=30):
    """Look up the geologic map features at a single point.

    Sends an ``identify`` request to the "Geologic Map of California"
    MapServer hosted at gis.conservation.ca.gov and returns the features
    found at the point.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the point in decimal degrees (the request
        is sent with spatial reference 4326).
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled connection blocks forever, which matters
        when this function is called once per station.

    Returns
    -------
    list of dict or None
        The ``results`` array of the JSON response (an empty list when the
        key is absent), or ``None`` when the request fails, the body is not
        valid JSON, or the service reports an error. The failure reason is
        printed.
    """
    identify_url = (
        "https://gis.conservation.ca.gov/server/rest/services/"
        "CGS/Geologic_Map_of_California/MapServer/identify"
    )

    params = {
        "f": "json",
        "geometry": f"{lon},{lat}",  # x,y order: longitude first
        "geometryType": "esriGeometryPoint",
        "sr": "4326",
        # identify expects a map extent and image size alongside the point;
        # a small 0.02-degree box centred on the point is used here.
        "mapExtent": f"{lon-0.01},{lat-0.01},{lon+0.01},{lat+0.01}",
        "imageDisplay": "800,600,96",
        "tolerance": 1,
        "returnGeometry": "false",
        "layers": "all",
    }

    # NOTE(review): browser-like User-Agent; presumably the server rejects
    # the default python-requests one -- confirm before removing.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
        )
    }

    try:
        response = requests.get(
            identify_url, params=params, headers=headers, timeout=timeout
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection/timeout/HTTP errors.
        # ValueError: body was not valid JSON.
        print(f"Error querying service: {e}")
        return None

    if not isinstance(data, dict):
        print(f"Error querying service: unexpected response {data!r}")
        return None

    # ArcGIS services can answer HTTP 200 with an {"error": {...}} body, so a
    # clean status code alone does not mean the identify succeeded.
    if "error" in data:
        print(f"Error querying service: {data['error']}")
        return None

    return data.get("results", [])

# Smoke test: query one arbitrary point in California and print every
# feature of the response so its structure can be inspected.
if __name__ == "__main__":
    lat, lon = 34.0522, -118.2437

    results = identify_geology(lat, lon)
    if results:
        print("Number of results:", len(results))
        # One feature per line.
        print(*results, sep="\n")


Number of results: 1
{'layerId': 12, 'layerName': 'Generalized Rock Types', 'displayFieldName': 'PTYPE', 'value': 'Q', 'attributes': {'OBJECTID': '13872', 'PTYPE': 'Q', 'GENERAL_LITHOLOGY': 'marine and nonmarine (continental) sedimentary rocks', 'AGE': 'Pleistocene-Holocene', 'DESCRIPTION': 'Alluvium, lake, playa, and terrace deposits; unconsolidated and semi-consolidated. Mostly nonmarine, but includes marine deposits near the coast.', 'SHAPE': 'Polygon', 'SHAPE.STArea()': '3942876694.487556', 'SHAPE.STLength()': '1860311.283161'}}


### Hooking back into CA DWR wells to map rock type to depths

Getting the station IDs and performing the geologic mapping as the first stage.

In [17]:
# Download the station table through the portal's `datastore_search` action.
url = "https://data.cnra.ca.gov/api/3/action/datastore_search"
params = {
    "resource_id": "af157380-fb42-4abf-b72a-6f9f98868077",
    "limit": 100000,
    "offset": 0
}
response = requests.get(url, params=params, timeout=60)
# Fail here with a clear HTTP error instead of an opaque KeyError below.
response.raise_for_status()
result = response.json()['result']
data = result['records']

# Only one page is requested. If the service reports more rows than it
# returned, say so rather than silently analysing a truncated table.
total = result.get('total')
if total is not None and len(data) < total:
    print(
        f"Warning: received {len(data)} of {total} station records; "
        "page through the rest with 'offset'."
    )

station_mapping = pd.DataFrame(data)
# Keep only stations that report a well depth.
station_depth = station_mapping[station_mapping.well_depth.notna()] # approx 15k stations

In [46]:
def station_geologic(row, layer_id=12):
    """Return the rock-type codes mapped at a station's location.

    Parameters
    ----------
    row : object
        Anything exposing ``latitude`` and ``longitude`` attributes, e.g. a
        row yielded by ``DataFrame.iterrows()``.
    layer_id : int, optional
        ``layerId`` of the map layer to read. The default, 12, is the layer
        named 'Generalized Rock Types' in the sample response above.

    Returns
    -------
    list of str or None
        The ``PTYPE`` attribute of every matching feature, or ``None`` (with
        a printed warning) when the lookup failed or no matching feature
        carries a ``PTYPE``.
    """
    lat = row.latitude
    lon = row.longitude
    geology = identify_geology(lat, lon)

    # identify_geology returns None when the request itself failed; treat
    # that like "nothing found" instead of crashing on iteration.
    if geology is None:
        print(f"Warning: no geology found at {lat}, {lon} !")
        return None

    rock_type = [item for item in geology if item.get("layerId") == layer_id]
    ptypes = [
        rock["attributes"]["PTYPE"]
        for rock in rock_type
        if "PTYPE" in rock.get("attributes", {})
    ]

    if not ptypes:
        print(f"Warning: no geology found at {lat}, {lon} !")
        return None

    # A point on or near a polygon boundary can match more than one unit.
    if len(ptypes) > 1:
        print(f"Point has multiple geology: {len(ptypes)}")
    return ptypes

In [48]:
# Look up the rock type(s) for every station that has a well depth.
# Results are appended one at a time, so `rocks` still holds everything
# collected so far if the loop is interrupted.
rocks = []
for _, station in tqdm(station_depth.iterrows(), total=station_depth.shape[0]):
    rocks.append(station_geologic(station))

  0%|          | 0/14563 [00:00<?, ?it/s]

Point has multiple geology: 2


KeyboardInterrupt: 