# Match Rural Routes to Wells

## Import necessary libraries

In [79]:
import pandas as pd
import geopandas as gpd
import configparser
from pathlib import Path
import re

## Import Parcels, Wells

## Read Config File

In [80]:
# Config file contains file paths and name of township (uesd to match parcel TWP_CITY column)
config = configparser.ConfigParser()
config.read('/home/petermitchell/Documents/ContractWork/config.ini')
config.sections()

twp = config['DEFAULT']['township']
verified_wells_path = config['DEFAULT']['verified_wells']
unverified_wells_path = config['DEFAULT']['unverified_wells']
parcels_path = config['DEFAULT']['parcels']
samples_path = config['DEFAULT']['samples']
output_path = config['DEFAULT']['output']

output_dir = Path(output_path)
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
parcels = gpd.read_file(parcels_path)
# Limit parcels to just those in the desired township
twp_parcels = parcels[parcels['TWP_CITY'] == twp]

verified_wells = gpd.read_file(verified_wells_path)
unverified_wells = gpd.read_file(unverified_wells_path)

verified_wells['Verified'] = True
unverified_wells['Verified'] = False

# Join verified and unverified wells into a single dataframe
wells = gpd.GeoDataFrame(pd.concat([verified_wells, unverified_wells], ignore_index=True))

# Export all wells
wells.to_file(output_dir / "wells.gpkg")

## Import Samples file as Pandas dataframe

In [82]:
samples = pd.read_csv(samples_path)

# Create new column with rural route addresses stripped of city and ZIP
rr_addresses = []
for value in samples['SampleAddress']:
    match = re.search('R[RT]\s\d*\sBOX\s\d*', value)
    rr_addresses.append(match if match is None else match.group())
samples['rr_addresses'] = rr_addresses

print(len(samples), "samples provided")

51 samples provided


## Match Rural Routes to Parcels

In [83]:
# Address_1 field in parcels contains the RR address

# Join on ADDRESS_1, rr_addresses

located_samples = twp_parcels.merge(samples.dropna(subset='rr_addresses'), left_on='ADDRESS_1', right_on='rr_addresses')
print(len(located_samples), "samples match at least one parcel")

# Remove uncessary columns, drop cases where multiple parcels have the same RR address
located_samples = located_samples[['Lab_SampleID', 'rr_addresses', 'ADDRESS_1', 'geometry']].drop_duplicates(subset='Lab_SampleID')
print(len(located_samples), "samples match exactly one parcel")

# Set the right coordinate system (NAD27 UTM 15N, EPSG 26915)
located_samples = located_samples.to_crs(26915)

45 samples match at least one parcel
26 samples match exactly one parcel


## Check for matching well

In [None]:
sample_well_match = gpd.sjoin(located_samples, wells, how="inner", predicate="intersects")
sample_well_match.to_file(output_dir / 'sample_well_match.gpkg')

# Filter to only necessary columns and remove cases where there are multiple wells on a parcel
unique_sample_well_match = sample_well_match[['Lab_SampleID', 'rr_addresses', 'UTME', 'UTMN', 'WELLID', 'Verified', 'geometry']].drop_duplicates(subset='Lab_SampleID')
unique_sample_well_match.to_file(output_dir / 'unique_sample_well_match.gpkg')
unique_sample_well_match.to_csv(output_dir / 'unique_sample_well_match.csv')

print("Matches with at least one well:", len(sample_well_match))
print("Matches with exactly one well:", len(unique_sample_well_match))


Matches with at least one well: 16
Matches with exactly one well: 15
