In [1]:
import numpy as np
import geopandas as gpd
import pandas as pd
from config.config_loader import get_config
import AnalyticsAndDBScripts.sql_connect as sql
import AnalyticsAndDBScripts.sql_schemas as schema
import AnalyticsAndDBScripts.well_spacing as ws
from concurrent.futures import ProcessPoolExecutor, as_completed
from multiprocessing import Lock, Manager
import matplotlib.pyplot as plt
import folium
from branca.element import Template, MacroElement
import gc
import warnings

# Silence warnings for the rest of the session. No category, message or module
# filter is given, so this hides every warning (deprecations included), not
# just the noisy ones.
warnings.filterwarnings(action='ignore')

In [2]:
# SQL Server credentials come from the shared config; the target database
# name is attached to the same dictionary afterwards.
sql_creds_dict = get_config('credentials', 'sql1_sa')
sql_creds_dict['db_name'] = 'Analytics'

# Well-spacing parameters used throughout the notebook
params = get_config('well_spacing')
fit_groups_config, projection, min_lat_length = (
    params['fit_groups'],
    params['final_projection'],
    params['minimum_lateral_length'],
)

# Run-level settings: the day offset handed to the SQL builder and the
# timestamp written to the UpdateDate column of the rows produced by this run.
day_offset = 30
update_date = pd.Timestamp.now()

In [3]:
# Function to create sql statements
def create_statement(config, group_name, min_lat_length, day_offset=0):
    """Build the T-SQL query that selects the horizontal wells of one fit group.

    The query returns well header columns for horizontal wells with a first
    production date on or after 2003-01-01, a lateral length greater than
    ``min_lat_length`` and a non-null geometry. Rows are returned only when
    the group has no matching ``dbo.WELL_SPACING`` rows yet
    (``MinDate IS NULL``) or when its oldest ``UpdateDate`` plus
    ``day_offset`` days is not later than ``GETDATE()``.

    Args:
        config: Sequence of fit-group dicts, each with a ``'name'`` and a
            ``'Basins'`` list of basin-name strings.
        group_name: Name of a configured fit group, or ``'OTHER'`` for wells
            whose basin is in no configured group (or is NULL).
        min_lat_length: Exclusive lower bound for ``LateralLength_FT``. It is
            interpolated as-is, so it is expected to be a number.
        day_offset: Days added to the oldest ``UpdateDate`` before it is
            compared with the current date. Interpolated as-is.

    Returns:
        str: The SQL statement.

    Raises:
        ValueError: If ``group_name`` is not ``'OTHER'`` and is not the name
            of any group in ``config``.
    """
    def quote_basins(basins):
        # Basin names are embedded as SQL string literals; doubling a single
        # quote keeps a name such as O'BRIEN from ending the literal early.
        return "', '".join(basin.replace("'", "''") for basin in basins)

    def get_basins(config, group_name):
        basins = next((group['Basins'] for group in config if group['name'] == group_name), None)
        if basins is None:
            # Previously this fell through to "', '".join(None) and surfaced
            # as an unrelated-looking TypeError.
            known = [group['name'] for group in config]
            raise ValueError(f"Unknown fit group {group_name!r}; expected 'OTHER' or one of {known}")
        return basins

    def build_statement(spacing_join, basin_filter):
        # One template serves both variants; they differ only in the join
        # keyword of the first CTE and in the basin predicate. The join
        # keyword is padded to keep the SQL columns aligned.
        return f'''
        WITH MinUpdateDate AS (
            SELECT      MIN(S.UpdateDate) AS MinDate
            FROM        dbo.WELL_SPACING S
            {spacing_join:<11} dbo.WELL_HEADER W ON S.WellID = W.WellID
            WHERE       W.Trajectory = 'HORIZONTAL'
            AND         W.FirstProdDate >= '2003-01-01'
            AND         W.LateralLength_FT > {min_lat_length}
            AND         {basin_filter}
        ),
        FilteredWells AS (
            SELECT      W.WellID, W.API_UWI_Unformatted, W.Basin, W.FirstProdDate, W.LateralLength_FT, W.Geometry.STAsText() AS Geometry,
                        W.Geometry.STSrid AS EPSGCode, W.Latitude, W.Longitude, W.Latitude_BH, W.Longitude_BH, U.MinDate
            FROM        dbo.WELL_HEADER W
            CROSS JOIN  MinUpdateDate U
            WHERE       W.Trajectory = 'HORIZONTAL'
            AND         W.FirstProdDate >= '2003-01-01'
            AND         W.LateralLength_FT > {min_lat_length}
            AND         W.Geometry IS NOT NULL
            AND         {basin_filter}
        )
        SELECT      *
        FROM        FilteredWells
        WHERE       MinDate IS NULL OR DATEADD(day, {day_offset}, MinDate) <= GETDATE()
        '''

    if group_name == 'OTHER':
        # Wells whose basin is not claimed by any configured group
        all_basins = [basin for group in config for basin in group['Basins']]
        all_basins_sql = quote_basins(all_basins)
        return build_statement('LEFT JOIN', f"(W.Basin NOT IN ('{all_basins_sql}') OR W.Basin IS NULL)")

    basin_sql = quote_basins(get_basins(config, group_name))
    return build_statement('INNER JOIN', f"W.Basin IN ('{basin_sql}')")

In [4]:
# Execute query and store results in a dataframe
def load_data(creds, statement):
    """Run ``statement`` against SQL Server and return what ``pd.read_sql`` yields.

    A new engine is created from ``creds`` on every call and is always
    disposed afterwards, whether or not the query succeeds.

    Args:
        creds: Mapping with the keys 'username', 'password', 'db_name',
            'servername' and 'port'.
        statement: SQL passed to ``pd.read_sql``.

    Returns:
        The result of ``pd.read_sql(statement, engine)``.
    """
    # (keyword expected by sql_connect, key under which creds stores it)
    keyword_for = (
        ('username', 'username'),
        ('password', 'password'),
        ('db_name', 'db_name'),
        ('server_name', 'servername'),
        ('port', 'port'),
    )
    connect = sql.sql_connect
    engine = connect(**{keyword: creds[key] for keyword, key in keyword_for})
    try:
        return pd.read_sql(statement, engine)
    finally:
        # Runs on success and on failure alike
        engine.dispose()

In [5]:
# Function to process each basin
def process_data(args):
    """Run the well-spacing pipeline for one fit group and load the result to SQL.

    Steps: build and run the well query, buffer and prepare the wells with the
    ``ws`` helpers, compute the distance metrics row by row, reproject the
    geometry columns, then stage the rows in chunks and call the
    ``sp_InsertFromStagingToWellSpacing`` stored procedure.

    Everything arrives in one tuple so the function can be submitted to an
    executor with a single argument. Note that ``params['buffer_distance']``
    is read from the module-level ``params`` rather than from ``args``.

    Args:
        args: Tuple of (config, group_name, projection, min_lat_length,
            day_offset, update_date, sql_creds_dict, lock).

    Returns:
        None. Progress and failures are reported with ``print``; exceptions
        are caught here and not re-raised.
    """
    config, group_name, projection, min_lat_length, day_offset, update_date, sql_creds_dict, lock = args
    chunk_size = 500000  # rows per staging load
    try:
        # Load data from SQL Server
        statement = create_statement(config, group_name, min_lat_length, day_offset)

        df = load_data(sql_creds_dict, statement)

        if df.empty:
            print(f"No data for fit_group {group_name}")
            return

        # Apply optimize_buffer function to dataframe
        df = ws.optimize_buffer(df, geo_col='Geometry', sfc_lat_col='Latitude', sfc_long_col='Longitude', buffer_distance_ft=params['buffer_distance'])

        # Clean dataframe and prep for distance calculations
        df = ws.prep_df_distance(df, well_id_col='WellID')

        # Apply calculations to the dataframe
        df_cols = ['MinDistance', 'MedianDistance', 'MaxDistance', 'AvgDistance', 'neighbor_IntersectionFraction', 'RelativePosition']
        df[df_cols] = df.apply(ws.calculate_distance, axis=1, result_type='expand')

        # Columns that contain spatial data
        geometry_columns = ['clipped_lateral_geometry', 'lateral_geometry_buffer', 'clipped_neighbor_lateral_geometry', 'neighbor_lateral_geometry_buffer']

        # Reproject geometries from EPSG:6579 to defined projection
        for col in geometry_columns:
            gdf = gpd.GeoDataFrame(df, geometry=col, crs='EPSG:6579')
            gdf = gdf.to_crs(projection)
            df[col] = gdf.geometry

        # Add a few columns to the dataframe
        df['Projection'] = projection
        df['UpdateDate'] = update_date

        # Drop rows with a null in any column
        df = df.dropna()

        # If cleaning removed every row there is nothing to stage, so stop
        # before the stored procedure is called.
        # NOTE(review): presumably the procedure expects dbo.WELL_SPACING_STAGE
        # to have been created by a preceding load - confirm.
        if df.empty:
            print(f"No complete rows for fit_group {group_name} after cleaning; nothing staged")
            return

        # Divide the dataframe into chunks
        clean_df_list = [df.iloc[start:start + chunk_size] for start in range(0, len(df), chunk_size)]

        # Manage memory
        del df
        gc.collect()

        # Convert geometry columns to text and stage each chunk
        for chunk in clean_df_list:
            chunk = chunk.map(ws.geom_to_wkt)
            sql.load_data_to_sql(chunk, sql_creds_dict, schema.well_spacing_stage, lock)
            del chunk
            gc.collect()

        # Manage memory
        del clean_df_list
        gc.collect()

        # Move data from dbo.WELL_SPACING_STAGE to dbo.WELL_SPACING and drop dbo.WELL_SPACING_STAGE
        sql.execute_stored_procedure(sql_creds_dict, 'sp_InsertFromStagingToWellSpacing', lock)

        print(f"Processing fit_group {group_name} complete")

    except Exception as e:
        # Best effort per group: report and return so one failing group does
        # not surface as a failed future in the caller.
        print(f"Error processing fit_group {group_name}: {e}")

In [6]:
# Run the well spacing calculations and load the data into Axia_Analytics
def main(fit_group_list=None, max_workers=4):
    """Process fit groups in parallel worker processes.

    Each group is submitted to a process pool as one ``process_data`` call.
    All workers share one manager lock, which is passed along in the
    argument tuple.

    Args:
        fit_group_list: Names of the fit groups to process. Defaults to
            ``['WILLISTON']``, the single group this notebook was last run
            for. Pass ``[group['name'] for group in fit_groups_config] + ['OTHER']``
            to process every configured group plus the catch-all. A single
            name given as a string is treated as a one-element list.
        max_workers: Number of worker processes in the pool.
    """
    if fit_group_list is None:
        fit_group_list = ['WILLISTON']
    elif isinstance(fit_group_list, str):
        # Iterating a bare string would submit one job per character
        fit_group_list = [fit_group_list]

    with Manager() as manager:
        lock = manager.Lock()
        args_list = [
            (fit_groups_config, group, projection, min_lat_length, day_offset, update_date, sql_creds_dict, lock)
            for group in fit_group_list
        ]

        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(process_data, args) for args in args_list]

            for future in as_completed(futures):
                try:
                    future.result()  # Get the result to catch any exceptions
                except Exception as exc:
                    print(f'Generated an exception: {exc}')

# Entry point: runs the pipeline when this code executes as the main module
# (as it does when the cell is run in the notebook), but not when the code is
# merely imported under another module name.
if __name__ == '__main__':
    main()

Created table WELL_SPACING_STAGE
Data loaded into table WELL_SPACING_STAGE
Stored procedure sp_InsertFromStagingToWellSpacing executed successfully.
Processing fit_group WILLISTON complete


In [7]:
# Function to plot the clipped well survey, surface location, and the buffer around the well survey
def plot_rows(gdf, start_row, end_row):
    """Plot surface location, clipped lateral and buffer for a range of rows.

    One figure is drawn per row position from ``start_row`` to ``end_row``
    (inclusive). ``end_row`` is capped at the last row of ``gdf``, so asking
    for more rows than exist plots what is available instead of raising
    ``IndexError``.

    Args:
        gdf: Frame with the geometry columns 'sfc_loc',
            'clipped_lateral_geometry' and 'lateral_geometry_buffer', plus
            the 'optimal_conformity' and 'optimal_buffer' columns.
        start_row: Position of the first row to plot.
        end_row: Position of the last row to plot (inclusive).
    """
    last_row = min(end_row, len(gdf) - 1)
    for row in range(start_row, last_row + 1):
        fig, ax = plt.subplots()
        # Double brackets keep a one-row frame, so set_geometry/plot still work
        current_row = gdf.iloc[[row]]

        # Plot the surface location
        current_row.set_geometry('sfc_loc').plot(ax=ax, color='blue', marker='o', markersize=50)

        # Plot the clipped lateral
        current_row.set_geometry('clipped_lateral_geometry').plot(ax=ax, color='red')

        # Plot the buffer around the lateral
        current_row.set_geometry('lateral_geometry_buffer').plot(ax=ax, color='green', alpha=0.5)

        # Show rectangle conformity
        conformity = current_row['optimal_conformity'].values[0]
        optimal_buffer = current_row['optimal_buffer'].values[0]
        ax.set_title(f"Rectangle Conformity: {conformity:.2f}, Optimal Buffer: {optimal_buffer}")

        plt.show()
        # Release the figure so a long row range does not accumulate open figures
        plt.close(fig)

In [8]:
SHOW_PLOTS = False
basin = 'SAN JUAN'
if SHOW_PLOTS:
    # No notebook-level `df` exists (it is only a local inside the pipeline
    # functions), so the wells are queried here explicitly. The basin's fit
    # group is looked up in the config; basins in no group fall under 'OTHER'.
    basin_group = next((group['name'] for group in fit_groups_config if basin in group['Basins']), 'OTHER')

    # day_offset is left at its default of 0 so the update-date filter in the
    # query uses no extra offset.
    wells_df = load_data(sql_creds_dict, create_statement(fit_groups_config, basin_group, min_lat_length))

    # Plot the clipped well survey, surface location, and the buffer around the well survey
    new_df = ws.optimize_buffer(wells_df[wells_df['Basin'] == basin], geo_col='Geometry', sfc_lat_col='Latitude', sfc_long_col='Longitude', buffer_distance_ft=params['buffer_distance'])
    # Cap the range so basins with fewer than 11 wells do not index past the end
    plot_rows(new_df, start_row=0, end_row=min(10, len(new_df) - 1))

In [9]:
# Show a map of the full lateral, clipped lateral and (optionally) buffer geometries
SHOW_FOLIUM_MAP = SHOW_PLOTS
SHOW_BUFFER = False
m = None  # stays None, which renders nothing, when the map is switched off
if SHOW_PLOTS and SHOW_FOLIUM_MAP:
    # Build one GeoDataFrame per geometry column and reproject each to WGS84 for folium
    gdf = gpd.GeoDataFrame(new_df, geometry='Geometry', crs="EPSG:6579")
    gdf = gdf.to_crs("EPSG:4326")
    gdf = gdf.dropna(subset=['Geometry'])

    clipped_gdf = gpd.GeoDataFrame(new_df, geometry='clipped_lateral_geometry', crs="EPSG:6579")
    clipped_gdf = clipped_gdf.to_crs("EPSG:4326")
    clipped_gdf = clipped_gdf.dropna(subset=['clipped_lateral_geometry'])

    buffer_gdf = gpd.GeoDataFrame(new_df, geometry='lateral_geometry_buffer', crs="EPSG:6579")
    buffer_gdf = buffer_gdf.to_crs("EPSG:4326")
    buffer_gdf = buffer_gdf.dropna(subset=['lateral_geometry_buffer'])

    # Create a Folium map; the start location is only a fallback because the
    # view is fitted to the data further down.
    m = folium.Map(location=[34.846, -96.111], zoom_start=10, tiles='OpenStreetMap')

    # Add LateralLine geometries to the map
    for _, row in gdf.iterrows():
        color = 'blue'
        folium.GeoJson(
            row['Geometry'],
            # color is bound as a default so each layer keeps its own value
            style_function=lambda x, color=color: {'color': color},
            tooltip=f"WellID: {row['WellID']}"
        ).add_to(m)
    # Add clipped_LateralLine geometries to the map
    for _, row in clipped_gdf.iterrows():
        color = 'red'
        folium.GeoJson(
            row['clipped_lateral_geometry'],
            style_function=lambda x, color=color: {'color': color},
            tooltip=f"Clipped Distance: {row['optimal_buffer']:.0f}"
        ).add_to(m)
    if SHOW_BUFFER:
        # Add optimal_buffer geometries to the map
        for _, row in buffer_gdf.iterrows():
            color = 'green'
            folium.GeoJson(
                row['lateral_geometry_buffer'],
                style_function=lambda x, color=color: {'color': color},
                tooltip=f"Rectangular Conformity: {row['optimal_conformity']:.3f}"
            ).add_to(m)

    # Zoom to the plotted laterals instead of staying on the fixed start location
    if not gdf.empty:
        min_lon, min_lat, max_lon, max_lat = (float(value) for value in gdf.total_bounds)
        m.fit_bounds([[min_lat, min_lon], [max_lat, max_lon]])

    # Add WMS layer (layers is a comma-separated string of layer names)
    wms = folium.WmsTileLayer(
        url='https://gis.blm.gov/arcgis/services/Cadastral/BLM_Natl_PLSS_CadNSDI/MapServer/WmsServer?',
        layers='1,2,3',
        fmt='image/png',
        transparent=True,
        version='1.3.0'
    )

    wms.add_to(m)

    # Add a legend that matches the layers actually drawn above
    legend_items = [('blue', 'Lateral'), ('red', 'Clipped lateral')]
    if SHOW_BUFFER:
        legend_items.append(('green', 'Buffer'))
    legend_rows = '\n'.join(
        f'        <p><span style="color:{color};">●</span> {label}</p>'
        for color, label in legend_items
    )
    # Plain concatenation (not an f-string) so the Jinja braces stay untouched
    legend_html = (
        '''
    {% macro html(this, kwargs) %}
    <div style="position: fixed; bottom: 50px; left: 50px; z-index:9999; background-color: white; border:2px solid grey; border-radius:6px; padding: 10px;">
        <p><strong>Legend</strong></p>
'''
        + legend_rows
        + '''
    </div>
    {% endmacro %}
    '''
    )
    legend = MacroElement()
    legend._template = Template(legend_html)
    m.get_root().add_child(legend)

# Kept at top level: a notebook only auto-displays the value of a cell's last
# top-level expression, so `m` nested inside the `if` was never rendered.
m