### This notebook performs a deep dive into the large well completion report dataset

In [None]:
import gpdvega
import numpy as np 
import pandas as pd

import altair as alt
from shapely import wkt

import geopandas as gpd
import matplotlib.pyplot as plt

# Support large datasets: re-enable the default data transformer with the row
# limit switched off (max_rows=None) so the full well tables can be charted.
alt.data_transformers.enable('default', max_rows=None)



DataTransformerRegistry.enable('default')

##### Notes

There is only one type of Well Yield Unit of Measure in the data; the remaining values are blank, since all of this data comes from the same region.

In [None]:

# Read the file as a plain (non-spatial) pandas DataFrame first, so ordinary pandas
# operations such as filter/transform can be applied before building geometries.
well_completion_clean_df = gpd.read_file("../assets/outputs/well_completion_clean.csv", ignore_geometry=True)

# Keep recent years only. .copy() makes the filtered frame independent of the
# original, so the column assignments below do not raise SettingWithCopyWarning.
# NOTE(review): YEARWORKENDED is compared as a string here (lexicographic order);
# if years are stored like '2014.0', then 2014 itself passes this filter — confirm
# that is the intended cut-off.
well_completion_clean_df = well_completion_clean_df[well_completion_clean_df['YEARWORKENDED'] > '2014'].copy()

# Number of well records in each year, repeated on every row of that year
well_completion_clean_df['YEARLY_NEW_WELLS'] = well_completion_clean_df.groupby(['YEARWORKENDED'])['WCRNUMBER'].transform('count')

# After the pandas operations, parse the stored WKT geometry column and convert to a GeoDataFrame
well_completion_clean_df['geometry'] = well_completion_clean_df['geometry'].apply(wkt.loads)
well_completion_clean_gdf = gpd.GeoDataFrame(well_completion_clean_df, crs='epsg:4326')


In [None]:
### Aggregate counts by categories and year
# Township / county / year level: dissolve merges the geometries of each group and
# counts the remaining column (WCRNUMBER) within it.
county_group_cols = ['TownshipRange', 'COUNTY', 'YEARWORKENDED', 'YEARLY_NEW_WELLS']
county_aggregate_gdf = (
    well_completion_clean_gdf[county_group_cols + ['geometry', 'WCRNUMBER']]
    .dissolve(by=county_group_cols, aggfunc='count')
    .reset_index()
)
# Share of that year's wells that fall in each township/county group
county_aggregate_gdf['NEW_WELLS_NORMALIZED'] = (
    county_aggregate_gdf['WCRNUMBER'] / county_aggregate_gdf['YEARLY_NEW_WELLS']
)

# Same aggregation, additionally split by the well USE category
use_group_cols = ['USE', 'COUNTY', 'TownshipRange', 'YEARWORKENDED', 'YEARLY_NEW_WELLS']
use_aggregate_gdf = (
    well_completion_clean_gdf[use_group_cols + ['geometry', 'WCRNUMBER']]
    .dissolve(by=use_group_cols, aggfunc='count')
    .reset_index()
)
# Number of well records per (year, month), used for the seasonality chart
year_month_aggregate_df = (
    well_completion_clean_gdf[['MONTHWORKENDED', 'YEARWORKENDED', 'YEARLY_NEW_WELLS', 'WCRNUMBER']]
    .groupby(['YEARWORKENDED', 'MONTHWORKENDED'])
    .agg(WELL_COUNT=('WCRNUMBER', 'count'))
    .reset_index()
)

# Build the first-of-month date for each row. The month is parsed numerically instead
# of with .str.strip('.0'): strip() removes a *set of characters* from both ends, so
# '10.0' (or '10') collapsed to '1' and October was silently dated as January.
month_two_digits = (
    year_month_aggregate_df['MONTHWORKENDED']
    .astype(float)
    .astype(int)
    .astype(str)
    .str.zfill(2)
)
year_month_aggregate_df['FIRST_MONTH_DATE'] = pd.to_datetime(
    year_month_aggregate_df['YEARWORKENDED'].str[:4] + "-" + month_two_digits + "-" + '01'
)



In [None]:
# Interactive map of the dissolved well geometries, coloured by the USE column
use_aggregate_gdf.explore(column='USE')

The map above shows heavy usage of wells, with new well construction primarily for agricultural and domestic use.

### Check for seasonality

In [None]:
# Bar chart of the number of wells completed in each month, to look for seasonality
(
    alt.Chart(year_month_aggregate_df)
    .mark_bar()
    .encode(
        x=alt.X('FIRST_MONTH_DATE:T'),
        y=alt.Y('WELL_COUNT:Q'),
        tooltip=['YEARWORKENDED', 'MONTHWORKENDED', 'WELL_COUNT'],
    )
    .properties(width=800)
)

In [None]:
def view_construction_per_year(df, color_col='NEW_WELLS_NORMALIZED', init_year=2021):
    """Draw a geoshape map of ``color_col`` that is filtered by a year slider.

    Parameters
    ----------
    df : GeoDataFrame
        Must contain a ``YEARWORKENDED`` column whose values can be converted to
        float and then int, plus the column named by ``color_col``.
        The caller's frame is left untouched.
    color_col : str
        Name of the quantitative column mapped to the fill colour.
    init_year : int
        Year the slider starts on. It is clamped to the range of years present in
        ``df`` so the initial view is never an empty map.

    Returns
    -------
    alt.Chart
        A 500x300 geoshape chart bound to a "Year Work Ended" range slider.

    Raises
    ------
    ValueError
        If ``df`` has no rows (there would be no year range for the slider).
    """
    if len(df) == 0:
        raise ValueError("df is empty: there are no years to build the slider from")

    # Convert the year on a copy; assigning on `df` itself would change the caller's
    # dataframe as a side effect of merely plotting it.
    chart_df = df.assign(YEARWORKENDED=df.YEARWORKENDED.astype(float).astype(int))

    # Plain Python ints keep the chart specification free of numpy scalar types
    min_year_num = int(chart_df.YEARWORKENDED.min())
    max_year_num = int(chart_df.YEARWORKENDED.max())
    start_year = min(max(int(init_year), min_year_num), max_year_num)

    # Create Slider
    slider = alt.binding_range(
        min=min_year_num,
        max=max_year_num,
        step=1,
        name="Year Work Ended: ",
    )

    # NOTE(review): selection_single / add_selection are the Altair 4 spellings;
    # Altair 5 renames them (selection_point / add_params) — confirm the pinned version.
    slider_selection = alt.selection_single(
        fields=["YEARWORKENDED"], bind=slider, name="Year Work Ended",
        init={"YEARWORKENDED": start_year}
    )

    # The raw geometry is not useful in a tooltip, so only attribute columns are listed
    tooltip_cols = [col for col in chart_df.columns if col != 'geometry']

    return alt.Chart(chart_df).mark_geoshape(
    ).encode(
        color=f'{color_col}:Q',
        tooltip=tooltip_cols
    ).transform_filter(
        slider_selection
    ).add_selection(
        slider_selection
    ).properties(
        width=500,
        height=300
    )

In [None]:
# Wells completed per county per year.
# NOTE(review): `county_aggregate_df` was not defined anywhere in this notebook, so this
# cell raised NameError on a fresh kernel. The aggregation below is reconstructed from
# the columns the chart uses (COUNTY, YEARWORKENDED, COUNTY_WELL_COUNT) — confirm it
# matches the original definition.
# The CSV is re-read because well_completion_clean_df has already been filtered to
# recent years, whereas this chart keeps everything after 2000.
county_aggregate_df = (
    gpd.read_file("../assets/outputs/well_completion_clean.csv", ignore_geometry=True)
    .groupby(['COUNTY', 'YEARWORKENDED'])
    .agg(COUNTY_WELL_COUNT=('WCRNUMBER', 'count'))
    .reset_index()
)

# Stacked bars of yearly well counts, one colour per county. No fixed mark colour is
# set because the COUNTY colour encoding would override it anyway.
county_year_chart = (
    alt.Chart(county_aggregate_df[county_aggregate_df['YEARWORKENDED'] > '2000'])
    .mark_bar()
    .encode(
        y="COUNTY_WELL_COUNT:Q",
        x="YEARWORKENDED:O",
        color='COUNTY:N',
        tooltip=['COUNTY:N', "COUNTY_WELL_COUNT:Q", 'YEARWORKENDED'],
    )
).properties(width=800, height=200)

In [None]:
# Load the drought index and parse DATE after removing the "d_" marker from its strings
drought_df = pd.read_csv(r"../assets/outputs/california_weekly_drought_index.csv")
drought_df['DATE'] = pd.to_datetime(drought_df['DATE'].str.replace("d_", ""))

# Keep rows dated after 2000 and only the date + drought level columns
drought_columns = ['DATE', 'D0', 'D1', 'D2', 'D3', 'D4']
after_2000 = drought_df['DATE'].dt.year > 2000
drought_df = drought_df.loc[after_2000, drought_columns].copy()

# Long format: one row per (DATE, DROUGHT_LEVEL) for levels D1-D4
drought_long_df = drought_df.melt(
    id_vars='DATE',
    value_vars=['D1', 'D2', 'D3', 'D4'],
    var_name='DROUGHT_LEVEL',
    value_name='DROUGHT_AREA',
)

# Stepped area chart of drought area over time, coloured by drought level
drought_year_chart = (
    alt.Chart(drought_long_df)
    .mark_area(color="lightblue", interpolate='step-after', line=True)
    .encode(
        x='DATE:T',
        y='DROUGHT_AREA',
        color='DROUGHT_LEVEL',
        tooltip=['DATE', 'DROUGHT_LEVEL', 'DROUGHT_AREA'],
    )
    .properties(width=800, height=200)
)

In [None]:
# Stack the county well-count chart above the drought chart for comparison
alt.vconcat(county_year_chart, drought_year_chart)

The graphs above show that the droughts in 2007, 2014-2015, and again in 2021 caused a spike in water well construction, especially in Tulare County, followed by Fresno County. Note that we do not yet have data for all of 2022.

The San Joaquin Valley, which produces the majority of the 12.8% of the United States' agricultural production (as measured by dollar value) that comes from California, comprises seven counties of Northern California and one of Southern California: in the north, all of San Joaquin and Kings counties, most of Stanislaus, Merced, and Fresno counties, and parts of Madera and Tulare counties, along with a majority of Kern County in Southern California.

In [None]:
# For every year, count the number of wells constructed per township and county.
# The geometries were already rebuilt from the stored WKT column when
# well_completion_clean_gdf was created, so that GeoDataFrame is dissolved directly
# (the previously referenced `well_gdf` is not defined anywhere in this notebook and
# raised NameError on a fresh kernel).
well_township_gdf = (
    well_completion_clean_gdf[['TownshipRange', 'COUNTY', 'YEARWORKENDED', 'WCRNUMBER', 'YEARLY_NEW_WELLS', 'geometry']]
    .dissolve(by=['TownshipRange', 'COUNTY', 'YEARWORKENDED', 'YEARLY_NEW_WELLS'], aggfunc='count')
    .reset_index()
)


In [None]:
# Map of each township's share of the year's new wells, with a year slider
view_construction_per_year(county_aggregate_gdf, 'NEW_WELLS_NORMALIZED')



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b042e2da-6536-449d-95b8-d85fa08825de' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>