In [None]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

import datetime

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


import json
import geopandas as gpd
import pygeos

import altair as alt

In [None]:
# --- Well completion reports ---
# Rows without a completion year are dropped, the year is cast to an integer
# and the column is renamed to `year` (one of the keys used by the later merge).
wellcompletion_plss_df = (
    pd.read_csv(r"assets/clean_data/well_completion_clean.csv")
    .dropna(subset=['YEARWORKENDED'])
    .astype({'YEARWORKENDED': 'int64'})
    .rename(columns={'YEARWORKENDED': 'year'})
)

# Drought years were determined visually from the chart in the notebook drought_reservoir_data
drought_years = [
    2019, 2020, 2021, 2018,
    2012, 2013, 2014, 2015, 2016,
    2007, 2008, 2009,
]
# Flag each well: 1 when it was completed in a drought year, 0 otherwise
wellcompletion_plss_df['drought_year'] = np.where(
    wellcompletion_plss_df['year'].isin(drought_years), 1, 0
)

# --- Precipitation stations ---
all_years_precipitation_station = pd.read_csv(r"assets/clean_data/precipitation_stations.csv")
# Put county names in title case (e.g. "San Joaquin"); COUNTY is a join key later on
all_years_precipitation_station['COUNTY'] = all_years_precipitation_station['COUNTY'].str.title()

# --- Reservoirs ---
weekly_reservoir_station_data = pd.read_csv(r"assets/clean_data/weekly_reservoir_station_data.csv")

  wellcompletion_plss_df = pd.read_csv(r"assets/clean_data/well_completion_clean.csv")


In [None]:
# Remove leading/trailing whitespace from every column name of the station table,
# then display the cleaned header.
all_years_precipitation_station.columns = all_years_precipitation_station.columns.str.strip()
all_years_precipitation_station.columns

Index(['station_id', 'STATION NAME', 'OCT', 'NOV', 'DEC', 'JAN', 'FEB', 'MAR',
       'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'average_year_precip', 'year',
       'LATITUDE', 'LONGITUDE', 'COUNTY'],
      dtype='object')

In [None]:
# PLSS sections from the subbasin file; per the original note it only contains the
# TRS areas that lie within the San Joaquin subbasin.
SJ_subbasin_plss = gpd.read_file("assets/clean_data/plss_subbasin.geojson")
# Merge the section polygons into a single geometry per TownshipRange,
# keeping TownshipRange as a regular column.
SJ_subbasin_plss_range = SJ_subbasin_plss.dissolve(by='TownshipRange', as_index=False)
# Interactive map of the dissolved township ranges
SJ_subbasin_plss_range.explore()

In [None]:
# PLSS sections from the California file. Judging by the file name this layer covers
# the whole state rather than only the San Joaquin subbasin -- TODO confirm.
california_plss = gpd.read_file("assets/clean_data/california_plss.geojson")
# Merge the section polygons into a single geometry per TownshipRange,
# keeping TownshipRange as a regular column.
california_plss_range = california_plss.dissolve(by='TownshipRange', as_index=False)
# Map preview left disabled:
#california_plss_range.explore()

In [None]:
# Build a GeoDataFrame of the precipitation stations.
# points_from_xy expects x before y, so longitude is passed first and latitude second.
station_points = gpd.points_from_xy(
    all_years_precipitation_station.LONGITUDE,
    all_years_precipitation_station.LATITUDE,
)
precipitation_data_gdf = gpd.GeoDataFrame(all_years_precipitation_station, geometry=station_points)
# Declare the coordinate reference system of the points (EPSG:4326)
precipitation_data_gdf = precipitation_data_gdf.set_crs('epsg:4326')

# Spatial join of the station points with the subbasin PLSS sections, then drop the
# stations that matched no section (MTRS is NaN), i.e. those outside the San Joaquin
# valley basin.
precipitation_data_plss = gpd.sjoin(precipitation_data_gdf, SJ_subbasin_plss, how="left")
precipitation_data_plss = precipitation_data_plss[precipitation_data_plss['MTRS'].notna()].copy()

# Same join and filter against the California PLSS sections
precipitation_california_data_plss = gpd.sjoin(precipitation_data_gdf, california_plss, how="left")
precipitation_california_data_plss = precipitation_california_data_plss[
    precipitation_california_data_plss['MTRS'].notna()
].copy()


In [None]:
# Size of the station table after keeping only rows matched to a subbasin PLSS section.
# The row count changes with the input data, so no expected value is recorded here.
precipitation_data_plss.shape  # (rows, columns)

(160, 29)

In [None]:
# Interactive map of the stations that were matched to a subbasin PLSS section
precipitation_data_plss.explore()

In [None]:
# Same checks for the join against california_plss: print (rows, columns), then map the stations.
# The row count changes with the input data, so no expected value is recorded here.
print(precipitation_california_data_plss.shape)
precipitation_california_data_plss.explore()

(1708, 29)


In [None]:
# Number of TownshipRange values that appear in both the well data and the subbasin station data
len(set(wellcompletion_plss_df.TownshipRange) & set(precipitation_data_plss.TownshipRange))

16

In [None]:
# TownshipRange values that have both well completions and a precipitation station
well_precip_tr = list(
    set(wellcompletion_plss_df.TownshipRange) & set(precipitation_data_plss.TownshipRange)
)

### The precipitation stations are dispersed almost uniformly along the length of the San Joaquin river basin
- A decision has to be made about the precipitation value for TownshipRanges that have no station to provide data
- We can use the average precipitation of the entire region for those TownshipRanges

In [None]:
# Attach station precipitation to every well. A well receives a station's values only
# when TownshipRange, COUNTY and year all match; `_merge` records whether a station
# was found ('both') or not ('left_only').
combined_well_precip_reser_df = wellcompletion_plss_df.merge(
    precipitation_data_plss,
    how='left',
    on=['TownshipRange', 'COUNTY', 'year'],
    indicator=True,
    suffixes=('_wellcompletion', '_precipitation_station'),
)

# Regional fallback: mean precipitation across all township ranges for each year.
# It is computed from the station table rather than from the merged frame: in the
# merged frame a station's value is repeated once per matching well, which would
# weight the "regional" mean by well counts and ignore stations with no matching well.
yearly_regional_precip = (
    precipitation_data_plss
    .groupby(['year', 'TownshipRange'])['average_year_precip'].mean()  # one value per township range and year
    .groupby(level='year').mean()                                      # unweighted mean over township ranges
)
combined_well_precip_reser_df['avg_precip_all_tr_year'] = (
    combined_well_precip_reser_df['year'].map(yearly_regional_precip)
)

# There are several years for which we have no precipitation data at all; remove those wells.
combined_well_precip_reser_df = combined_well_precip_reser_df[
    combined_well_precip_reser_df['avg_precip_all_tr_year'].notna()
].copy()

# Township ranges without a station have no precipitation value of their own:
# keep the station value where one matched, otherwise use the regional yearly mean.
combined_well_precip_reser_df['average_year_precip_corrected'] = (
    combined_well_precip_reser_df['average_year_precip']
    .fillna(combined_well_precip_reser_df['avg_precip_all_tr_year'])
)

In [None]:
# Compare shapes: merged frame, well completions, subbasin stations.
# The merge is a left join on the wells, so the first two row counts differ only if
# the merge duplicated wells or rows were dropped for years without precipitation data.
print(combined_well_precip_reser_df.shape, wellcompletion_plss_df.shape, precipitation_data_plss.shape)


(98080, 62) (98080, 34) (160, 29)


In [None]:
# Allow up to 100 columns in the rendered table, then preview five random rows
pd.set_option('display.max_columns', 100)
combined_well_precip_reser_df.sample(5)

Unnamed: 0,LATITUDE_wellcompletion,LONGITUDE_wellcompletion,TOWNSHIP,RANGE,SECTION,WELLLOCATION,CITY,COUNTY,BOTTOMOFPERFORATEDINTERVAL,TOPOFPERFORATEDINTERVAL,GROUNDSURFACEELEVATION,STATICWATERLEVEL,RECORDTYPE,USE,WCRNUMBER,TOTALDRILLDEPTH,TOTALCOMPLETEDDEPTH,DATEWORKENDED,CASINGDIAMETER,TOTALCOMPLETEDDEPTH_CORRECTED,DATEWORKENDED_CORRECTED,year,MONTHWORKENDED,geometry_wellcompletion,index_right_wellcompletion,OBJECTID_wellcompletion,Township_wellcompletion,Range_wellcompletion,Meridian_wellcompletion,Source_wellcompletion,Section_wellcompletion,MTRS_wellcompletion,TownshipRange,drought_year,station_id,STATION NAME,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,average_year_precip,LATITUDE_precipitation_station,LONGITUDE_precipitation_station,geometry_precipitation_station,index_right_precipitation_station,OBJECTID_precipitation_station,Township_precipitation_station,Range_precipitation_station,Meridian_precipitation_station,Source_precipitation_station,Section_precipitation_station,MTRS_precipitation_station,_merge,avg_precip_all_tr_year,average_year_precip_corrected
9499,36.7717,-120.0873,13S,17E,26.0,2040 N LASSEN,KERMAN,Fresno,400.0,240.0,,96.0,WellCompletion/New/Production or Monitoring/NA,Domestic,WCR2014-012709,,420.0,2014-07-10,5.0,420.0,2014-07-10,2014,7.0,POINT (-120.0873 36.7717),6147.0,45046.0,T13S,R17E,MDM,BLM,26.0,MDM-T13S-R17E-26,T13S R17E,1,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,0.4687,0.4687
92348,36.39247,-119.28765,18S,25E,5.0,NO AVE 328 & WS RD 132,VISALIA,Tulare,260.0,160.0,,138.0,WellCompletion/New/Production or Monitoring/NA,Agriculture,WCR2016-012441,,270.0,2016-08-01,10.75,270.0,2016-08-01,2016,8.0,POINT (-119.28765 36.39247),8630.0,59727.0,T18S,R25E,MDM,BLM,5.0,MDM-T18S-R25E-5,T18S R25E,1,VSL,VISALIA,1.27,0.81,1.5,2.45,0.75,1.96,0.66,0.32,0.05,0.0,0.0,0.0,0.814167,36.333,-119.300003,POINT (-119.30000 36.33300),8622.0,59719.0,T18S,R25E,MDM,BLM,30.0,MDM-T18S-R25E-30,both,0.976895,0.814167
4504,36.5982,-119.74593,15S,20E,25.0,9117 S. ROWELL,FRESNO,Fresno,300.0,240.0,,104.0,WellCompletion/New/Production or Monitoring/NA,Domestic,WCR2016-016187,,310.0,2016-01-14,6.625,310.0,2016-01-14,2016,1.0,POINT (-119.74593 36.5982),7181.0,50773.0,T15S,R20E,MDM,BLM,25.0,MDM-T15S-R20E-25,T15S R20E,1,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,0.976895,0.976895
32268,35.06951,-118.98469,11N,20W,2.0,MARICOPA HWY & S SABODAN STREET,BAKERSFIELD,Kern,920.0,900.0,,445.0,WellCompletion/New/Production or Monitoring/NA,Agriculture,WCR2016-015242,,1820.0,2016-03-20,30.0,1820.0,2016-03-20,2016,3.0,POINT (-118.98469 35.06951),14256.0,149558.0,T11N,R20W,SBM,BLM,2.0,SBM-T11N-R20W-2,T11N R20W,1,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,0.976895,0.976895
65030,38.217556,-121.246225,04N,07E,7.0,5882 E Collier RD,Acampo,San Joaquin,300.0,260.0,,,WellCompletion/New/Production or Monitoring/NA,Domestic,WCR2020-014052,300.0,300.0,2020-08-27,,300.0,2020-08-27,2020,8.0,POINT (-121.246225 38.217556),103.0,3874.0,T04N,R07E,MDM,BLM,7.0,MDM-T04N-R07E-7,T04N R07E,1,,,,,,,,,,,,,,,,,,,,,,,,,,,left_only,0.746146,0.746146


In [None]:
# List every column of the merged frame; the _wellcompletion / _precipitation_station
# suffixes mark which side of the merge a duplicated name came from.
combined_well_precip_reser_df.columns

Index(['LATITUDE_wellcompletion', 'LONGITUDE_wellcompletion', 'TOWNSHIP',
       'RANGE', 'SECTION', 'WELLLOCATION', 'CITY', 'COUNTY',
       'BOTTOMOFPERFORATEDINTERVAL', 'TOPOFPERFORATEDINTERVAL',
       'GROUNDSURFACEELEVATION', 'STATICWATERLEVEL', 'RECORDTYPE', 'USE',
       'WCRNUMBER', 'TOTALDRILLDEPTH', 'TOTALCOMPLETEDDEPTH', 'DATEWORKENDED',
       'CASINGDIAMETER', 'TOTALCOMPLETEDDEPTH_CORRECTED',
       'DATEWORKENDED_CORRECTED', 'year', 'MONTHWORKENDED',
       'geometry_wellcompletion', 'index_right_wellcompletion',
       'OBJECTID_wellcompletion', 'Township_wellcompletion',
       'Range_wellcompletion', 'Meridian_wellcompletion',
       'Source_wellcompletion', 'Section_wellcompletion',
       'MTRS_wellcompletion', 'TownshipRange', 'drought_year', 'station_id',
       'STATION NAME', 'OCT', 'NOV', 'DEC', 'JAN', 'FEB', 'MAR', 'APR', 'MAY',
       'JUN', 'JUL', 'AUG', 'SEP', 'average_year_precip',
       'LATITUDE_precipitation_station', 'LONGITUDE_precipitation_station',

In [None]:
# Columns kept for the rest of the analysis: well attributes plus the filled-in
# precipitation value. The raw station columns and merge bookkeeping are dropped.
columns_to_keep = [
    'WCRNUMBER', 'TownshipRange', 'COUNTY',
    'BOTTOMOFPERFORATEDINTERVAL', 'TOPOFPERFORATEDINTERVAL',
    'GROUNDSURFACEELEVATION', 'average_year_precip_corrected',
    'STATICWATERLEVEL', 'RECORDTYPE', 'USE',
    'TOTALDRILLDEPTH', 'TOTALCOMPLETEDDEPTH', 'DATEWORKENDED',
    'CASINGDIAMETER', 'TOTALCOMPLETEDDEPTH_CORRECTED',
    'DATEWORKENDED_CORRECTED', 'year', 'MONTHWORKENDED',
    'geometry_wellcompletion', 'MTRS_wellcompletion',
]
combined_well_precip_reser_df = combined_well_precip_reser_df.loc[:, columns_to_keep].copy()

In [None]:
# Show the rows that have a (filled-in) precipitation value
combined_well_precip_reser_df[combined_well_precip_reser_df['average_year_precip_corrected'].notna()]

Unnamed: 0,WCRNUMBER,TownshipRange,COUNTY,BOTTOMOFPERFORATEDINTERVAL,TOPOFPERFORATEDINTERVAL,GROUNDSURFACEELEVATION,average_year_precip_corrected,STATICWATERLEVEL,RECORDTYPE,USE,TOTALDRILLDEPTH,TOTALCOMPLETEDDEPTH,DATEWORKENDED,CASINGDIAMETER,TOTALCOMPLETEDDEPTH_CORRECTED,DATEWORKENDED_CORRECTED,year,MONTHWORKENDED,geometry_wellcompletion,MTRS_wellcompletion
12,WCR2016-017657,T01S R04E,Alameda,160.0,60.0,,0.976895,7.0,WellCompletion/New/Production or Monitoring/NA,Domestic,,160.0,2016-06-13,10.0,160.0,2016-06-13,2016,6.0,POINT (-121.56094 37.81932),MDM-T01S-R04E-29
20,WCR2015-013926,T02S R04E,Alameda,680.0,80.0,,0.676060,1.0,WellCompletion/New/Production or Monitoring/NA,Agriculture,,710.0,2015-09-29,12.0,710.0,2015-09-29,2015,9.0,POINT (-121.5724 37.7802),MDM-T02S-R04E-7
34,WCR2016-017810,T03S R11E,Alameda,360.0,340.0,,0.976895,40.0,WellCompletion/New/Production or Monitoring/NA,Agriculture,,500.0,2016-01-08,30.0,500.0,2016-01-08,2016,1.0,POINT (-120.747685 37.629699),MDM-T03S-R11E-34
35,WCR2018-010783,T02S R04E,Alameda,500.0,140.0,,0.560585,0.0,WellCompletion/New/Production or Monitoring/NA,Public,520.0,520.0,2018-11-22,,520.0,2018-11-22,2018,11.0,POINT (-121.585661 37.739607),MDM-T02S-R04E-19
58,WCR2014-004482,T05N R09E,Amador,266.0,166.0,,0.468700,,WellCompletion/New/Production or Monitoring/NA,Agriculture,,325.0,2014-06-04,10.0,325.0,2014-06-04,2014,6.0,POINT (-120.93 38.2891667),MDM-T05N-R09E-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98075,WCR2015-011444,T04N R06E,,290.0,210.0,,0.676060,,WellCompletion/New/Production or Monitoring/NA,Domestic,,290.0,2015-05-19,6.0,290.0,2015-05-19,2015,5.0,POINT (-121.320278 38.179722),MDM-T04N-R06E-21
98076,WCR2016-010754,T04N R07E,,205.0,145.0,,0.976895,,WellCompletion/New/Production or Monitoring/NA,Domestic,,205.0,2016-09-28,6.0,205.0,2016-09-28,2016,9.0,POINT (-121.187222 38.166111),MDM-T04N-R07E-26
98077,WCR2015-012485,T04N R08E,,350.0,250.0,,0.676060,,WellCompletion/New/Production or Monitoring/NA,Domestic,,350.0,2015-03-04,6.0,350.0,2015-03-04,2015,3.0,POINT (-121.070278 38.179167),MDM-T04N-R08E-23
98078,WCR2015-014442,T04N R05E,,165.0,105.0,,0.676060,,WellCompletion/New/Production or Monitoring/NA,Domestic,,165.0,2015-10-21,6.0,165.0,2015-10-21,2015,10.0,POINT (-121.466892 38.227086),MDM-T04N-R05E-6


In [None]:
# Columns remaining after the selection step.
# NOTE(review): the output saved with this notebook lists lower-case names and
# `average_year_precip` (without `_corrected`), which no visible cell produces --
# presumably a renaming cell was removed; confirm with Restart & Run All.
combined_well_precip_reser_df.columns

Index(['wcrnumber', 'townshiprange', 'county', 'bottomofperforatedinterval',
       'topofperforatedinterval', 'groundsurfaceelevation',
       'average_year_precip', 'staticwaterlevel', 'recordtype', 'use',
       'totaldrilldepth', 'totalcompleteddepth', 'dateworkended',
       'casingdiameter', 'totalcompleteddepth_corrected',
       'dateworkended_corrected', 'year', 'monthworkended',
       'geometry_wellcompletion', 'mtrs_wellcompletion'],
      dtype='object')

In [None]:
# Display the station table that was joined to the subbasin PLSS sections
precipitation_data_plss

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b042e2da-6536-449d-95b8-d85fa08825de' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>