In [1]:
import sys
# Add the parent directory to the import search path so the `lib.*` imports
# in the next cell resolve (presumably `lib` lives one level up — confirm).
sys.path.append('..')

In [2]:
import numpy as np
import altair as alt

from lib.shortage_v2 import ShortageReportsDataset
from lib.viz import draw_missing_data_chart, get_base_map, view_year_with_slider, view_year_side_by_side, display_data_on_map, visualize_seasonality_by_month



In [3]:
# Lift Altair's row-count limit on chart data so the full dataset can be plotted.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [4]:
# Instantiate the shortage-reports dataset wrapper; its `map_df` attribute is
# inspected and transformed by the cells below.
shortage = ShortageReportsDataset()

In [5]:
# Quick look at the raw report table: its dimensions plus a few random rows.
print(shortage.map_df.shape)
# Fixed seed so the preview shows the same rows on every Restart & Run All.
shortage.map_df.sample(4, random_state=42)

(3771, 10)


Unnamed: 0,REPORT_DATE,COUNTY,LATITUDE,LONGITUDE,STATUS,SHORTAGE_TYPE,PRIMARY_USAGES,YEAR,MONTH,geometry
505,SHORTAGE_REPORTED_2015-06-03,Madera,37.210692,-119.907219,Undefined,Dry well (groundwater),Household,2015,6,POINT (-119.90722 37.21069)
1188,SHORTAGE_REPORTED_2021-08-02,Tulare,36.428936,-119.28057,Undefined,Dry well (groundwater),Household,2021,8,POINT (-119.28057 36.42894)
2030,SHORTAGE_REPORTED_2015-10-12,Tulare,36.115734,-119.053122,Resolved,Dry well (groundwater),Household,2015,10,POINT (-119.05312 36.11573)
3757,SHORTAGE_REPORTED_2014-12-11,Madera,36.990802,-119.98132,Outage,Dry well (groundwater),Household,2014,12,POINT (-119.98132 36.99080)


Pre-process the shortage report data and their geospatial locations, combine them into the geospatial map_df dataset, and overlay the San Joaquin Valley boundaries to keep only the data in the San Joaquin Valley.

In [6]:
# Pre-process map_df, retaining only the columns listed here.
shortage.preprocess_map_df(
    features_to_keep=[
        "REPORT_DATE",
        "COUNTY",
        "LATITUDE",
        "LONGITUDE",
        "STATUS",
        "SHORTAGE_TYPE",
        "PRIMARY_USAGES",
        "YEAR",
        "MONTH",
        "geometry",
    ]
)
# Restrict the reports to the San Joaquin Valley, then display the result.
shortage.keep_only_sjv_data()
shortage.map_df

Unnamed: 0,REPORT_DATE,COUNTY,LATITUDE,LONGITUDE,STATUS,SHORTAGE_TYPE,PRIMARY_USAGES,YEAR,MONTH,geometry
0,SHORTAGE_REPORTED_2017-10-31,Kern,35.280117,-118.896376,Undefined,Dry well (groundwater),Household,2017,10,POINT (-118.89638 35.28012)
1,SHORTAGE_REPORTED_2018-06-01,Kern,35.290510,-118.627383,Undefined,Dry well (groundwater),Household,2018,6,POINT (-118.62738 35.29051)
2,SHORTAGE_REPORTED_2018-05-24,Kern,35.290852,-118.628775,Undefined,Owner of well will not fix problem with well,Household,2018,5,POINT (-118.62878 35.29085)
3,SHORTAGE_REPORTED_2019-05-29,Kern,35.304669,-118.914434,Undefined,"smell like oil, sand in water",Household,2019,5,POINT (-118.91443 35.30467)
4,SHORTAGE_REPORTED_2017-08-07,Kern,35.325274,-118.911520,Undefined,Dry well (groundwater),Household,2017,8,POINT (-118.91152 35.32527)
...,...,...,...,...,...,...,...,...,...,...
2524,SHORTAGE_REPORTED_2017-08-07,Merced,37.400548,-120.585828,Undefined,Dry well (groundwater),Household,2017,8,POINT (-120.58583 37.40055)
2525,SHORTAGE_REPORTED_2014-09-02,Merced,37.405558,-120.649061,Undefined,Dry well (groundwater),Household,2014,9,POINT (-120.64906 37.40556)
2526,SHORTAGE_REPORTED_2015-06-03,Merced,37.405719,-120.641678,Outage,Dry well (groundwater),Household,2015,6,POINT (-120.64168 37.40572)
2527,SHORTAGE_REPORTED_2021-09-10,Merced,37.416058,-120.661143,Undefined,Dry well (groundwater),Household,2021,9,POINT (-120.66114 37.41606)


Look at missing data in the dataset

In [7]:
# Chart missing values in the pre-processed, SJV-only report table.
draw_missing_data_chart(shortage.map_df)

Map of the well shortage reports in the San Joaquin Valley for the year 2015

In [8]:
# Map the point-level reports for year 2015, with COUNTY as the displayed feature.
display_data_on_map(shortage.map_df, feature="COUNTY", year=2015)


Next, for all Township-Ranges and every year:

1. Count the number of wells
2. Fill NaN values with 0

In [9]:
# Aggregate map_df per Township-Range and year, adding the WELL_COUNT column.
# Missing values are left untouched here; the next cell fills them.
shortage.compute_features_by_township(
    add_well_count=True,
    fill_na_with_zero=False,
)
shortage.map_df

Unnamed: 0,TOWNSHIP_RANGE,YEAR,geometry,WELL_COUNT
0,T01N R07E,2021,"POLYGON ((-121.25785 37.88341, -121.25785 37.9...",1
1,T01N R08E,2021,"POLYGON ((-121.14913 37.88453, -121.14913 37.9...",1
2,T01S R08E,2021,"POLYGON ((-121.03392 37.88617, -121.03392 37.7...",4
3,T01S R09E,2021,"POLYGON ((-121.03686 37.88683, -120.92335 37.8...",3
4,T02N R06E,2021,"POLYGON ((-121.37076 37.97036, -121.37076 38.0...",1
...,...,...,...,...
14,T23S R28E,2019,"POLYGON ((-119.00029 35.87699, -119.00029 35.9...",1
15,T25S R26E,2019,"POLYGON ((-119.22324 35.70335, -119.22324 35.7...",1
16,T29S R26E,2019,"POLYGON ((-119.23435 35.35432, -119.23435 35.4...",1
17,T30S R28E,2019,"POLYGON ((-119.02127 35.26682, -119.02127 35.3...",1


In [10]:
# Give townships that have no data a WELL_COUNT of 0, then display the result.
shortage.fill_townships_with_no_data(
    features_to_fill=["WELL_COUNT"],
    feature_value=0,
)
shortage.map_df

Unnamed: 0,TOWNSHIP_RANGE,YEAR,geometry,WELL_COUNT
0,T01N R07E,2021,"POLYGON ((-121.25785 37.88341, -121.25785 37.9...",1
1,T01N R08E,2021,"POLYGON ((-121.14913 37.88453, -121.14913 37.9...",1
2,T01S R08E,2021,"POLYGON ((-121.03392 37.88617, -121.03392 37.7...",4
3,T01S R09E,2021,"POLYGON ((-121.03686 37.88683, -120.92335 37.8...",3
4,T02N R06E,2021,"POLYGON ((-121.37076 37.97036, -121.37076 38.0...",1
...,...,...,...,...
473,T32S R26E,2012,"POLYGON ((-119.23510 35.09371, -119.23510 35.1...",0
474,T32S R27E,2012,"POLYGON ((-119.12837 35.09439, -119.12837 35.1...",0
475,T32S R28E,2012,"POLYGON ((-119.02170 35.09292, -119.02170 35.1...",0
476,T32S R29E,2012,"POLYGON ((-118.91470 35.09263, -118.91470 35.1...",0


In [11]:
# Re-display of map_df; no transformation has run since the previous cell,
# so this shows the same frame again.
shortage.map_df

Unnamed: 0,TOWNSHIP_RANGE,YEAR,geometry,WELL_COUNT
0,T01N R07E,2021,"POLYGON ((-121.25785 37.88341, -121.25785 37.9...",1
1,T01N R08E,2021,"POLYGON ((-121.14913 37.88453, -121.14913 37.9...",1
2,T01S R08E,2021,"POLYGON ((-121.03392 37.88617, -121.03392 37.7...",4
3,T01S R09E,2021,"POLYGON ((-121.03686 37.88683, -120.92335 37.8...",3
4,T02N R06E,2021,"POLYGON ((-121.37076 37.97036, -121.37076 38.0...",1
...,...,...,...,...
473,T32S R26E,2012,"POLYGON ((-119.23510 35.09371, -119.23510 35.1...",0
474,T32S R27E,2012,"POLYGON ((-119.12837 35.09439, -119.12837 35.1...",0
475,T32S R28E,2012,"POLYGON ((-119.02170 35.09292, -119.02170 35.1...",0
476,T32S R29E,2012,"POLYGON ((-118.91470 35.09263, -118.91470 35.1...",0


## Visualizing the Normalized Well Count


Compute the WELL_COUNT normalized by year

In [12]:
# Display the frame with the YEARLY_WELL_COUNT and WELL_COUNT_NORMALIZED
# columns added (WELL_COUNT normalized by the per-year sum).
shortage.return_yearly_normalized_township_feature(
    feature_name="WELL_COUNT",
    normalize_method="sum",
)

  self.map_df[f"YEARLY_{feature_name}"]= self.map_df.groupby('YEAR').transform(normalize_method)[feature_name]


Unnamed: 0,TOWNSHIP_RANGE,YEAR,geometry,WELL_COUNT,YEARLY_WELL_COUNT,WELL_COUNT_NORMALIZED
0,T01N R07E,2021,"POLYGON ((-121.25785 37.88341, -121.25785 37.9...",1,478,0.002092
1,T01N R08E,2021,"POLYGON ((-121.14913 37.88453, -121.14913 37.9...",1,478,0.002092
2,T01S R08E,2021,"POLYGON ((-121.03392 37.88617, -121.03392 37.7...",4,478,0.008368
3,T01S R09E,2021,"POLYGON ((-121.03686 37.88683, -120.92335 37.8...",3,478,0.006276
4,T02N R06E,2021,"POLYGON ((-121.37076 37.97036, -121.37076 38.0...",1,478,0.002092
...,...,...,...,...,...,...
473,T32S R26E,2012,"POLYGON ((-119.23510 35.09371, -119.23510 35.1...",0,1,0.000000
474,T32S R27E,2012,"POLYGON ((-119.12837 35.09439, -119.12837 35.1...",0,1,0.000000
475,T32S R28E,2012,"POLYGON ((-119.02170 35.09292, -119.02170 35.1...",0,1,0.000000
476,T32S R29E,2012,"POLYGON ((-118.91470 35.09263, -118.91470 35.1...",0,1,0.000000


In [13]:
# Keep the yearly-normalized frame for the visualizations below.
normalized_df = shortage.return_yearly_normalized_township_feature(
    feature_name="WELL_COUNT",
    normalize_method="sum",
)

  self.map_df[f"YEARLY_{feature_name}"]= self.map_df.groupby('YEAR').transform(normalize_method)[feature_name]


### Comparison per year

In [14]:
# List the columns available for the plots below.
normalized_df.columns

Index(['TOWNSHIP_RANGE', 'YEAR', 'geometry', 'WELL_COUNT', 'YEARLY_WELL_COUNT',
       'WELL_COUNT_NORMALIZED'],
      dtype='object')

In [15]:
# Side-by-side yearly comparison of the normalized well count.
view_year_side_by_side(
    normalized_df,
    feature="WELL_COUNT_NORMALIZED",
    title="Well Counts",
)

### Map visualization for 2018

In [16]:
# Normalized well count per township for 2018.
display_data_on_map(
    normalized_df,
    feature="WELL_COUNT_NORMALIZED",
    year=2018,
)

### Map visualization for 2021

In [17]:
# Normalized well count per township for 2021.
display_data_on_map(
    normalized_df,
    feature="WELL_COUNT_NORMALIZED",
    year=2021,
)

In [18]:
# NOTE(review): presumably builds the exportable dataset from map_df before the
# CSV export in the next cell; the implementation lives in lib.shortage_v2 — confirm.
shortage.prepare_output_from_map_df()


In [19]:
# Export the dataset as CSV; the path is relative, so it depends on the
# notebook's working directory.
shortage.output_dataset_to_csv(output_filename="../assets/outputs/shortage_reports.csv")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b042e2da-6536-449d-95b8-d85fa08825de' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>