# San Joaquin Valley TownshipRange Precipitation Data and Stations

Related links:

* For the documentation about this dataset, its source, how to download, and the features of interest, please refer to our [Precipitation Dataset](/doc/assets/precipitation.md) documentation.

* For the explanations on how the precipitation datasets are mapped to TownshipRange please refer to our [Public Land Survey System](../assets/plss_sanjoaquin_riverbasin.md) documentation.


In [None]:
import sys
sys.path.append('..')

In [None]:
import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
import requests

import datetime

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


import json
import geopandas as gpd
import pygeos

import altair as alt

##### NOTE: The output of this notebook, after the scraped data was cleaned and merged, is stored in 
 **"/work/milestone2_waterwells_deepnote/assets/outputs/precipitation/precipitation_stations.csv"**

In [None]:
from lib.precipitation import PrecipitationDataset

In [None]:
# Build the project's precipitation helper object used by the cells below.
# NOTE(review): the arguments look like (first year, last year, output folder) — confirm against lib.precipitation.
precipitation_data = PrecipitationDataset(2013, 2023, "../assets/outputs")

In [None]:
# Run the helper's monthly scrape and preview 10 randomly sampled rows of the table it returns.
precipitation_data.scrape_monthly_precipitation_data().sample(10)

Unnamed: 0,STATION_ID,STATION NAME,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,AVERAGE_YEARLY_PRECIPITATION,YEAR
80,HLS,HOLLISTER,0.02,0.36,0.22,0.22,,1.33,1.14,0.02,0.0,0.0,0.01,0.26,0.325455,2014
105,MKV,MARKLEEVILLE,,,,,,1.0,,,,,,,1.0,2013
163,TRM,TERMINUS DAM,0.28,0.91,2.99,0.71,0.87,0.49,0.02,0.51,0.0,0.06,0.0,0.06,0.575,2013
75,HSD,HENSHAW DAM,0.7,3.97,2.91,5.45,16.48,2.44,0.36,2.56,0.0,0.05,0.0,0.29,2.934167,2019
7,BFS,BARSTOW,0.86,0.1,1.03,2.48,0.75,0.0,0.0,0.0,0.0,0.0,0.01,0.06,0.440833,2017
117,MNH,MOUNTAIN HOME,0.38,3.14,0.03,3.97,0.95,4.87,3.15,0.58,0.43,0.0,0.0,0.01,1.459167,2018
71,GVL,GROVELAND R S,0.8,2.84,6.38,9.7,1.67,10.17,3.02,,,,,,4.94,2016
176,YSV,YOSEMITE HEADQUARTERS,3.37,3.74,9.05,9.53,1.87,7.46,2.7,0.54,0.51,0.0,0.0,0.03,3.233333,2016
21,CCH,CACHUMA LAKE,5.23,0.87,5.88,1.0,0.51,0.08,0.36,0.26,0.42,0.03,0.0,,1.330909,2015
106,MLD,MCCLOUD,1.6,10.53,13.56,0.87,0.56,2.97,1.64,2.22,2.19,0.26,0.0,2.68,3.256667,2013


In [None]:
# Show one random row of the all_years_precipitation_data attribute
# (presumably populated by the scrape in the previous cell — confirm against lib.precipitation).
precipitation_data.all_years_precipitation_data.sample()

Unnamed: 0,STATION_ID,STATION NAME,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,AVERAGE_YEARLY_PRECIPITATION,YEAR
119,NDL,NEEDLES,0.1,0.0,0.39,0.67,0.45,,,,0.21,,,,0.303333,2015


In [None]:
# (rows, columns) of the stacked all-years precipitation table.
precipitation_data.all_years_precipitation_data.shape


(1767, 16)

In [None]:
# Distinct YEAR values present in the table.
# NOTE(review): the recorded output stops at 2022 although 2023 was passed to the constructor —
# presumably no 2023 report was available when this was run; confirm.
precipitation_data.all_years_precipitation_data.YEAR.unique()

array([2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022])

In [None]:
# Station list returned by the helper for the 'daily' level (display only; result is not stored).
precipitation_data.get_precipitation_station_data('daily')

Unnamed: 0,STATION,STATION_ID,LATITUDE,LONGITUDE,COUNTY,RIVER BASIN
2,ACTON,ACN,34.445999,-118.199997,LOS ANGELES,SANTA CLARA R
3,ADIN MOUNTAIN,ADM,41.237000,-120.792000,MODOC,PIT R
4,AGNEW PASS,AGP,37.726631,-119.141731,MADERA,SAN JOAQUIN R
5,AL SMITH CANAL,ASA,40.483925,-121.855339,SHASTA,BATTLE CREEK
6,ALAMO POWER PLANT,ALA,34.320900,-118.221199,LOS ANGELES,SAN GABRIEL R
...,...,...,...,...,...,...
1090,YUBA RIVER NEAR MARYSVILLE,MRY,39.175724,-121.524963,YUBA,YUBA R
1091,YUBA RIVER NEAR SMARTVILLE,YRS,39.235172,-121.274124,YUBA,YUBA R
1092,YUCCA VALLEY,YUC,34.123001,-116.407997,SAN BERNARDINO,WHITEWATER R
1093,ZAPATO CHINO,ZPC,36.015999,-120.292999,FRESNO,LOS GATOS CR (NEAR COALINGA)


In [None]:
# Station list returned by the helper for the 'monthly' level (display only; result is not stored).
precipitation_data.get_precipitation_station_data('monthly')

Unnamed: 0,STATION,STATION_ID,LATITUDE,LONGITUDE,COUNTY,OPERATOR AGENCY
2,CRESCENT CITY MC NAMARA FIELD,CEC,41.779999,-124.239998,DEL NORTE,National Weather Service
3,YREKA,YRK,41.700001,-122.633003,SISKIYOU,US Forest Service
4,HAPPY CAMP RS,HAP,41.799999,-123.366997,SISKIYOU,US Forest Service
5,KLAMATH RIVER AT ORLEANS,OLS,41.303459,-123.534500,HUMBOLDT,USGS/DWR-DFM-Hydro-SMN
6,CALLAHAN,CAL,41.317001,-122.800003,SISKIYOU,National Weather Service
...,...,...,...,...,...,...
200,TWENTYNINE PALMS,29P,34.126999,-116.037003,SAN BERNARDINO,National Weather Service
201,NEEDLES,NDL,34.833000,-114.599998,SAN BERNARDINO,National Weather Service
202,BLYTHE,BLY,33.617001,-114.717003,RIVERSIDE,CA Dept of Forestry and Fire Protection
203,IMPERIAL VALLEY,IMP,32.849998,-115.570000,IMPERIAL,National Weather Service


In [None]:
# Precipitation rows merged with station data via the helper method; the recorded output
# shows LATITUDE, LONGITUDE and COUNTY appended to each precipitation row.
precipitation_data.retrieve_merge_precipitation_stations()


Unnamed: 0,STATION_ID,STATION NAME,OCT,NOV,DEC,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,AVERAGE_YEARLY_PRECIPITATION,YEAR,LATITUDE,LONGITUDE,COUNTY
0,ALT,ALTURAS RS,0.29,1.48,2.68,0.48,0.25,0.32,1.44,1.58,0.36,0.00,0.72,0.51,0.842500,2013,41.500000,-120.550003,MODOC
1,ALT,ALTURAS RS,0.17,0.29,0.17,0.12,1.48,2.16,0.62,0.61,0.42,0.41,0.46,0.36,0.605833,2014,41.500000,-120.550003,MODOC
2,ALT,ALTURAS RS,1.45,0.93,2.13,0.11,1.02,0.53,0.55,2.60,0.59,1.17,0.27,0.36,0.975833,2015,41.500000,-120.550003,MODOC
3,ALT,ALTURAS RS,1.32,1.15,3.51,2.00,0.50,1.84,1.32,1.17,0.56,0.05,0.00,0.03,1.120833,2016,41.500000,-120.550003,MODOC
4,ALT,ALTURAS RS,1.47,0.53,1.75,2.50,2.95,1.18,1.51,0.19,1.16,0.82,0.53,0.31,1.241667,2017,41.500000,-120.550003,MODOC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1663,NMQ,NEW MELONES DAM HQ,0.57,6.86,2.49,6.31,12.95,8.24,1.14,4.35,0.00,0.00,0.00,0.47,3.615000,2019,38.000000,-120.489998,TUOLUMNE
1664,NMQ,NEW MELONES DAM HQ,0.00,1.90,8.79,1.92,0.05,5.60,2.87,2.35,0.00,0.00,0.01,0.01,1.958333,2020,38.000000,-120.489998,TUOLUMNE
1665,NMQ,NEW MELONES DAM HQ,0.00,1.34,2.52,6.11,2.01,2.81,0.23,0.07,0.01,0.00,0.00,0.02,1.260000,2021,38.000000,-120.489998,TUOLUMNE
1666,NMQ,NEW MELONES DAM HQ,2.43,0.82,8.38,0.04,,,,,,,,,2.917500,2022,38.000000,-120.489998,TUOLUMNE


### The functions below were later turned into methods of the `PrecipitationDataset` class and do not need to be defined here any more

### SECTION 1 Scrape Precipitation data from daily reporting stations
- Web scraping with BeautifulSoup4 
- The trick is understanding the HTML structure to retrieve the rows of data
- Data is retrieved from the daily reporting site
- We scrape the data at monthly level, one report page per year

In [None]:
def scrape_monthly_precipitation_data(year_start, year_end):
    """
    Scrape the CDEC monthly precipitation report for every year in a range.

    One page is requested per year (``PRECIPMON.<year>``). Each page holds one
    row per station and one column per month. A year is skipped when its page
    has no ``<table id="data" class="data">`` element or when that table has
    no data rows.

    Parameters
    ----------
    year_start : int
        First year to request (inclusive).
    year_end : int
        Last year to request (inclusive).

    Returns
    -------
    pandas.DataFrame
        The yearly tables stacked on top of each other (each keeps its own row
        index). Month columns are numeric, with unparseable cells set to NaN.
        ``AVERAGE_YEARLY_PRECIPITATION`` is the mean of the non-missing months
        and ``YEAR`` is the requested year. An empty DataFrame is returned
        when no year produced a table.
    """
    month_columns = ['OCT', 'NOV', 'DEC', 'JAN', 'FEB', 'MAR',
                     'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP']

    # Collect one frame per year and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # accumulated frame on every iteration.
    yearly_tables = []
    for curr_year in range(year_start, year_end + 1):
        url = f"https://cdec.water.ca.gov/reportapp/javareports?name=PRECIPMON.{curr_year}"

        # A timeout keeps one unresponsive request from hanging the notebook.
        html_content = requests.get(url, timeout=60).text
        soup = BeautifulSoup(html_content, "lxml")

        precipitation_table = soup.find("table", attrs={"id": "data", "class": "data"})
        if precipitation_table is None:
            # No report table for this year.
            continue

        # The first <th> is dropped so the header lines up with the <td> cells;
        # presumably it has no matching data cell — confirm against the page.
        header = [th.text for th in precipitation_table.thead.find_all("th")][1:]

        rows = []
        for table_row in precipitation_table.find_all("tr"):
            cells = [td.text.strip() for td in table_row.find_all("td")]
            # Rows with zero or one <td> are not station rows.
            if len(cells) > 1:
                rows.append(cells)
        if not rows:
            # Table present but empty: nothing to add for this year.
            continue

        data_table = pd.DataFrame(rows)
        data_table.columns = header
        for col in month_columns:
            data_table[col] = pd.to_numeric(data_table[col], errors='coerce')
        data_table['AVERAGE_YEARLY_PRECIPITATION'] = data_table[month_columns].mean(axis='columns')
        data_table['YEAR'] = curr_year
        data_table = data_table.rename(columns={'STATION ID': 'STATION_ID'})
        yearly_tables.append(data_table)

    if not yearly_tables:
        return pd.DataFrame()
    return pd.concat(yearly_tables)


### Get Monthly Precipitation Reporting Station locations using webscraping

In [None]:
def get_precipitation_station_data(level):
    """
    Scrape a CDEC station list and return it as a DataFrame.

    Parameters
    ----------
    level : str
        ``'daily'`` selects the ``DailyStations`` report (table id
        ``DLY_STNLIST``); any other value selects the ``MonthlyPrecip``
        report (table id ``REALPRECIP_LIST``).

    Returns
    -------
    pandas.DataFrame
        One row per station. Column names come from the first scraped row,
        with ``ID`` renamed to ``STATION_ID`` and ``ELEV(FEET)`` removed.
        All values are the stripped cell text (no numeric conversion).

    Raises
    ------
    ValueError
        If the expected table is not on the page or contains no usable rows.
    """
    if level == 'daily':
        url = "https://cdec.water.ca.gov/reportapp/javareports?name=DailyStations"
        table_id = "DLY_STNLIST"
    else:
        url = "https://cdec.water.ca.gov/reportapp/javareports?name=MonthlyPrecip"
        table_id = "REALPRECIP_LIST"

    # A timeout keeps one unresponsive request from hanging the notebook.
    html_content = requests.get(url, timeout=60).text
    soup = BeautifulSoup(html_content, "lxml")

    html_table = soup.find("table", attrs={"id": table_id, "class": "data"})
    if html_table is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(f"Station table '{table_id}' not found at {url}")

    all_rows_list = []
    for table_row in html_table.find_all("tr"):
        cells = [td.text.strip() for td in table_row.find_all("td")]
        # Rows with zero or one <td> are not station rows.
        if len(cells) > 1:
            all_rows_list.append(cells)
    if not all_rows_list:
        raise ValueError(f"Station table '{table_id}' at {url} has no rows")

    station_table = pd.DataFrame(all_rows_list)
    # The first scraped row carries the column names.
    station_table.columns = station_table.iloc[0, :]
    # Data starts at row 2; row 1 is skipped exactly as the original code did
    # (presumably a non-data row — confirm against the page).
    station_table = (
        station_table.iloc[2:, :]
        .rename(columns={'ID': 'STATION_ID'})
        .drop(columns=['ELEV(FEET)'])
    )
    return station_table


In [None]:
def retrieve_merge_precipitation_stations(year_start=2013, year_end=2022):
    """
    Scrape precipitation and station data and join them on ``STATION_ID``.

    Parameters
    ----------
    year_start : int, optional
        First year passed to ``scrape_monthly_precipitation_data`` (default 2013).
    year_end : int, optional
        Last year passed to ``scrape_monthly_precipitation_data`` (default 2022).

    Returns
    -------
    pandas.DataFrame
        The precipitation rows that have a matching station (inner join),
        with ``LATITUDE``, ``LONGITUDE`` and ``COUNTY`` added. Precipitation
        rows whose station is in neither station list are dropped.
    """
    all_years_precipitation_data = scrape_monthly_precipitation_data(year_start, year_end)

    daily_station_table = get_precipitation_station_data('daily')
    monthly_station_table = get_precipitation_station_data('monthly')

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    full_station_table = pd.concat([daily_station_table, monthly_station_table])

    # A station can appear in both lists; grouping on the identifying columns
    # keeps one row per distinct combination so the merge does not duplicate
    # precipitation rows.
    # NOTE(review): a station listed with different coordinates or county in
    # the two lists still yields more than one row here.
    station_keys = ['STATION', 'STATION_ID', 'LATITUDE', 'LONGITUDE', 'COUNTY']
    unique_stations = (
        full_station_table
        .groupby(station_keys)
        .size()
        .reset_index(name='count_latitude')
        .drop(columns=['count_latitude'])
    )

    all_years_precipitation_station = all_years_precipitation_data.merge(
        unique_stations, how='inner', on='STATION_ID'
    )
    # The precipitation table already has 'STATION NAME'; drop the duplicate label.
    return all_years_precipitation_station.drop(columns=['STATION'])

In [None]:
#list(set(all_years_precipitation_data.station_id) - set(monthly_station_table.station_id))
#list(set(all_years_precipitation_data.station_id) - set(full_station_table.station_id))
# There are stations not found on the site either ['MTP', 'CHL', 'LSB', 'HRR', 'GNV', 'LGB', 'LYT']
# all_years_precipitation_station = all_years_precipitation_data.merge(monthly_station_table, how='left', left_on='STATION_ID', right_on='STATION_ID', indicator=True)
# all_years_precipitation_station[all_years_precipitation_station['_merge']  != 'both'].station_id.unique()

In [None]:
#all_years_precipitation_station.describe()

In [None]:
def save_precipitation_data(
    all_years_precipitation_station,
    output_path=r"/work/milestone2_waterwells_deepnote/assets/inputs/precipitation/precipitation_stations.csv",
):
    """
    Write the merged precipitation/station table to a CSV file.

    Parameters
    ----------
    all_years_precipitation_station : pandas.DataFrame
        Table to save. The row index is not written.
    output_path : str or path-like, optional
        Destination file. Defaults to the Deepnote project location this
        notebook originally wrote to; pass another path to run elsewhere.
        The parent directory must already exist.
    """
    all_years_precipitation_station.to_csv(output_path, index=False)

In [None]:
# Build the merged table explicitly before saving: no earlier cell assigns
# all_years_precipitation_station at notebook scope, so a fresh
# Restart & Run All would otherwise stop here with a NameError.
all_years_precipitation_station = retrieve_merge_precipitation_stations()
save_precipitation_data(all_years_precipitation_station)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b042e2da-6536-449d-95b8-d85fa08825de' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>