# Working with raw precipitation versus Synoptic's derived precipitation product

Here's an example that illustrates some of the challenges of working with real-time precipitation data from stations across many different networks.

In [2]:
import json
import os

import pandas as pd
import requests

# Our main plotting package (must have explicit import of submodules)
import bokeh.io
import bokeh.plotting
from bokeh.layouts import column
# Enable viewing Bokeh plots in the notebook: must run before any
# bokeh.io.show() call so figures render inline in the output cells
bokeh.io.output_notebook()

# Example case: Missoula, MT

For this example we'll focus on a wet weekend in Missoula, MT back in June of 2024. First, let's take a look at all the stations in Missoula County and see the number of ways in which precipitation is being reported.

In [3]:
# Prefer a personal API token supplied through the SYNOPTIC_TOKEN environment
# variable; fall back to the token shipped with this notebook so the example
# still runs unmodified.
# NOTE(review): a token committed to a notebook is visible to every reader --
# consider rotating it and relying on the environment variable only.
token = os.environ.get('SYNOPTIC_TOKEN', '07a920b581a1444a97ab4b722d6c9ed9')

# Station metadata for every station in Missoula County, MT. `sensorvars`
# asks the API to include each station's SENSOR_VARIABLES listing, which the
# next cell scans for precipitation variables.
url = 'https://api.synopticdata.com/v2/stations/metadata?'
params = {'state': 'mt',
          'county': 'missoula',
          'sensorvars': 1,
          'token': token}

# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before we try to parse JSON.
r = requests.get(url, params=params, timeout=30)
r.raise_for_status()
data = r.json()

In [4]:
# Collect the name of every precipitation-related variable reported by any
# station in the metadata response. Duplicates are kept on purpose: the same
# variable appears once per station that reports it.
precip_vars = [
  var
  for station in data['STATION']
  for var in station['SENSOR_VARIABLES']
  if 'precip' in var
]

In [5]:
# Collapse the per-station list to the distinct precipitation variable names
set(precip_vars)

{'precip_accum',
 'precip_accum_12_hour',
 'precip_accum_24_hour',
 'precip_accum_30_minute',
 'precip_accum_fifteen_minute',
 'precip_accum_five_minute',
 'precip_accum_one_hour',
 'precip_accum_since_00utc',
 'precip_accum_since_7_local',
 'precip_accum_since_local_midnight',
 'precip_accum_six_hour',
 'precip_accum_three_hour',
 'precip_smoothed'}

## Challenges dealing with stations reporting precipitation in different formats
Now that we see how many different ways stations in Missoula County report precipitation, let's dig into just a few of them to demonstrate the challenges of working with these different data formats.

In this request we'll grab data from 3 stations which I know report three different formats:

**KMSO** - ASOS station reporting `precip_accum_one_hour`

**BLMM8** - RAWS station reporting `precip_accum`

**C4884** - CWOP station reporting `precip_accum_since_local_midnight`


In [18]:
# Request window as 12-digit YYYYmmddHHMM strings; reused by the derived
# precipitation request later in the notebook.
# NOTE(review): presumably interpreted as UTC by the API -- confirm.
start = '202406160000'
end = '202406200000'

# Raw timeseries for three stations, each reporting precipitation in a
# different format. The station selector is spelled `stid`, matching the
# derived-precipitation request further down.
url = 'https://api.synopticdata.com/v2/stations/timeseries?'
params = {'stid': 'KMSO,BLMM8,C4884',
          'vars':'precip_accum_one_hour,precip_accum,precip_accum_since_local_midnight',
          'start': start,
          'end': end,
          'token': token}

# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before we try to parse JSON.
r = requests.get(url, params=params, timeout=30)
r.raise_for_status()
data = r.json()

In [12]:
# Check the request summary (object count, response code and message)
# before working with the returned observations
data['SUMMARY']

{'NUMBER_OF_OBJECTS': 3,
 'RESPONSE_CODE': 1,
 'RESPONSE_MESSAGE': 'OK',
 'METADATA_PARSE_TIME': '0.2 ms',
 'METADATA_DB_QUERY_TIME': '3.0 ms',
 'DATA_QUERY_TIME': '40.5 ms',
 'QC_QUERY_TIME': '27.6 ms',
 'DATA_PARSING_TIME': '15.5 ms',
 'TOTAL_DATA_TIME': '83.7 ms',
 'VERSION': 'v2.25.1'}

Let's create a dataframe with station data

In [9]:
# List the top-level keys of the parsed response; build_df below reads
# 'STATION' and the plotting cells read 'UNITS', so both must be present
data.keys()

dict_keys(['SUMMARY'])

In [16]:
def build_df(data):
  """Assemble a parsed timeseries response into a single DataFrame.

  Parameters
  ----------
  data : dict
      Parsed JSON response. It must hold a non-empty ``'STATION'`` list whose
      items each provide an ``'STID'`` and an ``'OBSERVATIONS'`` mapping of
      equal-length lists that includes ``'date_time'`` (ISO 8601 strings).

  Returns
  -------
  pandas.DataFrame
      One row per (station, timestamp), indexed by a sorted
      ``('STID', 'date_time')`` MultiIndex, with one column per observation
      key other than ``'date_time'``. Variables a station does not report
      are filled with NaN for that station's rows.

  Raises
  ------
  ValueError
      If the response has no ``'STATION'`` entries (for example an error
      response that only carries ``'SUMMARY'``). The API's
      ``RESPONSE_MESSAGE`` is included when available.
  """
  stations = data.get('STATION')
  if not stations:
    # Fail with the API's own explanation instead of an opaque
    # KeyError / UnboundLocalError further down.
    message = data.get('SUMMARY', {}).get('RESPONSE_MESSAGE', 'no details provided')
    raise ValueError(f"Response contains no station data: {message}")

  # Build one frame per station and concatenate once at the end; growing a
  # frame with concat inside the loop copies all prior rows on every pass.
  frames = []
  for station in stations:
    observations = station['OBSERVATIONS']
    timestamps = pd.to_datetime(observations['date_time'], format='ISO8601')
    index = pd.MultiIndex.from_product([[station['STID']], timestamps],
                                       names=['STID', 'date_time'])
    frames.append(pd.DataFrame(observations, index=index))

  # Sort by station then time, and drop the raw timestamp strings now that
  # the parsed timestamps live in the index.
  return pd.concat(frames, axis=0).sort_index().drop(columns=['date_time'])

# Raw (as-reported) precipitation observations for the three stations,
# indexed by (STID, date_time)
data_df = build_df(data)

Visualizing the precip timeseries records from the three stations demonstrates the challenge clearly

In [17]:
def _station_precip_figure(obs_df, units, stid, variable, color):
  """Build one Bokeh line plot of a single station's raw precipitation series.

  Parameters
  ----------
  obs_df : pandas.DataFrame
      Observations indexed by (STID, date_time), as returned by build_df.
  units : dict
      The response's 'UNITS' mapping; ``units[variable]`` labels the y axis.
  stid : str
      Station ID to plot; also used as the legend label.
  variable : str
      Variable name; the plotted column is ``f'{variable}_set_1'``.
  color : str
      Line color.

  Returns
  -------
  The configured Bokeh figure.
  """
  fig = bokeh.plotting.figure(
      width=450,
      height=250,
      x_axis_label='Date/time',
      y_axis_label=units[variable],
      x_axis_type='datetime')
  station_df = obs_df.loc[stid]
  fig.line(station_df.index, station_df[f'{variable}_set_1'],
           line_color=color, line_width=2, legend_label=stid)
  return fig

# create three plots, one per station / reporting format
p1 = _station_precip_figure(data_df, data['UNITS'], 'BLMM8', 'precip_accum', 'blue')
p2 = _station_precip_figure(data_df, data['UNITS'], 'KMSO', 'precip_accum_one_hour', 'green')
p3 = _station_precip_figure(data_df, data['UNITS'], 'C4884', 'precip_accum_since_local_midnight', 'red')

# put the results in a column and show
bokeh.io.show(column(p1, p2, p3))

# One solution: derived precipitation

Synoptic processes precipitation observations in real time, building a derived precipitation product that collapses different precipitation reporting into a consistent format. The derived precipitation product is accessed from the Timeseries endpoint by setting the `precip` argument.

In [19]:
# Same stations and time window as the raw request, but with `precip` set so
# the response carries the derived precipitation product. `sensorvars` adds
# each station's SENSOR_VARIABLES listing, inspected in the next cell.
params2 = {'stid': 'KMSO,C4884,BLMM8',
           'start': start,
           'end': end,
           'precip':'1',
           'vars': 'air_temp',
           'sensorvars':'1',
           'token': token}

# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors before we try to parse JSON.
r2 = requests.get(url, params=params2, timeout=30)
r2.raise_for_status()
data2 = r2.json()

In [20]:
# Inspect the first station's sensor variables: the derived precipitation
# entries record which reported variable they were derived from
data2['STATION'][0]['SENSOR_VARIABLES']

{'air_temp': {'air_temp_set_1': {'position': '2.0',
   'PERIOD_OF_RECORD': {'start': '1997-04-12T12:55:00Z',
    'end': '2025-02-03T20:40:00Z'}}},
 'precip_accumulated': {'precip_accumulated_set_1d': {'derived_from': 'precip_accum_one_hour',
   'position': ''}},
 'precip_intervals': {'precip_intervals_set_1d': {'derived_from': 'precip_accum_one_hour',
   'position': ''}}}

Let's plot up the resulting precip accumulations

In [21]:
# Derived precipitation observations for the same three stations,
# indexed by (STID, date_time)
data_df2 = build_df(data2)

In [22]:
# Overlay every station's derived accumulation on one shared figure.
# NOTE(review): the y-axis units are read from the first (raw) request's
# `data`, not from `data2` -- confirm both responses use the same units.
p = bokeh.plotting.figure(
    width=600,
    height=400,
    x_axis_label='Date/time',
    y_axis_label=data['UNITS']['precip_accum'],
    x_axis_type='datetime')

# (station ID, line color), drawn in this order
station_colors = (('BLMM8', 'blue'), ('KMSO', 'green'), ('C4884', 'red'))
for stid, color in station_colors:
  station_df = data_df2.loc[stid]
  p.line(station_df.index, station_df['precip_accumulated_set_1d'],
         legend_label=stid, line_color=color, line_width=2)

p.legend.location = "top_left"
bokeh.io.show(p)

Despite the different reporting formats among the 3 stations, the measured precipitation shows a similar pattern over the weekend.