In [1]:
import os
import datetime
import numpy as np
import pandas as pd
from scipy import spatial
import netCDF4 as nc
from matplotlib import pyplot as plt
from sklearn.metrics import roc_curve, auc
import read_data, concatenation, radar

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
radar_panel = \
read_data.radar_to_panel( '../data/data_20140811.nc',
               var_name='radar', time_unit='ns', dim_map = {'lat' : 'latitude', 'lon' :
                                             'longitude', 'time' : 'time'})

# Read vehicle data
veh = pd.read_csv('../data/camera_observations_1m.csv')
veh['Time'] = pd.to_datetime(veh['Time'])
veh.set_index('Time', inplace=True, drop=True)

# Correct vehicle data
ymin = radar_panel.major_axis.min()
ymax = radar_panel.major_axis.max()
xmin = radar_panel.minor_axis.min()
xmax = radar_panel.minor_axis.max()
veh = veh[(veh['Latitude'] > ymin) & (veh['Latitude'] < ymax) &
        (veh['Longitude'] > xmin) & (veh['Longitude'] < xmax)]
veh = veh[veh['GPS_Speed'] > 1]
invalid = [10150, 10151, 10589, 10139, 10615]
veh = veh[~veh['Device'].isin(invalid)]
veh['radar'] = veh['radar'].fillna(0)
veh = veh.dropna()
veh811 = veh.loc['2014-08-11']
veh612 = veh.loc['2014-06-12']
veh628 = veh.loc['2014-06-28']

# Read aggregated hi-resolution
comparison = pd.read_csv('../data/camera_observations_comparison.csv')
comparison['Time'] = pd.to_datetime(comparison['Time'])
comparison.set_index('Time', inplace=True)
comparison['radar'] = comparison['radar'].fillna(0)
comparison_sub = comparison.iloc[:, [0,1,2,3,4,5,-1,7,8]]
comparison_sub.columns = veh.columns
combined = pd.concat([comparison_sub, veh612, veh628])

# TPR and FPR when only the 1-minute resolution dataset is used

In [3]:
# When the wiper data is resampled to regular 1-minute increments, it sometimes "spills over"
# into neighboring intervals due to aggregation. We can use a threshold so that intervals containing very
# small fraction of wiper activity are not counted as raining
# Not accounting for "spill over", set alpha to 0
alpha = 0.0

In [4]:
perf = {'Wiper' : {}, 'Radar' : {}, 'Gage' : {}}

perf['Wiper']['TPR'] = ((veh['Wiper'] > alpha) & (veh['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Wiper']['TNR'] = ((veh['Wiper'] <= alpha) & (veh['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
perf['Radar']['TPR'] = ((veh['radar'] > 0.0) & (veh['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Radar']['TNR'] = ((veh['radar'] == 0.0) & (veh['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
perf['Gage']['TPR'] = ((veh['gage'] > 0.0) & (veh['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Gage']['TNR'] = ((veh['gage'] == 0.0) & (veh['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
pd.DataFrame.from_dict(perf).sort_index(ascending=False)

Unnamed: 0,Gage,Radar,Wiper
TPR,0.445093,0.89486,0.931075
TNR,0.96742,0.97547,0.982369


# TPR and FPR when 2.4-second resolution ground-truth data are aggregated to 1-minute increments and used for 2014-08-11

#### Note that the 2.4-second resolution data is aggregated to 1-minute increments

In [5]:
alpha = 0.0

In [6]:
perf = {'Wiper' : {}, 'Radar' : {}, 'Gage' : {}}

perf['Wiper']['TPR'] = ((combined['Wiper'] > alpha) & (combined['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Wiper']['TNR'] = ((combined['Wiper'] <= alpha) & (combined['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
perf['Radar']['TPR'] = ((combined['radar'] > 0.0) & (combined['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Radar']['TNR'] = ((combined['radar'] == 0.0) & (combined['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
perf['Gage']['TPR'] = ((combined['gage'] > 0.0) & (combined['camera_rain'] != 0)).sum() / (veh['camera_rain'] != 0).sum()
perf['Gage']['TNR'] = ((combined['gage'] == 0.0) & (combined['camera_rain'] == 0)).sum() / (veh['camera_rain'] == 0).sum()
pd.DataFrame.from_dict(perf).sort_index(ascending=False)

Unnamed: 0,Gage,Radar,Wiper
TPR,0.476636,0.929907,0.949766
TNR,0.951322,0.955922,0.966654


# Cross comparison of camera observations

In [7]:
# True positive rate of low-resolution camera observations compared to aggregated hi-resolution ground truth
((comparison['camera_lowres'] > 0.0) & (comparison['camera_hires'] > 0.0)).sum() / (comparison['camera_hires'] > 0.0).sum()

0.9260115606936417

In [8]:
# True negative rate of low-resolution camera observations compared to aggregated hi-resolution ground truth
((comparison['camera_lowres'] <= 0.0) & (comparison['camera_hires'] <= 0.0)).sum() / (comparison['camera_hires'] <= 0.0).sum()

0.9925788497217068

In [9]:
# Fraction of time where both labelers detect rainfall vs. time where either labeler detects rainfall
((comparison['camera_lowres'] > 0.0) & (comparison['camera_hires'] > 0.0)).sum() / ((comparison['camera_lowres'] > 0.0) | (comparison['camera_hires'] > 0.0)).sum()

0.9217491369390104

In [10]:
# Fraction of time where neither labeler detects rainfall vs. time where either labeler detects rainfall
((comparison['camera_lowres'] <= 0.0) & (comparison['camera_hires'] <= 0.0)).sum() / ((comparison['camera_lowres'] <= 0.0) | (comparison['camera_hires'] <= 0.0)).sum()

0.8872305140961857