In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from calibration.errormetrics import MAE, MSE, NMSE, NLPD, compute_test_data
from calibration.synthetic import generate_synthetic_dataset, getstaticsensortranform, getmobilesensortranform
from calibration.simple import compute_simple_calibration, compute_simple_predictions


In [9]:
# A small network of 4 sensors. Every row of X is one colocation event:
#   [day, id of first sensor, id of second sensor]
# Each entry below is (days on which the pair met, first sensor, second sensor):
#   sensors 0 & 1 on days 1-20,
#   sensors 1 & 2 on days 5-25,
#   sensors 2 & 3 on days 15-40.
colocations = [
    (np.arange(1, 21), 0, 1),
    (np.arange(5, 26), 1, 2),
    (np.arange(15, 41), 2, 3),
]
X = np.vstack([
    np.column_stack((days, np.full(len(days), first), np.full(len(days), second)))
    for days, first, second in colocations
])

# Sensors 0 and 3 are both reference instruments, but the model is only
# told about sensor 0; sensor 3 is held back to provide the ground truth.
refsensor = np.array([1, 0, 0, 0])

# Compact array display for the outputs shown in this notebook.
np.set_printoptions(precision=1, suppress=True)

# Synthetic pollution signal: both sensors of a colocation observe the same
# underlying value (one column of Y per sensor in the pair) ...
pollution = 20 * np.cos(X[:, 0] / 20)
Y = np.column_stack((pollution, pollution))
# ... except that sensor 1 reads 2x too high and sensor 2 reads 3x too high.
sensor_ids = X[:, 1:]
Y[sensor_ids == 1] *= 2
Y[sensor_ids == 2] *= 3

# In a real deployment the true pollution is only known where a reference
# instrument is present. Sensor 3 (a reference the model was not told about)
# plays that role here: rows where it is the second sensor carry its reading,
# every other row is NaN.
keep = X[:, 2] == 3
trueY = np.where(keep, Y[:, 1], np.nan)
trueY

array([ nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,
        nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,
        nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan,
        nan,  nan,  nan,  nan,  nan,  nan,  nan,  nan, 14.6, 13.9, 13.2,
       12.4, 11.6, 10.8, 10. ,  9.1,  8.2,  7.2,  6.3,  5.3,  4.4,  3.4,
        2.4,  1.4,  0.4, -0.6, -1.6, -2.6, -3.6, -4.5, -5.5, -6.5, -7.4,
       -8.3])

In [14]:
# Derive the evaluation set from the colocation data.
testX, testY, testtrueY = compute_test_data(X, Y, trueY, refsensor)

# Keep a row only if
#   (a) a true value is available for it, and
#   (b) its second column is not 3 -- presumably that column is the sensor
#       id, so this drops readings taken by the held-back reference sensor
#       itself (scoring those would be trivially perfect: the reading *is*
#       the truth). TODO confirm the column layout of testX.
keep = ~np.isnan(testtrueY[:, 0])
keep &= testX[:, 1] != 3
testX, testY, testtrueY = testX[keep, :], testY[keep, :], testtrueY[keep, :]

In [15]:
# delta is handed to both the calibration and the prediction step
# (presumably the length of one calibration period, in the same units as
# the time column of X -- confirm against calibration.simple).
delta = 8

# Fit the calibration from the colocation events, accepting periods that
# contain as few as one colocation.
calibration = compute_simple_calibration(
    X, Y, delta, refsensor, mincolocationsinperiod=1
)
G, allsp, allcals, allcallists, allpopts, allpcovs, allpoptslists = calibration

# Apply the fitted calibrations (allcals) to the held-out test rows.
preds, res2, res = compute_simple_predictions(
    testX, testY, testtrueY, allcals, delta
)

In [17]:
# Score the calibrated predictions first, then the uncorrected sensor
# readings, against the same ground truth so the two lines are directly
# comparable.
truth = testtrueY[:, 0]
for label, estimate in (("Corrected:", preds[:, 0]), ("Raw:", testY[:, 0])):
    print(label)
    nmse = NMSE(truth, estimate)
    mse = MSE(truth, estimate)
    mae = MAE(truth, estimate)
    print("nmse=%5.5f mse=%5.2f mae=%5.2f" % (nmse, mse, mae))

Corrected:
nmse=0.00000 mse= 0.00 mae= 0.00
Raw:
nmse=4.00000 mse=251.70 mae=13.49
