In [1]:
import warnings
# Silence every Python warning for the rest of the session to keep cell outputs clean.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries used below; consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import geopandas as gpd
# Add the directory three levels up to the import search path
# (presumably so the local `ToolkitName` package can be imported - confirm).
sys.path.append("../../../")
%load_ext autoreload
%autoreload 2

In [3]:
# Load the station-level ozone measurements; the path is relative to the
# notebook's working directory.
df = pd.read_csv("../dataset/usa_o3.csv")

In [4]:
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,Latitude,Longitude,Sample Measurement
0,37.858932,-87.575291,0.040264
1,40.049604,-75.241209,0.046745
2,42.713897,-87.798634,0.045693
3,38.848232,-121.515236,0.046687
4,39.907891,-75.149066,0.038572


In [5]:
# (rows, columns) of the table before rows with missing values are dropped.
df.shape

(704, 3)

In [6]:
# Discard every row that has at least one missing value.
df = df.dropna()

In [7]:
# Prediction locations, loaded from a NumPy file in the notebook's working directory.
# Column 0 is treated as longitude by the Kriging cells below; the full layout is
# presumably (Longitude, Latitude), matching `X` - TODO confirm.
centroids = np.load("USA_Centroids.npy")

In [8]:
# Station coordinates, one row per station, columns ordered (Longitude, Latitude).
X = df.loc[:, ['Longitude', 'Latitude']].to_numpy()
# Target kept as a single-column 2-D array and scaled by 1000
# (presumably a ppm -> ppb conversion - confirm against the dataset description).
y = 1000 * df.loc[:, ['Sample Measurement']].to_numpy()

## Kriging Interpolation

In [9]:
from ToolkitName.interpolate import Kriging

# PyKrige expects longitudes in the 0-360 range, so the station longitudes are
# shifted by +360 on a copy before fitting; `X` itself is left unchanged.
X_test_krig = centroids.copy()  # copy of the prediction points; shifted at prediction time
X_krig = X.copy()
X_krig[:, 0] += 360

krig = Kriging(coordinate_type='Geographic', variogram_model='spherical')
krig.fit(X_krig, y)

Kriging

In [10]:
# Kriging predictions for every centroid, evaluated in fixed-size batches.
batch_size = 1000

# Rebuild the shifted prediction coordinates from `centroids` on every run.
# Shifting `X_test_krig` in place inside the loop made this cell non-idempotent:
# re-running it added 360 to the longitudes a second time.
X_test_krig = centroids.copy()
X_test_krig[:, 0] += 360  # PyKrige expects longitudes in the 0-360 range

krig_predictions = np.zeros(centroids.shape[0])
for start in range(0, centroids.shape[0], batch_size):
    # Slices clamp at the end of the array, so the final (shorter) batch
    # needs no separate branch.
    stop = start + batch_size
    krig_predictions[start:stop] = krig.predict(X_test_krig[start:stop])

## Spatial Averaging

In [11]:
from ToolkitName.interpolate import SpatialAverage

# Spatial-average interpolator on geographic coordinates.
# NOTE(review): radius=16.1 is a magic number (presumably kilometres, i.e. about
# 10 miles) - confirm the unit and consider naming it as a constant.
spatial = SpatialAverage(
    radius=16.1,
    coordinate_type='Geographic',
)
spatial.fit(X, y)

SpatialAverage

In [12]:
# Spatial-average predictions for every centroid, evaluated in fixed-size batches.
batch_size = 1000

spatial_predictions = np.zeros(centroids.shape[0])
for start in range(0, centroids.shape[0], batch_size):
    # Slices clamp at the end of the array, so the final (shorter) batch
    # needs no separate branch.
    stop = start + batch_size
    batch_output = spatial.predict(centroids[start:stop])
    # Flatten every batch the same way. Previously only full batches were
    # reshaped to 1-D, so a column-shaped result for the last batch could not
    # be stored in the 1-D output slice.
    spatial_predictions[start:stop] = np.ravel(batch_output)

## IDW

In [13]:
from ToolkitName.interpolate import Idw
from ToolkitName.utils.distance import haversine, euclidean
# Inverse-distance-weighting interpolator on geographic coordinates, fitted on
# all stations.
# NOTE(review): `euclidean` is imported but not used in the visible cells.
idw = Idw(coordinate_type='Geographic')
idw.fit(X, y)

Idw

In [14]:
# IDW prediction at every centroid, one point at a time, using the `idw` model
# that was fitted on ALL stations.
#
# NOTE(review): this loop used to compute a haversine distance mask
# (`distances <= 250`) plus `X_train` / `y_train` for each centroid, but none of
# them were ever passed to the model, so they had no effect on the predictions.
# That dead work is removed here and the results are unchanged. If a
# radius-limited IDW was intended, the model has to be refitted on the masked
# stations for each centroid, and centroids with no station in range need handling.
idw_predictions = np.zeros(centroids.shape[0])
for ix, data_point in enumerate(centroids):
    prediction = idw.predict(data_point.reshape(1, 2))
    # A single row goes in, so a single value comes out; take it as a scalar
    # explicitly instead of relying on NumPy's deprecated implicit conversion
    # of a size-1 array to a scalar.
    idw_predictions[ix] = np.ravel(prediction)[0]

## $k$-NN, with $k = 1$

In [15]:
from ToolkitName.custom import CustomInterpolator
from sklearn.neighbors import KNeighborsRegressor

# Nearest-station baseline: a 1-neighbour regressor passed to the toolkit's
# CustomInterpolator (presumably a thin wrapper around the scikit-learn
# estimator - confirm), fitted on the stations and evaluated at all centroids.
nearest_neighbour_kwargs = dict(n_neighbors=1)
knn = CustomInterpolator(KNeighborsRegressor, reg_kwargs=nearest_neighbour_kwargs)
knn.fit(X, y)
knn_predictions = knn.predict(centroids)

## Summary Statistics

### Station - Dataset

In [16]:
# Mean and quartiles of the observed station values.
print("Station Statistics")
print("------------------")
print("Mean", y.mean())
for label, q in (
    ("25 Percentile", 25),
    ("50 Percentile / Median", 50),
    ("75 Percentile", 75),
):
    print(label, np.percentile(y, q))

Station Statistics
------------------
Mean 46.75552599495552
25 Percentile 40.68717611387817
50 Percentile / Median 46.26571303054882
75 Percentile 51.866755238219426


### IDW

In [17]:
# Mean and quartiles of the IDW predictions over all centroids.
print("IDW Statistics")
print("------------------")
print("Mean", idw_predictions.mean())
for label, q in (
    ("25 Percentile", 25),
    ("50 Percentile / Median", 50),
    ("75 Percentile", 75),
):
    print(label, np.percentile(idw_predictions, q))

IDW Statistics
------------------
Mean 45.74114403870057
25 Percentile 42.23944166288551
50 Percentile / Median 45.51863263863817
75 Percentile 49.009370937965386


### Spatial Averaging

In [18]:
# Normalise the spatial-average predictions to a flat float vector, then drop
# the NaN entries so the summary statistics can be computed.
#
# This replaces an element-by-element loop that tried `i[0]` inside a bare
# `except:`. On a 1-D float array that always raised and fell through to a plain
# copy, while also hiding any other error.
spatial_predictions = np.asarray(spatial_predictions, dtype=float)
if spatial_predictions.ndim == 2:
    # Column-shaped input: keep the first value of each row, as the loop did.
    spatial_predictions = spatial_predictions[:, 0]

# NaN marks centroids without a prediction (presumably no station inside the
# averaging radius - confirm); they are excluded from the statistics.
t_spatial_predictions = spatial_predictions[~np.isnan(spatial_predictions)]

In [19]:
# Mean and quartiles of the non-NaN spatial-average predictions.
print("Spatial Averaging Statistics")
print("------------------")
print("Mean", t_spatial_predictions.mean())
for label, q in (
    ("25 Percentile", 25),
    ("50 Percentile / Median", 50),
    ("75 Percentile", 75),
):
    print(label, np.percentile(t_spatial_predictions, q))

Spatial Averaging Statistics
------------------
Mean 45.18464149438545
25 Percentile 39.94368384670303
50 Percentile / Median 44.333830617433534
75 Percentile 49.245551215277814


### $1$-NN

In [20]:
# Mean and quartiles of the 1-NN predictions over all centroids.
print("KNN Statistics")
print("------------------")
print("Mean", knn_predictions.mean())
for label, q in (
    ("25 Percentile", 25),
    ("50 Percentile / Median", 50),
    ("75 Percentile", 75),
):
    print(label, np.percentile(knn_predictions, q))

KNN Statistics
------------------
Mean 45.22803815395025
25 Percentile 39.06444444444447
50 Percentile / Median 45.15916463909166
75 Percentile 50.27720385674931
