### Imports

In [31]:
from rich import print
from river import stream, compose, preprocessing, neighbors
from river import evaluate
from river import metrics

### Load and Preprocess Data

In [32]:
dataset_path = 'data/air_pollution_dataset.csv'

# Wind direction mapping
mapping = {'SE': 1, 'cv': 2, 'NW': 3, 'NE': 4}
converter = lambda x: float(mapping[x])

pollution_stream = stream.iter_csv(dataset_path, target='pollution', converters={'pollution': float, 'dew': float,
                                                                                 'temp': float, 'press': float, 
                                                                                 'wnd_dir': converter, 'wnd_spd': float,
                                                                                 'snow': float, 'rain': float})
sample, target = next(pollution_stream)
print(sample)
print(target)

In [33]:
# Discarding date column and scaling the data
to_discard=['date']
model = compose.Pipeline(compose.Discard(*to_discard), neighbors.KNNRegressor(n_neighbors=5))

# Check the model's samples after preprocessing
for x, y in pollution_stream:
    model.learn_one(x, y)
    transformed_x = model.transform_one(x)
    print("Transformed:", {key: "{:.5f}".format(value) for key, value in transformed_x.items()})
    print("Predicted: ", model.predict_one(x))
    print("True Label: ", y)
    break

print(evaluate.progressive_val_score(dataset=pollution_stream, model=model, metric=metrics.MAE(), print_every=1000))


# Manually transform and inspect some samples
'''for x, y in pollution_stream:
    print(x)
    # Transform the sample using the pipeline
    transformed_x = model.transform_one(x)
    
    # Print the transformed sample along with its target label
    print("Original Sample:", x)
    print("Transformed Sample:", transformed_x)
    print("Target Label:", y)
    
    # Break the loop after transforming a few samples'''


[1,000] MAE: 35.722067
[2,000] MAE: 37.475633
[3,000] MAE: 38.687422
[4,000] MAE: 39.676217
[5,000] MAE: 39.863693
[6,000] MAE: 39.646644
[7,000] MAE: 41.483667
[8,000] MAE: 42.522958
[9,000] MAE: 41.335496
[10,000] MAE: 40.734227
[11,000] MAE: 41.650442
[12,000] MAE: 41.609956
[13,000] MAE: 41.360651
[14,000] MAE: 42.550048
[15,000] MAE: 42.510618
[16,000] MAE: 43.295392
[17,000] MAE: 43.209745
[18,000] MAE: 43.254615
[19,000] MAE: 43.207309
[20,000] MAE: 42.797523
[21,000] MAE: 42.665832
[22,000] MAE: 42.782185
[23,000] MAE: 42.724951
[24,000] MAE: 42.360786
[25,000] MAE: 42.294131
[26,000] MAE: 42.141633
[27,000] MAE: 42.911477
[28,000] MAE: 43.550638
[29,000] MAE: 44.112057
[30,000] MAE: 43.847549
[31,000] MAE: 43.814189
[32,000] MAE: 43.380058
[33,000] MAE: 43.174844
[34,000] MAE: 43.049873
[35,000] MAE: 42.866242
[36,000] MAE: 42.985952
[37,000] MAE: 43.381948
[38,000] MAE: 43.664812
[39,000] MAE: 43.425479
[40,000] MAE: 43.159242
[41,000] MAE: 42.717797
[42,000] MAE: 42.56633
[4

'for x, y in pollution_stream:\n    print(x)\n    # Transform the sample using the pipeline\n    transformed_x = model.transform_one(x)\n    \n    # Print the transformed sample along with its target label\n    print("Original Sample:", x)\n    print("Transformed Sample:", transformed_x)\n    print("Target Label:", y)\n    \n    # Break the loop after transforming a few samples'