# Active learning

In [16]:
import inspect
from river import conf
from river import datasets
from river import linear_model
from river import metrics
from river import preprocessing
from river import stats

dataset = datasets.TrumpApproval()

model = conf.RegressionJackknife(
    (
        preprocessing.StandardScaler() |
        linear_model.LinearRegression(intercept_lr=.1)
    ),
    confidence_level=0.9
)

inspect.signature(model.learn_one)

<Signature (x, y)>

In [17]:
arg_info = inspect.getfullargspec(model.learn_one)
arg_info

FullArgSpec(args=['self', 'x', 'y'], varargs=None, varkw=None, defaults=None, kwonlyargs=[], kwonlydefaults=None, annotations={})

In [7]:
from river import datasets
from river import evaluate
from river import imblearn
from river import metrics
from river import preprocessing
from river import rules

model = (
    preprocessing.StandardScaler() |
    imblearn.ChebyshevUnderSampler(
        regressor=rules.AMRules(
            n_min=50, delta=0.01,
        ),
        seed=42
    )
)
inspect.signature(model.learn_one)

<Signature (x: 'dict', y=None, **params)>

In [15]:
arg_info = inspect.getfullargspec(model.learn_one)
arg_info

FullArgSpec(args=['self', 'x', 'y'], varargs=None, varkw='params', defaults=(None,), kwonlyargs=[], kwonlydefaults=None, annotations={'x': 'dict'})

## Online active learning

In [3]:
from river import datasets

dataset = datasets.SMSSpam()
for x, y in dataset:
    print(x)

Downloading https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip (198.65 KB)
Uncompressing into /Users/max/river_data/SMSSpam
{'body': 'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...\n'}
{'body': 'Ok lar... Joking wif u oni...\n'}
{'body': "Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's\n"}
{'body': 'U dun say so early hor... U c already then say...\n'}
{'body': "Nah I don't think he goes to usf, he lives around here though\n"}
{'body': "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv\n"}
{'body': 'Even my brother is not like to speak with me. They treat me like aids patent.\n'}
{'body': "As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your calle

This is well summarized in the following schema from [Online Active Learning Methods for Fast Label-Efficient Spam Filtering](https://citeseerx.ist.psu.edu/document?repid=rep1&type=pdf&doi=6fef6272cd72292e2f5a54d02d6e5352664e20cb).

<div align="center">
    <img width="50%" src="../img/online_active_learning.png" />
</div>

In [1]:
from river import linear_model

model = linear_model.LogisticRegression()
model

LogisticRegression (
  optimizer=SGD (
    lr=Constant (
      learning_rate=0.01
    )
  )
  loss=Log (
    weight_pos=1.
    weight_neg=1.
  )
  l2=0.
  l1=0.
  intercept_init=0.
  intercept_lr=Constant (
    learning_rate=0.01
  )
  clip_gradient=1e+12
  initializer=Zeros ()
)

## Reduce training time

#

## Production considerations