# Simple training example

In [1]:
from datetime import timedelta
from kilroyshare import Face, OfflineModule, OnlineModule
from kilroylib.data import MemoryCachingDatasetFactory, FileCachingDatasetFactory
from kilroylib.training.offline.trainer import Trainer as OfflineTrainer, PostsLoader
from kilroylib.training.online.trainer import (
    Trainer as OnlineTrainer,
    PostGenerator,
    PostScheduler,
)
from kilroylib.training.offline.stop import MaxEpochs
from kilroylib.training.online.stop import MaxUpdates

In [2]:
def mean(it):
    """Return the arithmetic mean of the values in *it*.

    Accepts any iterable, including generators and other iterables that do
    not support ``len()``.

    Raises:
        ZeroDivisionError: If *it* is empty.
    """
    # Materialize once so single-pass iterables can be both summed and counted.
    values = list(it)
    return sum(values) / len(values)

### Face definition

In [3]:
class MyFace(Face[int, bool]):
    """Toy face whose posts are booleans identified by integer ids."""

    def __init__(self):
        # Next id handed out by post().
        self.i = 0
        # Posted data keyed by post id; read back by score().
        self.posts = {}

    def scrap(self, limit=None):
        """Yield ``(post_id, value)`` pairs for synthetic posts.

        Args:
            limit: Number of posts to yield. ``None`` means the default of
                100; ``0`` yields nothing.

        Yields:
            Tuples ``(i, i % 3 == 0)`` for ``i`` in ``range(n)``.
        """
        # Compare against None explicitly: `limit or 100` would treat an
        # explicit limit of 0 as "no limit" and yield 100 posts.
        n = 100 if limit is None else limit
        # Move the id counter past the scraped ids (0..n-1) so that post()
        # does not hand out an id that was already yielded here.
        self.i = n + 1
        for i in range(n):
            yield i, i % 3 == 0  # every third is True

    def post(self, data):
        """Store *data* under a fresh id and return that id."""
        post_id = self.i
        self.i += 1
        self.posts[post_id] = data
        return post_id

    def score(self, post_id):
        """Return the score of a stored post: +1 for True, -1 for False.

        Raises:
            KeyError: If *post_id* was not returned by ``post()``.
        """
        return int(self.posts[post_id]) * 2 - 1  # True -> +1, False -> -1

When scraping posts, every third post has x (the post's content) equal to True, so around $33\%$ of posts have x equal to True. The module should learn that distribution after offline training.

When scoring posts, all posts with x equal to True are scored $+1$ and all posts with x equal to False are scored $-1$. After online training, the module should have learned to generate only posts with x equal to True.

### Module definition

In [4]:
import random


class Model:
    """Holds a single probability ``p`` that is kept within ``[0, 1]``."""

    def __init__(self, p=0.5):
        # Assign through the property so the initial value is clamped the
        # same way as every later assignment.
        self.p = p

    @property
    def p(self):
        """Current probability value."""
        return self._p

    @p.setter
    def p(self, value):
        # Clamp to the closed interval [0, 1].
        self._p = max(min(value, 1), 0)


class MyOfflineModule(OfflineModule[bool]):
    """Offline module that nudges ``model.p`` towards the share of True posts."""

    def __init__(self, model, alpha=0.001):
        # alpha scales each update applied in step().
        self.alpha = alpha
        # Differences collected by fit() since the last step().
        self.deltas = []
        self.model = model

    def fit(self, posts):
        """Record the gap between the batch estimate and the current ``model.p``.

        The estimate is the sum of ``int(post)`` over the batch divided by
        ``len(posts)``.
        """
        positives = sum(map(int, posts))
        estimate = positives / len(posts)
        gap = estimate - self.model.p
        self.deltas.append(gap)

    def step(self):
        """Shift ``model.p`` by ``alpha`` times the mean recorded gap, then reset.

        Returns:
            This module.
        """
        update = self.alpha * mean(self.deltas)
        self.model.p = self.model.p + update
        self.deltas = []
        return self


class MyOnlineModule(OnlineModule[int, bool]):
    """Online module that samples boolean posts and updates ``model.p`` from scores."""

    def __init__(self, model, alpha=0.1):
        self.model = model
        # Per-batch update directions collected by fit() since the last step().
        self.deltas = []
        # alpha scales each update applied in step().
        self.alpha = alpha
        # Sampled posts keyed by the internal id yielded from sample().
        self.posts = {}
        # Next internal id to hand out.
        self.i = 0

    def sample(self, n=1):
        """Yield *n* ``(internal_id, post)`` pairs drawn with P(True) = ``model.p``."""
        posts = random.choices(
            [True, False], weights=[self.model.p, 1 - self.model.p], k=n
        )
        for post in posts:
            post_id = self.i
            # Advance the counter before yielding: if the consumer abandons
            # the generator after receiving an item, the next sample() call
            # must not reuse (and overwrite) this id.
            self.i += 1
            self.posts[post_id] = post
            yield post_id, post

    def fit(self, scores):
        """Record the mean update direction for a batch of scored posts.

        Args:
            scores: Mapping from internal id (as yielded by ``sample``) to
                the score of that post.

        Raises:
            KeyError: If an id in *scores* was not produced by ``sample``.
            ZeroDivisionError: If *scores* is empty.
        """
        # Keep one entry per post. Re-keying a dict by the post value (a
        # bool) would collapse the whole batch to at most two entries and
        # distort both the sum and the divisor.
        scored_posts = [
            (self.posts[post_id], score) for post_id, score in scores.items()
        ]
        delta = sum(
            (int(x) - self.model.p) * score for x, score in scored_posts
        ) / len(scored_posts)
        self.deltas.append(delta)

    def step(self):
        """Shift ``model.p`` by ``alpha`` times the mean recorded delta, then reset.

        Returns:
            This module.
        """
        self.model.p = self.model.p + self.alpha * mean(self.deltas)
        self.deltas = []
        return self

The model is a simple Bernoulli distribution with a single parameter `p`, the probability of generating True.

During offline training, the module steps towards the parameter estimated from real posts.

During online training, the module steps in the direction indicated by the scores: towards values that were scored positively and away from values that were scored negatively.

### Setup

In [5]:
# Toy face used by both trainers in the cells below.
face = MyFace()
# A single Model instance is passed to both modules, so offline and online
# training update the same parameter p.
model = Model()
offline_module = MyOfflineModule(model)
online_module = MyOnlineModule(model)
# Offline trainer.
# NOTE(review): MaxEpochs(100) presumably stops after 100 epochs, and
# batch_size=1 presumably passes one post per fit() call — confirm against
# kilroylib.
offline_trainer = OfflineTrainer(
    stop_condition=MaxEpochs(100),
    posts_loader=PostsLoader(
        batch_size=1, dataset_factory=MemoryCachingDatasetFactory()
    ),
)
# Online trainer.
# NOTE(review): MaxUpdates(100) presumably stops after 100 updates,
# PostGenerator(10) presumably generates 10 posts per update, and the zero
# timedelta presumably means no wait between posts — confirm against
# kilroylib.
online_trainer = OnlineTrainer(
    stop_condition=MaxUpdates(100),
    generator=PostGenerator(10),
    scheduler=PostScheduler(timedelta(seconds=0)),
)

In [6]:
# MyFace defines no init() of its own, so this is presumably inherited from
# Face; what it sets up is not visible in this notebook.
await face.init()

In [7]:
model.p

0.5

Initial parameter value.

### Offline training

In [8]:
# Train the offline module on the face and rebind the name to whatever the
# trainer returns (presumably the trained module).
offline_module = await offline_trainer.train(offline_module, face)

In [9]:
model.p

0.34001284216836875

After offline training the parameter is around $0.33$, as it should be.

### Online training

In [10]:
# Train the online module on the face and rebind the name to whatever the
# trainer returns (presumably the trained module).
online_module = await online_trainer.train(online_module, face)

In [11]:
model.p

0.9999995369167228

After online training the parameter is around $1$, also as it should be.