# Simple training example

In [1]:
import asyncio
from datetime import timedelta
from kilroyshare import Face
from kilroylib.modules import Module
from kilroylib.data import MemoryCachingDatasetFactory, FileCachingDatasetFactory
from kilroylib.training.offline import OfflineTrainer, PostsLoader
from kilroylib.training.online import OnlineTrainer, PostGenerator, PostScheduler
from kilroylib.training.stop import MaxEpochsStopCondition, MaxUpdatesStopCondition

In [2]:
async def dummy_wait():
    await asyncio.sleep(0)  # shows that you can await things without any problem

### Face definition

In [3]:
class MyFace(Face[int, bool]):
    def __init__(self):
        self.i = 0
        self.posts = {}
    
    async def scrap(self, limit = None):
        n = limit or 100
        self.i = n+1
        for i in range(n):
            await dummy_wait()
            yield i, i % 3 == 0  # every third is True

    async def post(self, data):
        post_id = self.i
        self.i += 1
        self.posts[post_id] = data
        await dummy_wait()
        return post_id

    async def score(self, post_id):
        await dummy_wait()
        return int(self.posts[post_id]) * 2 - 1  # True -> +1, False -> -1

When scrapping posts every third post has x equal True, so around $33\%$ of posts have x equal True. The module should learn that distribution after offline training.

When scoring posts all posts with x equal True are scored $+1$, all posts with x equal False are scored $-1$. The module should learn to generate posts with True only after online training.

### Module definition

In [4]:
import random

class MyModule(Module[int, bool]):
    def __init__(self):
        self.p = 0.5  # Bernoulli distribution parameter
        self.i = 0
        self.posts = {}
    
    async def generate(self, n=1):
        # generate according to parameter
        posts = random.choices([True, False], weights=[self.p, 1-self.p], k=n)
        for post in posts:
            self.posts[self.i] = post
            await dummy_wait()
            yield self.i, post
            self.i += 1

    async def mimic(self, posts):
        # estimate new parameter from posts and step towards it
        p_est = sum(int(post) for post in posts) / len(posts)
        self.p = max(min(self.p + 0.001 * (p_est - self.p), 1), 0)
        await dummy_wait()
        return self

    async def reinforce(self, scores):
        # update parameter according to scores
        scores = {self.posts[post_id]: score for post_id, score in scores.items()}
        diff = sum((int(x) - self.p) * score for x, score in scores.items()) / len(scores)
        self.p = max(min(self.p + 0.1 * diff, 1), 0)
        await dummy_wait()
        return self

Simple Bernoulli distribution. 

When mimicking the module steps towards parameter estimated from real posts. 

When reinforcing the module steps in the right direction depending on scores.

### Setup

In [5]:
face = MyFace()
module = MyModule()
offline_trainer = OfflineTrainer(
    stop_condition=MaxEpochsStopCondition(100),
    posts_loader=PostsLoader(batch_size=1, dataset_factory=MemoryCachingDatasetFactory())
)
online_trainer = OnlineTrainer(
    stop_condition=MaxUpdatesStopCondition(100),
    generator=PostGenerator(10),
    scheduler=PostScheduler(timedelta(seconds=0))
)

In [6]:
module.p

0.5

Initial parameter value.

### Offline training

In [7]:
module = await offline_trainer.train(module, face)

In [8]:
module.p

0.34001284216836875

After offline training the parameter is around $0.33$, as it should be.

### Online training

In [9]:
module = await online_trainer.train(module, face)

In [10]:
module.p

0.9999995369167228

Ater online training the parameter is around $1$, also as it should be.