From da7a823081fd604031a72fb9e7ef14233c1883c8 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Sun, 15 Nov 2020 13:27:23 +0100 Subject: [PATCH 01/14] [WIP] first layout for bandits classes --- benchmarks/Bandits_for_online_learning.ipynb | 469 +++++++++++++++++++ river/expert/__init__.py | 14 +- river/expert/bandit.py | 263 +++++++++++ 3 files changed, 745 insertions(+), 1 deletion(-) create mode 100644 benchmarks/Bandits_for_online_learning.ipynb create mode 100644 river/expert/bandit.py diff --git a/benchmarks/Bandits_for_online_learning.ipynb b/benchmarks/Bandits_for_online_learning.ipynb new file mode 100644 index 0000000000..86e11f92ec --- /dev/null +++ b/benchmarks/Bandits_for_online_learning.ipynb @@ -0,0 +1,469 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import namedtuple\n", + "import itertools \n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import tqdm\n", + "\n", + "from river import (\n", + " optim,\n", + " datasets,\n", + " preprocessing,\n", + " linear_model,\n", + " metrics,\n", + " compose\n", + " )\n", + "from river.stream import iter_sklearn_dataset\n", + "from river.expert import (\n", + " EpsilonGreedyBandit,\n", + " UCBBandit,\n", + " Exp3Bandit,\n", + " RandomBandit,\n", + " OracleBandit\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "\n", + "def gen_xy_numpy(N=1000, p=1, intercept=0.0, seed=None, drift=False):\n", + " N = int(N)\n", + " \n", + " if seed:\n", + " np.random.seed(seed)\n", + "\n", + " beta = np.random.normal(size=p, scale=2)\n", + "\n", + " x_arr = np.random.uniform(size=(N, p))\n", + " noise = 
np.random.normal(size=N, scale=0.15)\n", + " xb = intercept + np.sum(x_arr*beta, axis=1) \n", + " \n", + " if drift:\n", + " tier = N // 3\n", + " xb[tier:(2 * tier)] *= -1\n", + " xb[(2 * tier):] *= -1\n", + " beta *= -1\n", + " \n", + " y_arr = xb + noise\n", + "\n", + " return x_arr, y_arr, beta\n", + "\n", + "\n", + "Dataset = namedtuple(\"dat\", [\"target\", \"data\", \"feature_names\", \"beta\"])\n", + "\n", + "\n", + "def gen_dataset(**kwargs):\n", + " x_arr, y_arr, beta = gen_xy_numpy(**kwargs)\n", + " dataset = Dataset(y_arr, x_arr, [\"x{}\".format(i) for i in range(p)], beta)\n", + " return dataset\n", + "\n", + "\n", + "def gen_stream(dataset=None, take=None):\n", + " if dataset is None:\n", + " dataset = gen_dataset()\n", + " \n", + " gen = iter_sklearn_dataset(dataset)\n", + " \n", + " if take:\n", + " return itertools.islice(gen, take)\n", + " else:\n", + " return gen\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Studying loss behavior" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Paper on rewards scaling : \n", + "\n", + " - [Learning values across many orders of magnitude](https://arxiv.org/pdf/1602.07714.pdf)\n", + " \n", + " - [Stack message](https://datascience.stackexchange.com/questions/20098/why-do-we-normalize-the-discounted-rewards-when-doing-policy-gradient-reinforcem)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def test():\n", + " \n", + " sc = preprocessing.MinMaxScaler()\n", + " \n", + " def compute_reward(y_pred, y_true, power=2):\n", + " loss = np.power(np.abs(y_pred - y_true), power)\n", + " loss_d = dict(l=loss)\n", + " loss = sc.learn_one(loss_d).transform_one(loss_d)[\"l\"]\n", + " return (1 / (1+ loss))\n", + " return 1 - loss\n", + " \n", + " return compute_reward" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def compute_reward(y_pred, y_true, power=2):\n", + " loss = np.power(np.abs(y_pred - y_true), power)\n", + " return 1/ (1 + np.sqrt(loss))\n", + "\n", + "limit = 30\n", + "round_factor = 3\n", + "x = np.arange(-limit, limit, 0.25)\n", + "y = np.arange(-limit, limit, 0.25)\n", + "xx, yy = np.meshgrid(x, y, sparse=True)\n", + "\n", + "f = test()\n", + "z = compute_reward(xx, yy, power=1)\n", + "z_df = (pd.DataFrame(z)\n", + " .set_index(np.round(x, round_factor))\n", + " .rename(columns={\n", + " i:val for (i, val) in enumerate(np.round(y, round_factor))\n", + " }))\n", + "\n", + "f, ax = plt.subplots(figsize=(9, 6))\n", + "ax = sns.heatmap(z_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dataset generation" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "N = 1e5\n", + "p = 20\n", + "dataset = gen_dataset(p=p, N=N, intercept=.5, drift=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training and bandits" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "l2_val=[1e-6, 1e-4, 1e-2, 1e-1, 1e-0, 2, 3]\n", + "optimz = [optim.SGD, optim.Adam]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "from itertools import product\n", + "\n", + "def get_models():\n", + " grid = product(\n", + " l2_val,\n", + " [preprocessing.StandardScaler, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, preprocessing.RobustScaler],\n", + " optimz\n", + " )\n", + " \n", + " return [generate_pipeline(l2=l2, scaler=scaler, optimizer=optimizer) for (l2, scaler, optimizer) in grid]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def 
generate_pipeline(lr=0.2, l2=0, scaler=None, optimizer=None):\n", + " if scaler is None:\n", + " scaler = preprocessing.MinMaxScaler()\n", + " if optimizer is None:\n", + " optimizer = optim.SGD\n", + "\n", + " pipeline = compose.Pipeline(\n", + " scaler,\n", + " linear_model.LinearRegression(optimizer=optimizer(), l2=l2)\n", + " )\n", + " \n", + " return pipeline" + ] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "from river import datasets\n", + "\n", + "dataset = datasets.Bananas()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "50000it [20:50, 40.00it/s]\n" + ] + } + ], + "source": [ + "mae = metrics.MSE()\n", + "take_N = 50000\n", + "\n", + "f = test()\n", + "f = compute_reward\n", + "default_par = dict(compute_reward=f,\n", + " metric=mae, \n", + " verbose=False,\n", + " save_rewards=True)\n", + "\n", + "bandits = {\n", + " \"epsilon\" : EpsilonGreedyBandit(models=get_models(), epsilon=0.1, reduce_epsilon=None, **default_par),\n", + " \"ucb\" : UCBBandit(models=get_models(), delta=0.5, **default_par),\n", + " \"random\" : RandomBandit(models=get_models(), **default_par),\n", + " #\"exp3\" : Exp3Bandit(models=get_models(), gamma=0.1, **default_par)\n", + "}\n", + "\n", + "\n", + "bandit_oracle = OracleBandit(models=get_models(), **default_par)\n", + "\n", + "print_every = N // 10\n", + "\n", + "gen = gen_stream(dataset, take=take_N)\n", + "#gen = dataset.take(take_N)\n", + "\n", + "for i, (x, y) in tqdm.tqdm(enumerate(gen)):\n", + " \n", + " for bandit in bandits.values():\n", + " bandit.learn_one(x=x, y=y)\n", + " \n", + " bandit_oracle.learn_one(x=x, y=y)\n", + " \n", + " if i >= 2500:\n", + " pass#break\n", + " \n", + " \n", + "#print(bandit.percentage_pulled)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inspecting oracle results" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "cumsum_final = pd.Series(np.array(bandit_oracle.rewards).cumsum(axis=0)[-1,:])\n", + "(cumsum_final - cumsum_final.mean()).plot(kind=\"bar\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparing results between bandits" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df_rewards = pd.DataFrame({label: bandit.rewards for (label, bandit) in bandits.items()})\n", + "df_rewards[\"oracle\"] = bandit_oracle.max_rewards\n", + "\n", + "sns.displot(df_rewards, kde=True)\n", + "\n", + "(df_rewards\n", + " .cumsum()\n", + " .plot(color=[\"red\", \"blue\", 'green', \"cyan\" ,\"magenta\"])\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAEECAYAAAA1X7/VAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAYTElEQVR4nO3df5xV5X3g8c9XREfEigVeakMQ1mTRglBhEH8gaqqE1SQEzS/3VYVNKLZrmsTatRoTiTFNs9V1XdxmfRGDJv7YbqKiiT8SpFuLWlBmCFgEDd3UH9hUkVoUXDcQn/3jHnAYZpgzcw4z5wyf9+t1XnPuued57vc89873Pve5zzk3UkpIkurrgL4OQJJUjIlckmrORC5JNWcil6SaM5FLUs2ZyCWp5gon8ohoioinI2JNRDwbEdeWEZgkKZ8oOo88IgI4NKW0NSIGAk8AX0wprSgjQEnS3h1YtILUeCfYmt0cmC2eZSRJvaRwIgeIiAFAK/AB4C9TSk91sM88YB7AoYceOum4444r46Elab/R2tr6ekppePvthYdWdqssYgiwGPijlNLazvZrbm5OLS0tpT2uJO0PIqI1pdTcfnups1ZSSv8K/A0wo8x6JUmdK2PWyvCsJ05EHAKcAzxXtF5JUj5ljJEfDXwvGyc/APhBSunBEuqVJOVQxqyVZ4ATS4hFktQDntkpSTVnIpekmjORS1LNmcglqeZKObNTknrLqCsf2u32C986r48iqQ575JJUcyZySao5E7kk1ZyJXJJqzkQuSTVnIpekmjORS1LNmcglqeY8IUiV5YkfUj72yCWp5kzkklRzJnJJqjkTuSTVnIlckmrORC5JNWcil6SaM5FLUs2ZyCWp5kzkklRzJnJJqjkTuSTVXOGLZkXE+4HvA0cCCViYUvpvRevtC16kSVIdlXH1wx3A5SmlVRFxGNAaEY+mlNaVULckqQuFh1ZSSr9MKa3K1t8C1gPvK1qvJCmfUsfII2IUcCLwVAf3zYuIloho2bRpU5kPK0n7tdJ+WCIiBgP3Al9KKb3Z/v6U0kJgIUBzc3Mq63GrpP0YOzjOLmnfK6VHHhEDaSTxu1JK95VRpyQpn8KJPCIC+C6wPqV0Y/GQJEndUUaP/DTgIuBDEbE6W84toV5JUg6Fx8hTSk8AUUIskqQe8MxOSao5E7kk1ZyJXJJqzkQuSTVnIpekmivtzE5JUn5lXm3VHrkk1ZyJXJJqzkQuSTXnGHk/5C8dSfsXe+SSVHMmckmqORO5JNWcY+TSPuZ3FtrX7JFLUs2ZyCWp5kzkk
lRzJnJJqjkTuSTVnIlckmrORC5JNWcil6SaM5FLUs2ZyCWp5kzkklRzpSTyiFgUEa9FxNoy6pMk5VdWj/x2YEZJdUmSuqGURJ5SWgb8Sxl1SZK6p9fGyCNiXkS0RETLpk2beuthJanf67VEnlJamFJqTik1Dx8+vLceVpL6PWetSFLNmcglqebKmn74P4HlwJiI2BgRnyujXklS10r5zc6U0oVl1CNJ6j6HViSp5krpkUtSn/na4e1ub+mbOPqQPXJJqjkTuSTVnEMrqg8/QksdskcuSTVnIpekmnNoZV9zOEDSPmYi3xuTsKQacGhFkmrORC5JNWcil6Sac4x8f+BYv1R97f9PIff/qj1ySao5E7kk1ZxDK1JvK/ARWuqIPXJJqjl75JL2O6OufGi32y9867w+iqQc9sglqeZM5JJUcyZySao5E7kk1ZyJXJJqzkQuSTVXiemH/W0qkCT1pkokcknqUzW/sFwpQysRMSMino+If4iIK8uoU5KUT+EeeUQMAP4SOAfYCKyMiB+llNb1uFKvRSFJuZXRIz8J+IeU0i9SSr8C/gqYWUK9kqQcIqVUrIKITwAzUkpzs9sXAVNSSp9vt988YB7AyJEjJ7344ouFHre9Pb4wbfr3u+9Qkx59VY6jaBzty/ekjjKU0Z5VeE6qcBxlPKddxpCjjiroq9d3RLSmlJrbb++1LztTSguBhQDNzc3F3j1UTzX4B5XqqIyhlVeA97e5PSLbJknqBWX0yFcCH4yI0TQS+GeADj4vaX/iuQBS7ymcyFNKOyLi88BPgQHAopTSs4UjkyTlUsoYeUrpYeDhMuqSKs+xflWM11qRpJozkUtSzZnIJanmvGhW1TkeK6kL9sglqebskWv/4icc9UP2yCWp5uyRKx97slJl2SOXpJozkUtSzTm0Ikll6MPhRxO5tL+qwvceVYihH+i/idwXiKT9hGPkklRzJnJJqrn+O7QilaSSv3bk0KHasEcuSTVnIpekmjORS1LNmcglqeZM5JJUcyZySao5E7kk1ZyJXJJqzhOCKqaSJ59InfHEpEoo1COPiE9GxLMR8W5ENJcVlCQpv6JDK2uB84FlJcQiSeqBQkMrKaX1ABFRTjSSpG7rtS87I2JeRLRERMumTZt662Elqd/rskceEUuBozq46+qU0gN5HyiltBBYCNDc3JxyRyhJ2qsuE3lK6ezeCESS1DNOP1S/5nRO7Q+KTj+cFREbgVOAhyLip+WEJUnKq+islcXA4pJikST1gKfoS1LNmcglqeZM5JJUcyZySao5E7kk1ZyJXJJqzkQuSTXnmZ3qkGdESvVhj1ySas5ELkk1ZyKXpJozkUtSzZnIJanmTOSSVHMmckmqORO5JNWciVySas5ELkk15yn6ktRNVbuERb9J5FVrWKlq/B/pvxxakaSa6zc9cknV56eCfcMeuSTVnIlckmrORC5JNVcokUfE9RHxXEQ8ExGLI2JISXFJknIq2iN/FBiXUhoP/By4qnhIkqTuKDRrJaW0pM3NFcAnioUjqSPO9tDelDlG/lngkRLrkyTl0GWPPCKWAkd1cNfVKaUHsn2uBnYAd+2lnnnAPICRI0f2KFhJ0p66TOQppbP3dn9EzAE+AvxuSintpZ6FwEKA5ubmTveTJHVPoTHyiJgBXAGckVJ6u5yQJEndUXSM/L8DhwGPRsTqiLilhJgkSd1QdNbKB8oKRJLUM57ZKUk1V5mrH27fvp2NGzfyzjvv9HUoldXU1MSIESMYOHBgX4ciqUIqk8g3btzIYYcdxqhRo4iIvg6nclJKbN68mY0bNzJ69Oi+DkdShVRmaOWdd95h6NChJvFORARDhw71E4ukPVQmkQMm8S7YPpI6UqlELknqvsqMkbc36sqHSq2vrIsOnXvuudx9990MGTKk032uueYapk2bxtln7/Wk2A499thj3HDDDTz44IMFopS0P6lsIq+alBIpJR5++OEu9/3617/eCxFJUoNDK23ceOONj
Bs3jnHjxnHTTTfxwgsvMGbMGC6++GLGjRvHyy+/zKhRo3j99dcBuO666xgzZgxTp07lwgsv5IYbbgBgzpw53HPPPQCMGjWK+fPnM3HiRE444QSee+45AJ5++mlOOeUUTjzxRE499VSef/75vjloSbVnjzzT2trKbbfdxlNPPUVKiSlTpnDGGWewYcMGvve973HyySfvtv/KlSu59957WbNmDdu3b2fixIlMmjSpw7qHDRvGqlWr+Pa3v80NN9zArbfeynHHHcfjjz/OgQceyNKlS/nyl7/Mvffe2xuHKqmfMZFnnnjiCWbNmsWhhx4KwPnnn8/jjz/OMcccs0cSB3jyySeZOXMmTU1NNDU18dGPfrTTus8//3wAJk2axH333QfAli1bmD17Nhs2bCAi2L59+z44Kkn7A4dWurAzsRdx8MEHAzBgwAB27NgBwFe/+lXOOuss1q5dy49//GPnh0vqMRN55vTTT+f+++/n7bffZtu2bSxevJjTTz+90/1PO+20XQl469at3Z5lsmXLFt73vvcBcPvttxcJXdJ+rrJDK739G4UTJ05kzpw5nHTSSQDMnTuXI444otP9J0+ezMc+9jHGjx/PkUceyQknnMDhhx+e+/GuuOIKZs+ezTe+8Q3OO8/fY5TUc7GXH/XZZ5qbm1NLS8tu29avX8/xxx/f67EUsXXrVgYPHszbb7/NtGnTWLhwIRMnTtynj1nHdlL/0NG5Hf4odO+KiNaUUnP77ZXtkdfBvHnzWLduHe+88w6zZ8/e50lckjpiIi/g7rvv7usQJMkvOyWp7kzkklRzJnJJqjkTuSTVXHW/7Pxa/jnZ+erb0uUugwcPZuvWreU+bg+NGjWKlpYWhg0b1tehSKo4e+T7wK9//eu+DkHSfsRE3onrr7+eyZMnM378eObPn79r+8c//nEmTZrE2LFjWbhw4a7tgwcP5vLLL2fChAksX76cwYMHc/XVVzNhwgROPvlkXn31VQA2bdrEBRdcwOTJk5k8eTJPPvkkAJs3b2b69OmMHTuWuXPn0hcnakmqJxN5B5YsWcKGDRt4+umnWb16Na2trSxbtgyARYsW0draSktLCwsWLGDz5s0AbNu2jSlTprBmzRqmTp3Ktm3bOPnkk1mzZg3Tpk3jO9/5DgBf/OIXueyyy3ZdBnfu3LkAXHvttUydOpVnn32WWbNm8dJLL/XNwUuqneqOkfehJUuWsGTJEk488USgcSr+hg0bmDZtGgsWLGDx4sUAvPzyy2zYsIGhQ4cyYMAALrjggl11HHTQQXzkIx8BGpevffTRRwFYunQp69at27Xfm2++ydatW1m2bNmuS9yed955e73OiyS1VSiRR8R1wEzgXeA1YE5K6Z/KCKwvpZS46qqruOSSS3bb/thjj7F06VKWL1/OoEGDOPPMM3ddfrapqYkBAwbs2nfgwIG7fvW+7eVr3333XVasWEFTU1MvHY2k/q7o0Mr1KaXxKaXfAR4ErikeUt/78Ic/zKJFi3bNYHnllVd47bXX2LJlC0cccQSDBg3iueeeY8WKFd2ue/r06dx88827bq9evRqAadOm7Trl/5FHHuGNN94ofiCS9guFeuQppTfb3DwUKO8buhzTBfeV6dOns379ek455RSg8UXmnXfeyYwZM7jllls4/vjjGTNmTIe/HNSVBQsWcOmllzJ+/Hh27NjBtGnTuOWWW5g/fz4XXnghY8eO5dRTT2XkyJFlH5akfqrwZWwj4s+Ai4EtwFkppU2d7DcPmAcwcuTISS+++OJu93t51nxsJ2n/1dllbLscWomIpRGxtoNlJkBK6eqU0vuBu4DPd1ZPSmlhSqk5pdQ8fPjwIsciSWqjy6GVlNLZOeu6C3gYmN/VjpKk8hT6sjMiPtjm5kzguSL1eRLM3tk+kjpSdB75tyJiDI3phy8Cf9DTipqamti8eTNDhw7dNW1P70kpsXnzZqctStpD0VkrF3S9Vz4jRoxg48aNbNrU4XelovFmN
2LEiL4OQ1LFVObMzoEDBzJ69Oi+DkOSasdrrUhSzZnIJanmTOSSVHOFz+zs0YNGbKIxy6Uzw4DXCz5Mf6mjCjFUpY4qxFBGHVWIoSp1VCGGqtSRp/wxKaU9z6hMKVVuAVqsozoxVKWOKsTgcdgWVWwLh1YkqeZM5JJUc1VN5Au73mW/qaMKMVSljirEUEYdVYihKnVUIYaq1NHj8n3yZackqTxV7ZFLknIykUtSzZnIJanmTOSSVHOVSOQRcXhEfDoi/jhbPh0RQ0qo95xu7PsbEXFsB9vHd6OOoyLiqGx9eEScHxFj85bvpM5vFig7OovhuG6UGRkRTdl6RMR/iIibI+IPIyLX1TIj4mM76ygiIqZl17snIk6LiD+JiPO6UX5wRHwiIi6LiC9ExIyIyP2aj4gDI+KSiPhJRDyTLY9ExB9ExMCeHFNfiIhBEXFFRPyniGiKiDkR8aOI+IuIGFyg3p+XGWddRMS/iYhFEfGN7DX2neznL38YEaP6IqY+T+QRcTGwCjgTGJQtZwGt2X1FfDdnDJ+i8etG90bEsxExuc3dt+es4xJgObAiIv4QeBA4D7gvIj6Xs44F7Zabgf+483aO8ve3WZ8J/G/go8ADETEnTww0fq5v5+viW9kxPAVMJv/0qP8FbIyIOyLi3IgYkLPcLhFxU/b4d0TEdcD1wCHAZRFxfY7yn6Jx/DNo/JbsZOAiYHVEnJAzjDuA3wG+BpybLdcCE4A78x9NpzF22Z4RMSB7M7kuIk5rd99Xcj7U7cCRwGjgIaCZRnsG8D9yxvpWRLyZLW9FxFvAsTu356xjfJv1gRHxlewN5ZsRMShH+c9HxLBs/QMRsSwi/jUinsr7nEbEfRHxe0XewGi050pgK7CCRu74d8BPgEU54zggIj4bEQ9FxJqIWBURfxURZ/YooqKnpZZwWuvzwJAOth8B/DxH+R91svwY2JYzhtXA0dn6SdkTMyu7/bOcdfw9jTehodkTfFSb41ids46XaSSIi4HZ2bJp53qO8j9rs/53wOhsfRiwJmcM69qstwIHtLmdt46fZcf9+8BfA68CtwBndON18SyNRDMIeAMYlG0fCKzNUf6ZNmWGAT/N1scDf5czhk5ff3lem9l+v9nJMhTYmKP8rcDdwJey5+PGNvetyvv6zv4G8M+8N+04gGdy1rEA+D5wZJtt/5j3+WwfL/BfaCTEM4D/Cnw/z2uizfpDbf5HzwSezBnDK8A9wL8APwBmAQd18zh+1mb9pc7u66KO22h0EKYCNwFfB84BlgJ/1J14UkqVSOQ/Bw7vYPvhwIYc5d+g0Ws8o91yJvBqzhj+vt3to7N/mi9045+l7Yt0Tbv78j65h2VP6t3Ab2XbftGNtmwbw9M9jOGnwIey9XtpXKSHLPHkTeSr2t0+KmvL5cDLOetYm/1typ7jQ7LbA2jzZrO357RNwjqk3T9fl28E2X4rgE+y+5vZAcCngady1vFr4BfAP7ZZdt7+VY7yz7RZP5DGp6L7gIO78ZyubrO+qN19uZ7TbN9JND7lfCFrh9yvzfavQRqdp4HZeq43FOD5NusrO2unPDEAv0HjE9rDNDpLtwHTc9bRCvxbGp/yXgeas+0f6EYcz7S7vSL7ezCwvjvtmlKqxC8E/RmwKiKW0OiRAoyk8e50XY7yK4C3U0p/2/6OiHg+ZwxvRcSxKaX/A5BS+mX2Eed+IO8Yd4qIgSml7TTeWHbG0ETOIayU0lvAlyJiInBXRDyUt2xmQvYxN4CmiDg6O5aDaCTAPOYC34+IrwFbaAxFrAaGAH+cs47dfnQ1pfTPNHp0CyLimJx1PBQRj9NI5LcCP4iIFTTeoPd4rjvwMPCTiFhGY3jlhwAR8Zvt49uLzwD/Gfh2RLyRbRsC/E12Xx6/AH43pfRS+zsi4uUO9m/voJ0rKaUdwLyIuIZGQs07PNASEYNTSltTSp9t8/jHAm/lrIOUUmtEnE1jqOpvaTw33XF4RMyi8Zo+OPtfIaWUI
iLPmYn3RMTtNHqviyPiS8Bi4EPAHu3biZQ95ps0hs7uiIihNN6wrwSW5KjjChqf+N8FPg5cFRETaLw5/H7OOLbvzDnZ//uvsrj+X8622F13M/++WGh81PwKcHm2fIbGR/Mzu1HHb3ewLVd5GmOeH2xfB42P8RflrGNktn/7On4LOKeb7fHbNJLNpcCd3TmWbN+x7W4PAa7sZgwnADOBC4ApNP758rbnmUWfk2zfU4DTsvVjgT8BPgWclbP8ucDVbds/a9cZPXiNDgWG9qDcpcCETu7r8iM0jaG2PeKl8Ya7vbvxdFBP9LDc0cC53SxzW7vlyGz7UcBf56xjDo3vbF6n8Sa0DvgmHXyq76T8sqJt1km9w4AB3dh/55vPBhqfzqZk24cDf9Htx98XB9XDhlgL/CnvjYveDCzvYflDulu+gnVcUbAtrqjIcVShLQrF0Em93XpzrupSxnH0l7bo7fbMXpPDynjcPp+10sYU4P00vqR7Gvgn4LS9lui8/MoelK9aHSMp1hY7y/f1cVShLYrG0JFcM6L2JroxPXZflM8UPo4y6qhCW/R2e6aGPX5IoidxVGGMfKftwP+l0WtqovGN+Lu9WL4/1VGFGKpSR4/LR8SPOruLxlBLUd+l8Sa1T8uXcRz9pS3KqKOKbVGlRL4SeIDGN8HDgFsi4oKU0id7qXx/qqMKMVSljiLlTwd+j8Z00raCxjTVLhX9py8paRQ+jjLqqEJbVKU9y34zqFIi/1xKqSVb/yUwMyIu6sXy/amOKsRQlTqKlC9jRlTRf/oyknAZx9Ff2qIq7VlGHO/p6y8HXFyqvlBsRtQjdDLLhhwzKIqWL+s4+ktbVKU9y4wjpQrNWnFxqepCObNvik7FLCMJV2UmUhXaoirtWTiOlKo1a0WqqjJmvvwgIv40Gg6JxnV0/rwXy0N1ZiJVoS2q0p5lxGEil3IoY/ZN0X/6MpJGFWYRQTXaoirtWcr0WBO51LWVNP5hJ9P4kurCiPhhN+uowpTSMo6jv7RFVdqzjDgcI3dx6WohuyhSu225Lt3QZv81NK4RMpDG6e0PAD/srfIlHkd/aYuqtGfhOFJKu64OJ2kfiojm9N40yJ3bLkop3dEb5aukCm1RlfYsKw4TuSTVnGPkklRzJnJJqjkTuSTVnIlckmru/wPiKGJF+NuhzgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "pd.DataFrame({\"original\": {feature_name:beta \n", + " for (feature_name, beta) \n", + " in zip(dataset.feature_names, dataset.beta)},\n", + " \"learned\": bandits[\"ucb\"].best_model[\"LinearRegression\"].weights}).plot(kind=\"bar\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "river-env", + "language": "python", + "name": "river-env" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/river/expert/__init__.py b/river/expert/__init__.py index 9ad4e87653..cc777fc485 100644 --- a/river/expert/__init__.py +++ b/river/expert/__init__.py @@ -16,6 +16,13 @@ """ +from .bandit import ( + EpsilonGreedyBandit, + Exp3Bandit, + OracleBandit, + UCBBandit, + RandomBandit +) from .ewa import EWARegressor from .sh import SuccessiveHalvingClassifier from .sh import SuccessiveHalvingRegressor @@ -23,8 +30,13 @@ __all__ = [ + 'EpsilonGreedyBandit', + 'Exp3Bandit', 'EWARegressor', + 'OracleBandit', + 'RandomBandit', 'SuccessiveHalvingClassifier', 'SuccessiveHalvingRegressor', - 'StackingClassifier' + 'StackingClassifier', + 'UCBBandit' ] diff --git a/river/expert/bandit.py b/river/expert/bandit.py new file mode 100644 index 0000000000..77992b78a2 --- /dev/null +++ b/river/expert/bandit.py @@ -0,0 +1,263 @@ +import abc + +import numpy as np + +from river import metrics + + +__all__ = [ + 'EpsilonGreedyBandit', + 'UCBBandit', + 'RandomBandit', + 'OracleBandit', + 'Exp3Bandit' +] + +# TODO : +# type annotation in __init__ +# loss-based reward ('compute_reward') +# tests for classification +# tests on real datasets + + +class Bandit(metaclass=abc.ABCMeta): + """Abstract class for 
bandit. + + References + ---------- + [^1] [Python code from Toulouse univ](https://www.math.univ-toulouse.fr/~jlouedec/demoBandits.html) + [^2] [Slivkins, A. Introduction to Multi-Armed Bandits](https://arxiv.org/pdf/1904.07272.pdf) + """ + + def __init__(self, models, compute_reward, metric: metrics.Metric = None, + verbose=False, save_rewards=False): + if len(models) <= 1: + raise ValueError(f"You supply {len(models)} models. At least 2 models should be supplied.") + + self.compute_reward = compute_reward + self.metric = metric + self.verbose = verbose + self.models = models + self.save_rewards = save_rewards + if save_rewards: + self.rewards = [] + + self._n_arms = len(models) + self._Q = np.zeros(self._n_arms, dtype=np.float) + self._N = np.ones(self._n_arms, dtype=np.int) + + @abc.abstractmethod + def _pull_arm(self): + pass + + @abc.abstractmethod + def _update_arm(self, arm, reward): + pass + + @property + def _best_model_idx(self): + return np.argmax(self._Q) + + @property + def best_model(self): + return self.models[self._best_model_idx] + + def print_percentage_pulled(self): + percentages = self._N / sum(self._N) + for i, pct in enumerate(percentages): + print(f"Model {i}: {round(pct*100, 2)}% of the time") + + def predict_one(self, x): + best_arm = self._pull_arm() + y_pred = self.models[best_arm].predict_one(x) + + return y_pred + + def learn_one(self, x, y): + best_arm = self._pull_arm() + best_model = self.models[best_arm] + y_pred = best_model.predict_one(x) + + best_model.learn_one(x=x, y=y) + + if self.verbose: + print( + '\t'.join(( + f'best {self._best_model_idx}', + )) + ) + print("y_pred:", y_pred, ", y_true:", y) + + reward = self.compute_reward(y_pred=y_pred, y_true=y) + + if self.metric: + self.metric.update(y_pred=y_pred, y_true=y) + + if self.save_rewards: + self.rewards += [reward] + + self._update_arm(best_arm, reward) + + return self + + +class EpsilonGreedyBandit(Bandit): + """Epsilon-greedy bandit implementation + + Parameters + 
---------- + models + The models to compare. + metric + Metric used for comparing models with. + epsilon + Exploration parameter (default : 0.1). + reduce_epsilon + Factor applied to reduce epsilon at each time-step (default : 0.99). + + + Examples + -------- + TODO + + References + ---------- + [^1]: [Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. MIT press.](http://incompleteideas.net/book/RLbook2020.pdf) + """ + + def __init__(self, epsilon=0.1, reduce_epsilon=0.99, **kwargs): + super().__init__(**kwargs) + self.epsilon = epsilon + self.reduce_epsilon = reduce_epsilon + + def _pull_arm(self): + if np.random.rand() > self.epsilon: + best_arm = np.argmax(self._Q) + else: + best_arm = np.random.choice(self._n_arms) + + return best_arm + + def _update_arm(self, arm, reward): + if self.reduce_epsilon: + self.epsilon *= self.reduce_epsilon + self._N[arm] += 1 + self._Q[arm] += (1.0 / self._N[arm]) * (reward - self._Q[arm]) + + +class UCBBandit(Bandit): + """Upper Confidence Bound bandit. + + References + ---------- + [^1]: [Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002). Finite-time analysis of the multiarmed bandit problem. Machine learning, 47(2-3), 235-256.](https://link.springer.com/content/pdf/10.1023/A:1013689704352.pdf) + """ + + def __init__(self, delta, **kwargs): + super().__init__(**kwargs) + self._t = 1 + self.delta = delta + + def _pull_arm(self): + #upper_bound = self._Q + np.sqrt(2*np.log(self._t)/self._N) + upper_bound = self._Q + np.sqrt(2*np.log(1/self.delta)/self._N) + best_arm = np.argmax(upper_bound) + return best_arm + + def _update_arm(self, arm, reward): + self._N[arm] += 1 + self._Q[arm] += (1.0 / self._N[arm]) * (reward - self._Q[arm]) + self._t += 1 + + +class Exp3Bandit(Bandit): + """Exp3 implementation from Lattimore and Szepesvári. The algorithm makes the hypothesis that the reward is in [0, 1] + + References + ---------- + [^1]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. 
Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) + """ + + def __init__(self, gamma=0.5, **kwargs): + super().__init__(**kwargs) + self._p = np.ones(self._n_arms, dtype=np.float) + self._s = np.zeros(self._n_arms, dtype=np.float) + self.gamma = gamma + + def _make_distr(self): + numerator = np.exp(self.gamma * self._s) + return numerator / np.sum(numerator) + + def _pull_arm(self): + self._p = self._make_distr() + best_arm = np.random.choice(a=range(self._n_arms), size=1, p=self._p)[0] + return best_arm + + def _update_arm(self, arm, reward): + self._s += 1.0 + self._s[arm]-= (1.0 - reward)/self._p[arm] + + +class RandomBandit(Bandit): + """Bandit that does random selection and update of models. + It is just created for testing purpose""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def _pull_arm(self): + random_arm = np.random.choice(len(self._N)) + return random_arm + + def _update_arm(self, arm, reward): + pass + + +class OracleBandit(Bandit): + """Oracle bandit that draws and updates every models. + The `predict_one` is the prediction that minimize the error for this time step. 
+ The `update_one` update all the models available.""" + + def __init__(self, save_predictions=True, **kwargs): + super().__init__(**kwargs) + self.save_predictions = save_predictions + if save_predictions: + self.predictions = [] + + @property + def _best_model_idx(self): + cum_rewards = np.max(np.array(self.rewards), axis=0) + return np.argmax(cum_rewards) + + @property + def max_rewards(self): + return np.max(np.array(self.rewards), axis=1) + + def predict_one(self, x, y): + best_arm = self._pull_arm(x, y) + y_pred = self.models[best_arm].predict_one(x) + return y_pred + + def _pull_arm(self, x, y): + preds = [model.predict_one(x) for model in self.models] + losses = [np.abs(y_pred - y) for y_pred in preds] + best_arm = np.argmin(losses) + return best_arm + + def _update_arm(self, arm, reward): + pass + + def learn_one(self, x, y): + reward = [] + prediction = [] + for model in self.models: + y_pred = model.predict_one(x=x) + prediction += [y_pred] + r = self.compute_reward(y_pred=y_pred, y_true=y) + reward += [r] + model.learn_one(x=x, y=y) + + if self.save_predictions: + self.predictions += [prediction] + if self.save_rewards: + self.rewards += [reward] From 13154a60b67d3e906e04237a8afe248d86c7a5db Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Sun, 22 Nov 2020 20:03:16 +0100 Subject: [PATCH 02/14] improve docstring, rm class not in PR, clean some --- river/expert/__init__.py | 19 +-- river/expert/bandit.py | 361 ++++++++++++++++++++------------------- 2 files changed, 190 insertions(+), 190 deletions(-) diff --git a/river/expert/__init__.py b/river/expert/__init__.py index cc777fc485..c867ad3617 100644 --- a/river/expert/__init__.py +++ b/river/expert/__init__.py @@ -17,12 +17,10 @@ """ from .bandit import ( - EpsilonGreedyBandit, - Exp3Bandit, - OracleBandit, - UCBBandit, - RandomBandit + EpsilonGreedyRegressor, + UCBRegressor, ) + from .ewa import EWARegressor from .sh import SuccessiveHalvingClassifier from .sh import SuccessiveHalvingRegressor @@ 
-30,13 +28,10 @@ __all__ = [ - 'EpsilonGreedyBandit', - 'Exp3Bandit', - 'EWARegressor', - 'OracleBandit', - 'RandomBandit', + "EpsilonGreedyRegressor", + 'EWARegressor', 'SuccessiveHalvingClassifier', 'SuccessiveHalvingRegressor', 'StackingClassifier', - 'UCBBandit' -] + 'UCBRegressor', +] \ No newline at end of file diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 77992b78a2..8665c21981 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -1,50 +1,53 @@ import abc +import copy +import typing import numpy as np +from river import base from river import metrics +from river import preprocessing __all__ = [ - 'EpsilonGreedyBandit', - 'UCBBandit', - 'RandomBandit', - 'OracleBandit', - 'Exp3Bandit' + "EpsilonGreedyRegressor", + 'UCBRegressor', ] -# TODO : -# type annotation in __init__ -# loss-based reward ('compute_reward') -# tests for classification -# tests on real datasets +# TODO: +# Docstring +# Determine which object to store (rewards/percentages pulled/loss?) +class Bandit(base.EnsembleMixin): + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, + print_every=None, save_metric_values=False, save_percentage_pulled=False): -class Bandit(metaclass=abc.ABCMeta): - """Abstract class for bandit. - - References - ---------- - [^1] [Python code from Toulouse univ](https://www.math.univ-toulouse.fr/~jlouedec/demoBandits.html) - [^2] [Slivkins, A. Introduction to Multi-Armed Bandits](https://arxiv.org/pdf/1904.07272.pdf) - """ - - def __init__(self, models, compute_reward, metric: metrics.Metric = None, - verbose=False, save_rewards=False): if len(models) <= 1: raise ValueError(f"You supply {len(models)} models. 
At least 2 models should be supplied.") - - self.compute_reward = compute_reward - self.metric = metric - self.verbose = verbose - self.models = models - self.save_rewards = save_rewards - if save_rewards: - self.rewards = [] + # Check that the model and the metric are in accordance + for model in models: + if not metric.works_with(model): + raise ValueError(f"{metric.__class__.__name__} metric can't be used to evaluate a " + + f'{model.__class__.__name__}') + super().__init__(models) + self.reward_scaler = copy.deepcopy(reward_scaler) + self.metric = copy.deepcopy(metric) + self.print_every = print_every + + self.save_metric_values = save_metric_values + if save_metric_values: + self.metric_values: typing.List = [] + + self.save_percentage_pulled = save_percentage_pulled + if save_percentage_pulled: + self.store_percentage_pulled: typing.List = [] + + # Initializing bandits internals self._n_arms = len(models) - self._Q = np.zeros(self._n_arms, dtype=np.float) - self._N = np.ones(self._n_arms, dtype=np.int) + self._n_iter = 0 # number of times learn_one is called + self._N = np.zeros(self._n_arms, dtype=np.int) + self._average_reward = np.zeros(self._n_arms, dtype=np.float) @abc.abstractmethod def _pull_arm(self): @@ -54,56 +57,123 @@ def _pull_arm(self): def _update_arm(self, arm, reward): pass + @abc.abstractmethod + def _pred_func(self, model): + pass + @property def _best_model_idx(self): - return np.argmax(self._Q) + # average reward instead of cumulated (otherwise favors arms which are pulled often) + return np.argmax(self._average_reward) @property def best_model(self): - return self.models[self._best_model_idx] + return self[self._best_model_idx] - def print_percentage_pulled(self): + @property + def percentage_pulled(self): percentages = self._N / sum(self._N) - for i, pct in enumerate(percentages): - print(f"Model {i}: {round(pct*100, 2)}% of the time") + return percentages def predict_one(self, x): best_arm = self._pull_arm() - y_pred = 
self.models[best_arm].predict_one(x) - + y_pred = self._pred_func(self[best_arm])(x) return y_pred def learn_one(self, x, y): - best_arm = self._pull_arm() - best_model = self.models[best_arm] - y_pred = best_model.predict_one(x) + chosen_arm = self._pull_arm() + chosen_model = self[chosen_arm] + + y_pred = chosen_model.predict_one(x) + self.metric.update(y_pred=y_pred, y_true=y) + chosen_model.learn_one(x=x, y=y) + + # Update bandit internals (common to all bandit) + reward = self._compute_scaled_reward(y_pred=y_pred, y_true=y) + self._n_iter += 1 + self._N[chosen_arm] += 1 + self._average_reward[chosen_arm] += (1.0 / self._N[chosen_arm]) * \ + (reward - self._average_reward[chosen_arm]) + + # Specific update of the arm for certain bandit class + self._update_arm(chosen_arm, reward) + + if self.print_every: + if (self._n_iter % self.print_every) == 0: + self._print_info() + + if self.save_percentage_pulled: + self.store_percentage_pulled += [self.percentage_pulled] - best_model.learn_one(x=x, y=y) + if self.save_metric_values: + self.metric_values += [self.metric._eval(y_pred, y)] - if self.verbose: - print( - '\t'.join(( - f'best {self._best_model_idx}', - )) - ) - print("y_pred:", y_pred, ", y_true:", y) + return self - reward = self.compute_reward(y_pred=y_pred, y_true=y) + def add_models(self, new_models): + if not isinstance(new_models, list): + raise TypeError("Argument `new_models` must be of a list") + + length_new_models = len(new_models) + # Careful, not validation of the model is done here (contrary to __init__) + self.models += new_models + self._n_arms += length_new_models + self._N = np.concatenate([self._N, np.zeros(length_new_models, dtype=np.int)]) + self._average_reward = np.concatenate([self._average_reward, np.zeros(length_new_models, dtype=np.float)]) + + def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): + metric_value = self.metric._eval(y_pred, y_true) + metric_to_reward_dict = { + "metric": metric_value if 
self.metric.bigger_is_better else (-1) * metric_value + } + if update_scaler: + self.reward_scaler.learn_one(metric_to_reward_dict) + reward = self.reward_scaler.transform_one(metric_to_reward_dict)["metric"] + return reward + + def _print_info(self): + print( + str(self), + str(self.metric), + "Best model id: " + str(self._best_model_idx), + sep="\n\t" + ) - if self.metric: - self.metric.update(y_pred=y_pred, y_true=y) - if self.save_rewards: - self.rewards += [reward] +class EpsilonGreedyBandit(Bandit): + def __init__(self, epsilon=0.1, epsilon_decay=None, **kwargs): + super().__init__(**kwargs) + self.epsilon = epsilon + self.epsilon_decay = epsilon_decay + if epsilon_decay: + self._starting_epsilon = epsilon + if not self.reward_scaler: + self.reward_scaler = preprocessing.StandardScaler() - self._update_arm(best_arm, reward) + def _pull_arm(self): + if np.random.rand() > self.epsilon: + chosen_arm = np.argmax(self._average_reward) + else: + chosen_arm = np.random.choice(self._n_arms) - return self + return chosen_arm + def _update_arm(self, arm, reward): + # The arm internals are already updated in the `learn_one` phase of class `Bandit`. + if self.epsilon_decay: + self.epsilon = self._starting_epsilon*np.exp(-self._n_iter*self.epsilon_decay) + + +class EpsilonGreedyRegressor(EpsilonGreedyBandit, base.Regressor): + """Epsilon-greedy bandit algorithm for regression. + + This bandit selects the best arm (defined as the one with the highest average reward) with + probability $(1 - \epsilon)$ and draws a random arm with probability $\epsilon$. It is also + called Follow-The-Leader (FTL) algorithm. + + For this bandit, reward are supposed to be 1-subgaussian, hence the use of the StandardScaler + and MaxAbsScaler as `reward_scaler`. -class EpsilonGreedyBandit(Bandit): - """Epsilon-greedy bandit implementation - Parameters ---------- models @@ -112,152 +182,87 @@ class EpsilonGreedyBandit(Bandit): Metric used for comparing models with. 
epsilon Exploration parameter (default : 0.1). - reduce_epsilon - Factor applied to reduce epsilon at each time-step (default : 0.99). Examples -------- - TODO + >>> from river import linear_model + >>> from river import expert + >>> from river import preprocessing + >>> from river import metrics + + + TODO: finish ex - References + References ---------- [^1]: [Sutton, R. S., & Barto, A. G. (2018). Reinforcement learning: An introduction. MIT press.](http://incompleteideas.net/book/RLbook2020.pdf) + [^2]: [Rivasplata, O. (2012). Subgaussian random variables: An expository note. Internet publication, PDF.]: (https://sites.ualberta.ca/~omarr/publications/subgaussians.pdf) + [^3]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) """ - def __init__(self, epsilon=0.1, reduce_epsilon=0.99, **kwargs): - super().__init__(**kwargs) - self.epsilon = epsilon - self.reduce_epsilon = reduce_epsilon - - def _pull_arm(self): - if np.random.rand() > self.epsilon: - best_arm = np.argmax(self._Q) - else: - best_arm = np.random.choice(self._n_arms) - - return best_arm - - def _update_arm(self, arm, reward): - if self.reduce_epsilon: - self.epsilon *= self.reduce_epsilon - self._N[arm] += 1 - self._Q[arm] += (1.0 / self._N[arm]) * (reward - self._Q[arm]) + def _pred_func(self, model): + return model.predict_one class UCBBandit(Bandit): - """Upper Confidence Bound bandit. - - References - ---------- - [^1]: [Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002). Finite-time analysis of the multiarmed bandit problem. 
Machine learning, 47(2-3), 235-256.](https://link.springer.com/content/pdf/10.1023/A:1013689704352.pdf) - """ - - def __init__(self, delta, **kwargs): + def __init__(self, delta=None, explore_each_arm=1, **kwargs): super().__init__(**kwargs) - self._t = 1 + if delta is not None and (delta >= 1 or delta <= 0): + raise ValueError("The parameter delta should be comprised in ]0, 1[ (or set to None)") self.delta = delta + self.explore_each_arm = explore_each_arm - def _pull_arm(self): - #upper_bound = self._Q + np.sqrt(2*np.log(self._t)/self._N) - upper_bound = self._Q + np.sqrt(2*np.log(1/self.delta)/self._N) - best_arm = np.argmax(upper_bound) - return best_arm - - def _update_arm(self, arm, reward): - self._N[arm] += 1 - self._Q[arm] += (1.0 / self._N[arm]) * (reward - self._Q[arm]) - self._t += 1 - - -class Exp3Bandit(Bandit): - """Exp3 implementation from Lattimore and Szepesvári. The algorithm makes the hypothesis that the reward is in [0, 1] - - References - ---------- - [^1]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) - """ - - def __init__(self, gamma=0.5, **kwargs): - super().__init__(**kwargs) - self._p = np.ones(self._n_arms, dtype=np.float) - self._s = np.zeros(self._n_arms, dtype=np.float) - self.gamma = gamma - - def _make_distr(self): - numerator = np.exp(self.gamma * self._s) - return numerator / np.sum(numerator) + if not self.reward_scaler: + self.reward_scaler = preprocessing.StandardScaler() def _pull_arm(self): - self._p = self._make_distr() - best_arm = np.random.choice(a=range(self._n_arms), size=1, p=self._p)[0] - return best_arm - - def _update_arm(self, arm, reward): - self._s += 1.0 - self._s[arm]-= (1.0 - reward)/self._p[arm] - - -class RandomBandit(Bandit): - """Bandit that does random selection and update of models. 
- It is just created for testing purpose""" - - def __init__(self, **kwargs): - super().__init__(**kwargs) + not_pulled_enough = self._N <= self.explore_each_arm + if any(not_pulled_enough): # Explore all arms pulled less than `explore_each_arm` times + never_pulled_arm = np.where(not_pulled_enough)[0] #[0] because returned a tuple (array(),) even when input is 1D array + chosen_arm = np.random.choice(never_pulled_arm) + else: + if self.delta: + exploration_bonus = np.sqrt(2 * np.log(1/self.delta) / self._N) + else: + exploration_bonus = np.sqrt(2 * np.log(self._n_iter) / self._N) + upper_bound = self._average_reward + exploration_bonus + chosen_arm = np.argmax(upper_bound) - def _pull_arm(self): - random_arm = np.random.choice(len(self._N)) - return random_arm + return chosen_arm def _update_arm(self, arm, reward): + # The arm internals are already updated in the `learn_one` phase of class `Bandit`. pass -class OracleBandit(Bandit): - """Oracle bandit that draws and updates every models. - The `predict_one` is the prediction that minimize the error for this time step. - The `update_one` update all the models available.""" +class UCBRegressor(UCBBandit, base.Regressor): + """Upper Confidence Bound bandit for regression. - def __init__(self, save_predictions=True, **kwargs): - super().__init__(**kwargs) - self.save_predictions = save_predictions - if save_predictions: - self.predictions = [] + The class offers 2 implementations of UCB: + - UCB1 from [^1], when the parameter delta has value None + - UCB(delta) from [^2], when the parameter delta is in (0, 1) - @property - def _best_model_idx(self): - cum_rewards = np.max(np.array(self.rewards), axis=0) - return np.argmax(cum_rewards) + For this bandit, rewards are supposed to be 1-subgaussian (see Lattimore and Szepesvári, + chapter 6, p. 91) hence the use of the `StandardScaler` and `MaxAbsScaler` as `reward_scaler`. 
- @property - def max_rewards(self): - return np.max(np.array(self.rewards), axis=1) + Parameters + ---------- + models + The models to compare. + metric + Metric used for comparing models with. + delta + For UCB(delta) implementation. Lower value means more exploration. - def predict_one(self, x, y): - best_arm = self._pull_arm(x, y) - y_pred = self.models[best_arm].predict_one(x) - return y_pred - def _pull_arm(self, x, y): - preds = [model.predict_one(x) for model in self.models] - losses = [np.abs(y_pred - y) for y_pred in preds] - best_arm = np.argmin(losses) - return best_arm + References + ---------- + [^1]: [Auer, P., Cesa-Bianchi, N., & Fischer, P. (2002). Finite-time analysis of the multiarmed bandit problem. Machine learning, 47(2-3), 235-256.](https://link.springer.com/content/pdf/10.1023/A:1013689704352.pdf) + [^2]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) + [^3]: [Rivasplata, O. (2012). Subgaussian random variables: An expository note. 
Internet publication, PDF.]: (https://sites.ualberta.ca/~omarr/publications/subgaussians.pdf) + """ - def _update_arm(self, arm, reward): - pass + def _pred_func(self, model): + return model.predict_one - def learn_one(self, x, y): - reward = [] - prediction = [] - for model in self.models: - y_pred = model.predict_one(x=x) - prediction += [y_pred] - r = self.compute_reward(y_pred=y_pred, y_true=y) - reward += [r] - model.learn_one(x=x, y=y) - - if self.save_predictions: - self.predictions += [prediction] - if self.save_rewards: - self.rewards += [reward] From b0fefc6f33fdb13ec4bcfab4f44a960dde8dc135 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Sun, 22 Nov 2020 20:16:14 +0100 Subject: [PATCH 03/14] delete nb from pr, clean some --- benchmarks/Bandits_for_online_learning.ipynb | 469 ------------------- river/expert/__init__.py | 4 +- river/expert/bandit.py | 7 +- 3 files changed, 6 insertions(+), 474 deletions(-) delete mode 100644 benchmarks/Bandits_for_online_learning.ipynb diff --git a/benchmarks/Bandits_for_online_learning.ipynb b/benchmarks/Bandits_for_online_learning.ipynb deleted file mode 100644 index 86e11f92ec..0000000000 --- a/benchmarks/Bandits_for_online_learning.ipynb +++ /dev/null @@ -1,469 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from collections import namedtuple\n", - "import itertools \n", - "\n", - "import numpy as np\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "import tqdm\n", - "\n", - "from river import (\n", - " optim,\n", - " datasets,\n", - " preprocessing,\n", - " linear_model,\n", - " metrics,\n", - " compose\n", - " )\n", - "from river.stream import iter_sklearn_dataset\n", - "from river.expert import (\n", - " EpsilonGreedyBandit,\n", 
- " UCBBandit,\n", - " Exp3Bandit,\n", - " RandomBandit,\n", - " OracleBandit\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "def gen_xy_numpy(N=1000, p=1, intercept=0.0, seed=None, drift=False):\n", - " N = int(N)\n", - " \n", - " if seed:\n", - " np.random.seed(seed)\n", - "\n", - " beta = np.random.normal(size=p, scale=2)\n", - "\n", - " x_arr = np.random.uniform(size=(N, p))\n", - " noise = np.random.normal(size=N, scale=0.15)\n", - " xb = intercept + np.sum(x_arr*beta, axis=1) \n", - " \n", - " if drift:\n", - " tier = N // 3\n", - " xb[tier:(2 * tier)] *= -1\n", - " xb[(2 * tier):] *= -1\n", - " beta *= -1\n", - " \n", - " y_arr = xb + noise\n", - "\n", - " return x_arr, y_arr, beta\n", - "\n", - "\n", - "Dataset = namedtuple(\"dat\", [\"target\", \"data\", \"feature_names\", \"beta\"])\n", - "\n", - "\n", - "def gen_dataset(**kwargs):\n", - " x_arr, y_arr, beta = gen_xy_numpy(**kwargs)\n", - " dataset = Dataset(y_arr, x_arr, [\"x{}\".format(i) for i in range(p)], beta)\n", - " return dataset\n", - "\n", - "\n", - "def gen_stream(dataset=None, take=None):\n", - " if dataset is None:\n", - " dataset = gen_dataset()\n", - " \n", - " gen = iter_sklearn_dataset(dataset)\n", - " \n", - " if take:\n", - " return itertools.islice(gen, take)\n", - " else:\n", - " return gen\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Studying loss behavior" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Paper on rewards scaling : \n", - "\n", - " - [Learning values across many orders of magnitude](https://arxiv.org/pdf/1602.07714.pdf)\n", - " \n", - " - [Stack message](https://datascience.stackexchange.com/questions/20098/why-do-we-normalize-the-discounted-rewards-when-doing-policy-gradient-reinforcem)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def 
test():\n", - " \n", - " sc = preprocessing.MinMaxScaler()\n", - " \n", - " def compute_reward(y_pred, y_true, power=2):\n", - " loss = np.power(np.abs(y_pred - y_true), power)\n", - " loss_d = dict(l=loss)\n", - " loss = sc.learn_one(loss_d).transform_one(loss_d)[\"l\"]\n", - " return (1 / (1+ loss))\n", - " return 1 - loss\n", - " \n", - " return compute_reward" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "def compute_reward(y_pred, y_true, power=2):\n", - " loss = np.power(np.abs(y_pred - y_true), power)\n", - " return 1/ (1 + np.sqrt(loss))\n", - "\n", - "limit = 30\n", - "round_factor = 3\n", - "x = np.arange(-limit, limit, 0.25)\n", - "y = np.arange(-limit, limit, 0.25)\n", - "xx, yy = np.meshgrid(x, y, sparse=True)\n", - "\n", - "f = test()\n", - "z = compute_reward(xx, yy, power=1)\n", - "z_df = (pd.DataFrame(z)\n", - " .set_index(np.round(x, round_factor))\n", - " .rename(columns={\n", - " i:val for (i, val) in enumerate(np.round(y, round_factor))\n", - " }))\n", - "\n", - "f, ax = plt.subplots(figsize=(9, 6))\n", - "ax = sns.heatmap(z_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Dataset generation" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "N = 1e5\n", - "p = 20\n", - "dataset = gen_dataset(p=p, N=N, intercept=.5, drift=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Training and bandits" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "l2_val=[1e-6, 1e-4, 1e-2, 1e-1, 1e-0, 2, 3]\n", - "optimz = [optim.SGD, optim.Adam]" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "from itertools import product\n", - "\n", - "def get_models():\n", - " grid = product(\n", - " l2_val,\n", - " [preprocessing.StandardScaler, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, preprocessing.RobustScaler],\n", - " optimz\n", - " )\n", - " \n", - " return [generate_pipeline(l2=l2, scaler=scaler, optimizer=optimizer) for (l2, scaler, optimizer) in grid]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [], - "source": [ - "def 
generate_pipeline(lr=0.2, l2=0, scaler=None, optimizer=None):\n", - " if scaler is None:\n", - " scaler = preprocessing.MinMaxScaler()\n", - " if optimizer is None:\n", - " optimizer = optim.SGD\n", - "\n", - " pipeline = compose.Pipeline(\n", - " scaler,\n", - " linear_model.LinearRegression(optimizer=optimizer(), l2=l2)\n", - " )\n", - " \n", - " return pipeline" - ] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "from river import datasets\n", - "\n", - "dataset = datasets.Bananas()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "50000it [20:50, 40.00it/s]\n" - ] - } - ], - "source": [ - "mae = metrics.MSE()\n", - "take_N = 50000\n", - "\n", - "f = test()\n", - "f = compute_reward\n", - "default_par = dict(compute_reward=f,\n", - " metric=mae, \n", - " verbose=False,\n", - " save_rewards=True)\n", - "\n", - "bandits = {\n", - " \"epsilon\" : EpsilonGreedyBandit(models=get_models(), epsilon=0.1, reduce_epsilon=None, **default_par),\n", - " \"ucb\" : UCBBandit(models=get_models(), delta=0.5, **default_par),\n", - " \"random\" : RandomBandit(models=get_models(), **default_par),\n", - " #\"exp3\" : Exp3Bandit(models=get_models(), gamma=0.1, **default_par)\n", - "}\n", - "\n", - "\n", - "bandit_oracle = OracleBandit(models=get_models(), **default_par)\n", - "\n", - "print_every = N // 10\n", - "\n", - "gen = gen_stream(dataset, take=take_N)\n", - "#gen = dataset.take(take_N)\n", - "\n", - "for i, (x, y) in tqdm.tqdm(enumerate(gen)):\n", - " \n", - " for bandit in bandits.values():\n", - " bandit.learn_one(x=x, y=y)\n", - " \n", - " bandit_oracle.learn_one(x=x, y=y)\n", - " \n", - " if i >= 2500:\n", - " pass#break\n", - " \n", - " \n", - "#print(bandit.percentage_pulled)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inspecting oracle results" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAD7CAYAAACWq8i5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Il7ecAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXDElEQVR4nO3dfbhdVX3g8e8PIowtlZcQeQsYRrFaO8XBK+AMPqVSEWQeA1NssVNNrZq2SnGsM2NaZ4Y+vsygz7Qz+ozSwZoKVgW0KhlBEaiO1grmIhACAXINiQmChDfBYuXtN3+sdc3O4dxk3eSee++59/t5nv3cfX57nXXWPnef/dt77XX2icxEkqQWe8x0AyRJw8OkIUlqZtKQJDUzaUiSmpk0JEnNTBqSpGYLZroBg3LggQfmkiVLZroZkjRUrr/++vsyc9FEy+ds0liyZAmjo6Mz3QxJGioRsWlHy+2ekiQ1M2lIkpqZNCRJzUwakqRmJg1JUjOThiSpmUlDktRszn5PYzZasuLyn81vPO+0GWzJrhn29kvafZ5pSJKaeabRMdkj6dl25D3b2iNp7pnTSWPYd6KDbv+wvz+Spt+cThqaWbMtKc229kjDyGsakqRmJg1JUjOThiSpmUlDktTMpCFJambSkCQ1M2lIkpqZNCRJzUwakqRmJg1JUjOThiSpmUlDktTMpCFJambSkCQ1m5KkERErI+LeiFjbiR0QEVdFxPr6d/8aj4j4cESMRcSaiDim85xltfz6iFjWib8kIm6uz/lwRMRUtFuSNDlTdabxCeCUntgK4JrMPAq4pj4GOBU4qk7LgfOhJBngXOA44Fjg3PFEU8u8pfO83teSJE2DKUkamfkN4IGe8FLgwjp/IXB6J35RFtcC+0XEIcCrgKsy84HMfBC4CjilLntWZl6bmQlc1KlLkjSNBnlN46DMvLvO3wMcVOcPAzZ3ym2psR3Ft/SJS5Km2bRcCK9nCDno14mI5RExGhGjW7duHfTLSdK8M8ik8cPatUT9e2+N3wUc3im3uMZ2FF/cJ/40mXlBZo5k5siiRYumZCUkSdsMMmmsAsZHQC0DLuvE31BHUR0P/Kh2Y10JnBwR+9cL4CcDV9ZlD0fE8XXU1Bs6dUmSptGCqagkIj4DnAgcGBFbKKOgzgMujYg3AZuA36zFrwBeDYwBjwJvBMjMByLivcDqWu49mTl+cf2tlBFazwS+XCdJ0jSbkqSRma+bYNFJfcom8LYJ6lkJrOwTHwV+eXfaKE3WkhWX/2x+43mnzWBLpNnDb4RLkpqZNCRJzUwakqRmU3JNQ5oKXkOQZj+ThoaWSUaafnZPSZKamTQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWpm0pAkNTNpSJKamTQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzFujN5jsLbhn2y27h739kmYPzzQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWpm0pAkNTNpSJKamTQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWpm0pAkNTNpSJKaDTxpRMTGiLg5Im6MiNEaOyAiroqI9fXv/jUeEfHhiBiLiDURcUynnmW1/PqIWDbodkuSnm66frnv1zLzvs7jFcA1mXleRKyoj98FnAocVafjgPOB4yLiAOBcYARI4PqIWJWZD05T+zWD/OVBafaYqe6ppcCFdf5C4PRO/KIsrgX2i4hDgFcBV2XmAzVRXAWcMs1tlqR5bzqSRgJfjYjrI2J5jR2UmXfX+XuAg+r8YcDmznO31NhE8e1ExPKIGI2I0a1
bt07lOkiSmJ7uqRMy866IeDZwVUTc1l2YmRkRORUvlJkXABcAjIyMbNcfJknafQM/08jMu+rfe4EvAMcCP6zdTtS/99bidwGHd56+uMYmikuSptFAk0ZE/HxE/ML4PHAysBZYBYyPgFoGXFbnVwFvqKOojgd+VLuxrgROjoj960irk2tMkjSNBt09dRDwhYgYf61PZ+ZXImI1cGlEvAnYBPxmLX8F8GpgDHgUeCNAZj4QEe8FVtdy78nMBwbcdklSj4EmjczcABzdJ34/cFKfeAJvm6CulcDKqW6jNCxDdIelnZrbput7GtqBYd8ZDHv7JbXzNiKSpGYmDUlSM5OGJKmZSUOS1MykIUlqZtKQJDUzaUiSmpk0JEnNTBqSpGYmDUlSM5OGJKmZSUOS1MykIUlqZtKQJDUzaUiSmpk0JEnNTBqSpGYmDUlSM5OGJKmZSUOS1MykIUlqZtKQJDUzaUiSmpk0JEnNTBqSpGYmDUlSM5OGJKnZgplugKTds2TF5T+b33jeaTPYEs0HnmlIkpqZNCRJzUwakqRmJg1JUjOThiSpmUlDktRsaJJGRJwSEbdHxFhErJjp9kjSfDQU39OIiD2BjwCvBLYAqyNiVWbeOrMt067wewXS8BqKpAEcC4xl5gaAiLgYWArsUtIY9p3WsLdf0vAalu6pw4DNncdbakySNI0iM2e6DTsVEWcCp2Tmm+vj1wPHZebZPeWWA8sBjjjiiJds2rRp2tuq+WuiM8Cpis+29syV+Gxqy2yIR8T1mTnCBIblTOMu4PDO48U1tp3MvCAzRzJzZNGiRdPWOEmaL4YlaawGjoqIIyNiL+AsYNUMt0mS5p2huBCemU9ExNnAlcCewMrMvGWGmyVJ885QJA2AzLwCuGKm2yFJ89mwdE9JkmYBk4YkqZlJQ5LUzKQhSWpm0pAkNTNpSJKamTQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWpm0pAkNTNpSJKamTQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWq2YKYbIGkwNp532kw3QXOQZxqSpGYmDUlSM5OGJKmZSUOS1MykIUlqZtKQJDVzyK00RSY7xNUhsRpGnmlIkpqZNCRJzUwakqRmJg1JUjOThiSpmUlDktTMpCFJaub3NKR5xu+HaHcM7EwjIv4sIu6KiBvr9OrOsj+JiLGIuD0iXtWJn1JjYxGxohM/MiKuq/FLImKvQbVbkjSxQZ9p/M/M/B/dQET8EnAW8CLgUODqiHh+XfwR4JXAFmB1RKzKzFuBD9S6Lo6IvwTeBJw/4LZLU8JvimsumYlrGkuBizPzp5l5JzAGHFunsczckJmPARcDSyMigFcAn6vPvxA4ffqbLUkadNI4OyLWRMTKiNi/xg4DNnfKbKmxieILgYcy84me+NNExPKIGI2I0a1bt07lekiS2M2kERFXR8TaPtNSSvfRc4EXA3cDf777zd2xzLwgM0cyc2TRokWDfjlJmnd265pGZv56S7mI+BjwpfrwLuDwzuLFNcYE8fuB/SJiQT3b6JaXJE2jQY6eOqTz8AxgbZ1fBZwVEXtHxJHAUcB3gNXAUXWk1F6Ui+WrMjOBrwFn1ucvAy4bVLslSRMb5OipD0bEi4EENgK/D5CZt0TEpcCtwBPA2zLzSYCIOBu4EtgTWJmZt9S63gVcHBHvA24APj7AdkuSJjCwpJGZr9/BsvcD7+8TvwK4ok98A2V0lSRpBvmNcEmzkt9XmZ2895QkqZlJQ5LUzKQhSWpm0pAkNTNpSJKaOXpK0lBxVNXM8kxDktTMpCFJamb3lKRpYbfS3GDSkKQpNpcTpElD0oyayzvYuchrGpKkZiYNSVIzu6ckaZrMha44zzQkSc1MGpKkZiYNSVIzk4YkqZlJQ5LUzKQhSWrmkFtJ2om5MFR2qpg0JGkXzcdkYveUJKmZSUOS1MzuKUma46ayG82kIUnz1K4kE7unJEnNTBqSpGZ2T0kCJt9VMR+
Hm8ozDUnSJJg0JEnNTBqSpGZe05C0Q8NyrWNY2jnsTBqSNMOGKeHtVvdURLw2Im6JiKciYqRn2Z9ExFhE3B4Rr+rET6mxsYhY0YkfGRHX1fglEbFXje9dH4/V5Ut2p82SpF0XmbnrT454IfAU8H+A/5CZozX+S8BngGOBQ4GrgefXp90BvBLYAqwGXpeZt0bEpcDnM/PiiPhL4KbMPD8i3gr8Smb+QUScBZyRmb+1s7aNjIzk6OjoLq+bJM1HEXF9Zo5MtHy3zjQyc11m3t5n0VLg4sz8aWbeCYxREsixwFhmbsjMx4CLgaUREcArgM/V518InN6p68I6/zngpFpekjTNBjV66jBgc+fxlhqbKL4QeCgzn+iJb1dXXf6jWl6SNM12eiE8Iq4GDu6z6N2ZednUN2nXRcRyYDnAEUccMcOtkaS5Z6dJIzN/fRfqvQs4vPN4cY0xQfx+YL+IWFDPJrrlx+vaEhELgH1r+X5tvQC4AMo1jV1otyRpBwbVPbUKOKuOfDoSOAr4DuXC91F1pNRewFnAqixX478GnFmfvwy4rFPXsjp/JvB3uTtX7yVJu2x3h9yeERFbgJcBl0fElQCZeQtwKXAr8BXgbZn5ZD2LOBu4ElgHXFrLArwL+OOIGKNcs/h4jX8cWFjjfwz8bJiuJGl67daQ29nMIbeSNHkDHXIrSZpfTBqSpGZztnsqIrYCm+rDA4H7+hQzPnfjs6ktxo0PU/w5mbmoT5kiM+f8BIwan1/x2dQW48aHNd5vsntKktTMpCFJajZfksYFxuddfDa1xbjxYY0/zZy9EC5Jmnrz5UxDkjQFTBqSpGYmDUlSs53eGn0YRcQLKL/4N/5DTndR7qa7bgflDwOuy8wfd+LnAN/OzNX1J2xPAW7LzCs6ZS7KzDf0qfMEyi8VPg5cmJkPR8QzKTdcPAZ4JnBOZt7aec74nX9/kJlXR8RvA/+KcnPHq4DXUG4T/yTlZ3M/nZkPT/4dUquIeHZm3juJ8gszs++t+6WZNtntua/WL3QMy0S5W+6NlJ3z79RpxXisT/lzgLuBLwIbgaU1fi7wj8Ao8N+BvwNuo/yWxzrKLdv/L/DjOv9gp8631Nc7F3gU+NMavwD4X8AJwD8BPwG+CbwVWAR8Crik1vtJ4AvA64HrgB8A/xn4B+AjwPspdxE+cQbe42dPsvzCKXrdfYHz6v/hgc7/4jxgvz7lnwV8r76Xv92JHwysre/jQuDPgJvrNvBC4IA6LazbxJnAAZ02fBxYA9wCvLDGR4ANlJ82/inwV8Bze9ozQvkJgL+hJP+rKL9EeX3dNm6pj7cC1wK/B/w+5U7Ra+r0ZeAPgGf0Wd896/b0XuBfd+I/V+v4j8A/A363brMfBPbpqeMO4Fc6j59Rt7tVlLtTH17jzwO+ATxU/xcr+tT1z4GVwPuAfYCP1ff9s7UtlwM3Ad+l/PTzSa3rOwfW9URm3/b8aeCgnX4Op3uHM+ipbgj9PlB7Aev7xG8GNtf5JZQk8fYav6FuhA/Xf9h3gc/Uf9yv1n/83XV+fafO1cCiOn8bcHOd/26nzA2UxHJy/adtBR6h/HbIfsAPgT07bVzT+VB8vc4fUTfE2bThDXJH+hDllvsH97T7Q8C3KWdw3ekaSlI/nbIj+Ftgb8pOZTPlw7+GcqBxOPAU5UDhzs70eG37hvp6f0XZMTyHksi/WONfA15a5zcD9wDfp/yOzDuAQ+v8qcDrapkza/lv1fdoMeX2//+F8hs0G+r7cHxdtrjOr6QcUBzQM32ScpDy7+vz/qLWfylle/pofU/+N/By4LE6PUzZ9h6hnMU+CTxcn/vnwCco2/j9wEU1fjlwRp3fSrkFxQP1tc6gfN6+AfxhfZ/XAu+s7/Pf1/f2BMpB1HuAV9b37Js963vyBOs77Ot6NeVz+i5mz/b8Dur2PN+Sxm2Ue6f0xtdRju7X9Ez/BPy0U26f+k+4F7hxfAdf/+5R39hHgBfX2PibfxOwP2WnOtq
p77PApjr/18BInb8FWN0p9wzKvbIuqRvlI2zbKa+ldItRX6Nb/yOzbMMb5I50A3Ah8N96/rdP1jZ+rWd6BPhJp9y7a71rqAkc+H5n+TspO5V/0YndyfbJ/saebWp8G7m2E/8u2w4UXk7Zgd1T27O8z+veRN3G6uPV9e8d4//3Puv7WM//ZEONPVbLLKAk3c+P1w9Ebcf4UPsPAw/SObqsdXXbciP1IAy4nW0HL91t94b6nj6LcmZ8BWXnej9wcp/1XdPzGtfWv+uBdX3WdQNlm5tL67o3nf3ObNie+z3u276dFRi2iXLdYYxyWjv+869fAZ6gdAM9p2f6B+DenjoW1A3hyfp4j86yfSlH4J+lHMV8v8Y31o15fKM+pMYPpRyVfI/SzfR4Xf4IcHTP676jLvs+pdvsGspp7g/qB+BjlKT4xlp+EfDobNrwGOCOFPgqJZl1z+oOqvV8q897sI56FtmJ/S7lQGE8kb+vz3M+C/wF8Av1/7GFkrjeWR+P74j+qLb/FZQzsw9RjlLvBj7ZU++ete4rgddSDhBOr8u6BwWvAa4cf/8o1+O6298etf4b+qzvbX3W91zKQcL6+nhlz/I7KF2v59S6N9Tp3wK/QWcnTukSfYDSFfOnlKP851C2/S/11LuwruO3Kdf27mPbAVN3fY8BvtFZ380967ue8rm9boDresYUrutLW9a1Pv4x8J/YPpHN2PZcy6/Z6T52ZwWGcaobxPF1Q/gNtp3Sn9Cn7GLg833ie9PpK+3ED6TuQIHT6Dnq7VP+54AjKUcmRwMvqRvG8ycofyhwaJ3fj9INdCzwojr/gp7yX51NGx4D3JFSzrI+QDnLeZDyoV5H6UI7ts+6fhD4r33inwLG+sSfB3yu85rX1vfx3J5pvOvx4NquSyhHoDdTjjxX07+L9Oha/svAC+p78xDlIOfWuk5/D/xiLX8MJdneS9nh3VHnrwdO7VP/3wAf7RP/JvB4n/hz6+vtQdmRfpNygPLXPdNBnfW9lXLwM342fCvlIGffPvWfRDliX0fpnvlbShJ4kHJQtp5yAHJcZ33X1WV31OWPUA76jhzQun5imtZ1rK7r8bX8ovr//wAlAc6W7fmine1f/Ub4kIuI/SldSkuBZ9fwDykb8Pszc3VP+Q8CP87M9/TEP0X58D6vJ/484LzMPDMiXkM56loCnN/TlI9m5taIOJjShfQQ8HzKWdtmyofkZVl+8rdb/9GUD8NTlDOtP6Rc17mP0gVxCKUr7/cy846IeBnw7yiDGroj3d5M+VD2joCbKP4WSrKbsDzlbO25mbl2F+qf7Ou+pZa/tif+dsoR7PcoieZllJ3XfUBmz8i+ScZvp+zQxuMvB36Ncl3vfuCpndTzohpft4PX7dbzIkpX5HiCfKK3fNaRiRGxsL4FH8rM36FjByMWdxiPiMi6w4uIQ4C1mblwEvV8MjNfP4nyX6LsrJMyGOS+nZR/OeUA8ebM/GonPj4Sc+1uxl9OOYD7Tkv5Ce0sqzgN70TtxprKOGWo8C8Pqv4dxSlHiLdTjr43sm2k2zmUM6Uv9sT/aJLxydYzVeUnip/L9iP4rqFc29lIOeLtjuybivhE9U82vqv1d0cmjk9PUI6O72H7EYvd2K7EV01RfFfrX8X2Iy7fTDlbPZdyVrOixrsjMXc33q/+N3fKf4s+I0yf9rmc6R2b0+AmOtcI5kKc0v2zT93hLGH7kW431TJzLX4DnRF8tcxayjWouRa/gXIGciLbRic+TumCfTvbj1hcX+MnNsbvGHD5ycZ/lYlHXN7Etut/0xn/+fH4jqY90FCLiDUTTD8BFs+lOGUY6rcp/c4bKR/AUyldWAkwx+IHUa4VPQp8L7d9kfNxSpfPXIu/hDKM9t3AjzLz65SuzS8D/6YT+wnwizX+7sb4CwZcflLxzPx/wKMRsX/tiovM3Mo2Od3xzPxHypndju0sqzjN7oly/eLFPH1U2FbKRdO5FP8W8GrKN+bH138B5cjtyZ7
3ZS7EJxrBN0pnGPgciu9L6XpczNNHJz4tNuxxJh5xuYky2GO64/vQMOR2Tt5GZJ75EuXbqTd2gxGxCjgiMzfNlXhE/BblSOjr47HMfCIiXgr8y+7z50h8MeXLj2TmU53Fr6EM85xr8WcAyzJzC/DaiDiN0n1Fv9iwxzNzCf29kHI2fec0x5+iDD/eIUdPSZKaeU1DktTMpCFJambSkCQ1M2lIkpqZNCRJzf4/gzvKsyBDV1sAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "cumsum_final = pd.Series(np.array(bandit_oracle.rewards).cumsum(axis=0)[-1,:])\n", - "(cumsum_final - cumsum_final.mean()).plot(kind=\"bar\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Comparing results between bandits" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "df_rewards = pd.DataFrame({label: bandit.rewards for (label, bandit) in bandits.items()})\n", - "df_rewards[\"oracle\"] = bandit_oracle.max_rewards\n", - "\n", - "sns.displot(df_rewards, kde=True)\n", - "\n", - "(df_rewards\n", - " .cumsum()\n", - " .plot(color=[\"red\", \"blue\", 'green', \"cyan\" ,\"magenta\"])\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 190, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 190, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "pd.DataFrame({\"original\": {feature_name:beta \n", - " for (feature_name, beta) \n", - " in zip(dataset.feature_names, dataset.beta)},\n", - " \"learned\": bandits[\"ucb\"].best_model[\"LinearRegression\"].weights}).plot(kind=\"bar\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "river-env", - "language": "python", - "name": "river-env" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/river/expert/__init__.py b/river/expert/__init__.py index c867ad3617..d26acba10f 100644 --- a/river/expert/__init__.py +++ b/river/expert/__init__.py @@ -29,9 +29,9 @@ __all__ = [ "EpsilonGreedyRegressor", - 'EWARegressor', + 'EWARegressor', 'SuccessiveHalvingClassifier', 'SuccessiveHalvingRegressor', 'StackingClassifier', 'UCBRegressor', -] \ No newline at end of file +] diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 8665c21981..5fd9c89dea 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -101,7 +101,7 @@ def learn_one(self, x, y): if self.print_every: if (self._n_iter % self.print_every) == 0: self._print_info() - + if self.save_percentage_pulled: self.store_percentage_pulled += [self.percentage_pulled] @@ -119,7 +119,9 @@ def add_models(self, new_models): self.models += new_models self._n_arms += length_new_models self._N = np.concatenate([self._N, np.zeros(length_new_models, dtype=np.int)]) - self._average_reward = np.concatenate([self._average_reward, np.zeros(length_new_models, dtype=np.float)]) + self._average_reward = np.concatenate( + [self._average_reward, np.zeros(length_new_models, dtype=np.float)] + ) def 
_compute_scaled_reward(self, y_pred, y_true, update_scaler=True): metric_value = self.metric._eval(y_pred, y_true) @@ -265,4 +267,3 @@ class UCBRegressor(UCBBandit, base.Regressor): def _pred_func(self, model): return model.predict_one - From e128e6153b19c228a351deee0050db4eccc34497 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Mon, 23 Nov 2020 18:45:42 +0100 Subject: [PATCH 04/14] enumerate all parameters in __init__, fix \epsilon --- river/expert/bandit.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 5fd9c89dea..8876742c06 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -143,8 +143,11 @@ def _print_info(self): class EpsilonGreedyBandit(Bandit): - def __init__(self, epsilon=0.1, epsilon_decay=None, **kwargs): - super().__init__(**kwargs) + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, + print_every=None, save_metric_values=False, save_percentage_pulled=False, + epsilon=0.1, epsilon_decay=None): + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, print_every=print_every, + save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) self.epsilon = epsilon self.epsilon_decay = epsilon_decay if epsilon_decay: @@ -166,11 +169,11 @@ def _update_arm(self, arm, reward): self.epsilon = self._starting_epsilon*np.exp(-self._n_iter*self.epsilon_decay) -class EpsilonGreedyRegressor(EpsilonGreedyBandit, base.Regressor): +class EpsilonGreedyRegressor(EpsilonGreedyBandit): """Epsilon-greedy bandit algorithm for regression. This bandit selects the best arm (defined as the one with the highest average reward) with - probability $(1 - \epsilon)$ and draws a random arm with probability $\epsilon$. It is also + probability $(1 - \\epsilon)$ and draws a random arm with probability $\\epsilon$. It is also called Follow-The-Leader (FTL) algorithm. 
For this bandit, reward are supposed to be 1-subgaussian, hence the use of the StandardScaler @@ -208,8 +211,11 @@ def _pred_func(self, model): class UCBBandit(Bandit): - def __init__(self, delta=None, explore_each_arm=1, **kwargs): - super().__init__(**kwargs) + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, + print_every=None, save_metric_values=False, save_percentage_pulled=False, + delta=None, explore_each_arm=1): + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, print_every=print_every, + save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) if delta is not None and (delta >= 1 or delta <= 0): raise ValueError("The parameter delta should be comprised in ]0, 1[ (or set to None)") self.delta = delta From a50693f2ff85769fff22de64270a8f6e2f8a8061 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Mon, 23 Nov 2020 18:59:22 +0100 Subject: [PATCH 05/14] align on convention: single quote and import --- river/expert/__init__.py | 9 +++------ river/expert/bandit.py | 2 +- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/river/expert/__init__.py b/river/expert/__init__.py index d26acba10f..ee982a90bc 100644 --- a/river/expert/__init__.py +++ b/river/expert/__init__.py @@ -16,11 +16,8 @@ """ -from .bandit import ( - EpsilonGreedyRegressor, - UCBRegressor, -) - +from .bandit import EpsilonGreedyRegressor +from .bandit import UCBRegressor from .ewa import EWARegressor from .sh import SuccessiveHalvingClassifier from .sh import SuccessiveHalvingRegressor @@ -28,7 +25,7 @@ __all__ = [ - "EpsilonGreedyRegressor", + 'EpsilonGreedyRegressor', 'EWARegressor', 'SuccessiveHalvingClassifier', 'SuccessiveHalvingRegressor', diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 8876742c06..cd1138fd6f 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -10,7 +10,7 @@ __all__ = [ - "EpsilonGreedyRegressor", + 'EpsilonGreedyRegressor', 
'UCBRegressor', ] From 22c792355cad80fac0f04cfc81931c6a09dc1f7e Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Mon, 23 Nov 2020 19:35:19 +0100 Subject: [PATCH 06/14] rm print_every, change print_info->__repr__, skip line after class --- river/expert/bandit.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index cd1138fd6f..b8f3864f3a 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -19,8 +19,9 @@ # Determine which object to store (rewards/percentages pulled/loss?) class Bandit(base.EnsembleMixin): + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - print_every=None, save_metric_values=False, save_percentage_pulled=False): + save_metric_values=False, save_percentage_pulled=False): if len(models) <= 1: raise ValueError(f"You supply {len(models)} models. At least 2 models should be supplied.") @@ -33,7 +34,6 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transform super().__init__(models) self.reward_scaler = copy.deepcopy(reward_scaler) self.metric = copy.deepcopy(metric) - self.print_every = print_every self.save_metric_values = save_metric_values if save_metric_values: @@ -49,6 +49,13 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transform self._N = np.zeros(self._n_arms, dtype=np.int) self._average_reward = np.zeros(self._n_arms, dtype=np.float) + def __repr__(self): + return ( + f"{self.__class__.__name__}" + + f"\n\t{str(self.metric)}" + + f"\n\t{'Best model id: ' + str(self._best_model_idx)}" + ).expandtabs(2) + @abc.abstractmethod def _pull_arm(self): pass @@ -98,10 +105,6 @@ def learn_one(self, x, y): # Specific update of the arm for certain bandit class self._update_arm(chosen_arm, reward) - if self.print_every: - if (self._n_iter % self.print_every) == 0: - self._print_info() - if self.save_percentage_pulled: self.store_percentage_pulled += 
[self.percentage_pulled] @@ -133,20 +136,13 @@ def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): reward = self.reward_scaler.transform_one(metric_to_reward_dict)["metric"] return reward - def _print_info(self): - print( - str(self), - str(self.metric), - "Best model id: " + str(self._best_model_idx), - sep="\n\t" - ) - class EpsilonGreedyBandit(Bandit): + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - print_every=None, save_metric_values=False, save_percentage_pulled=False, + save_metric_values=False, save_percentage_pulled=False, epsilon=0.1, epsilon_decay=None): - super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, print_every=print_every, + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) self.epsilon = epsilon self.epsilon_decay = epsilon_decay @@ -211,10 +207,11 @@ def _pred_func(self, model): class UCBBandit(Bandit): + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - print_every=None, save_metric_values=False, save_percentage_pulled=False, + save_metric_values=False, save_percentage_pulled=False, delta=None, explore_each_arm=1): - super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, print_every=print_every, + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) if delta is not None and (delta >= 1 or delta <= 0): raise ValueError("The parameter delta should be comprised in ]0, 1[ (or set to None)") From afe1cac7ce56b0abf3a1f757a07ffdf883798d76 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Mon, 23 Nov 2020 21:36:50 +0100 Subject: [PATCH 07/14] substitute stdlib for numpy --- river/expert/bandit.py | 49 +++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 22 deletions(-) 
diff --git a/river/expert/bandit.py b/river/expert/bandit.py index b8f3864f3a..f0cb716e14 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -1,9 +1,9 @@ import abc import copy +import math +import random import typing -import numpy as np - from river import base from river import metrics from river import preprocessing @@ -18,6 +18,9 @@ # Docstring # Determine which object to store (rewards/percentages pulled/loss?) +def argmax(l): + return max(range(len(l)), key=l.__getitem__) + class Bandit(base.EnsembleMixin): def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, @@ -46,8 +49,8 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transform # Initializing bandits internals self._n_arms = len(models) self._n_iter = 0 # number of times learn_one is called - self._N = np.zeros(self._n_arms, dtype=np.int) - self._average_reward = np.zeros(self._n_arms, dtype=np.float) + self._N = [0] * self._n_arms + self._average_reward = [0.0] * self._n_arms def __repr__(self): return ( @@ -71,7 +74,7 @@ def _pred_func(self, model): @property def _best_model_idx(self): # average reward instead of cumulated (otherwise favors arms which are pulled often) - return np.argmax(self._average_reward) + return argmax(self._average_reward) @property def best_model(self): @@ -79,7 +82,7 @@ def best_model(self): @property def percentage_pulled(self): - percentages = self._N / sum(self._N) + percentages = [n / sum(self._N) for n in self._N] return percentages def predict_one(self, x): @@ -121,10 +124,8 @@ def add_models(self, new_models): # Careful, not validation of the model is done here (contrary to __init__) self.models += new_models self._n_arms += length_new_models - self._N = np.concatenate([self._N, np.zeros(length_new_models, dtype=np.int)]) - self._average_reward = np.concatenate( - [self._average_reward, np.zeros(length_new_models, dtype=np.float)] - ) + self._N = self._N + [0] * length_new_models + 
self._average_reward = self._average_reward + [0.0] * length_new_models def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): metric_value = self.metric._eval(y_pred, y_true) @@ -152,17 +153,17 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transfor self.reward_scaler = preprocessing.StandardScaler() def _pull_arm(self): - if np.random.rand() > self.epsilon: - chosen_arm = np.argmax(self._average_reward) + if random.random() > self.epsilon: + chosen_arm = argmax(self._average_reward) else: - chosen_arm = np.random.choice(self._n_arms) + chosen_arm = random.choice(range(self._n_arms)) return chosen_arm def _update_arm(self, arm, reward): # The arm internals are already updated in the `learn_one` phase of class `Bandit`. if self.epsilon_decay: - self.epsilon = self._starting_epsilon*np.exp(-self._n_iter*self.epsilon_decay) + self.epsilon = self._starting_epsilon * math.exp(-self._n_iter*self.epsilon_decay) class EpsilonGreedyRegressor(EpsilonGreedyBandit): @@ -222,17 +223,21 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transfor self.reward_scaler = preprocessing.StandardScaler() def _pull_arm(self): - not_pulled_enough = self._N <= self.explore_each_arm - if any(not_pulled_enough): # Explore all arms pulled less than `explore_each_arm` times - never_pulled_arm = np.where(not_pulled_enough)[0] #[0] because returned a tuple (array(),) even when input is 1D array - chosen_arm = np.random.choice(never_pulled_arm) + # Explore all arms pulled less than `explore_each_arm` times + never_pulled_arm = [i for (i, n) in enumerate(self._N) if n <= self.explore_each_arm] + if never_pulled_arm: + chosen_arm = random.choice(never_pulled_arm) else: if self.delta: - exploration_bonus = np.sqrt(2 * np.log(1/self.delta) / self._N) + exploration_bonus = [math.sqrt(2 * math.log(1/self.delta) / n) for n in self._N] else: - exploration_bonus = np.sqrt(2 * np.log(self._n_iter) / self._N) - upper_bound = 
self._average_reward + exploration_bonus - chosen_arm = np.argmax(upper_bound) + exploration_bonus = [math.sqrt(2 * math.log(self._n_iter) / n) for n in self._N] + upper_bound = [ + avg_reward + exploration + for (avg_reward, exploration) + in zip(self._average_reward, exploration_bonus) + ] + chosen_arm = argmax(upper_bound) return chosen_arm From 3c25c93002ac643992f4c0ff5e1ad52bd724809b Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Tue, 24 Nov 2020 18:12:12 +0100 Subject: [PATCH 08/14] rm metrics tracing, add _learn_one for powerusers --- river/expert/bandit.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index f0cb716e14..29aebfbf01 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -91,6 +91,21 @@ def predict_one(self, x): return y_pred def learn_one(self, x, y): + self._learn_one(x, y) + return self + + def add_models(self, new_models): + if not isinstance(new_models, list): + raise TypeError("Argument `new_models` must be of a list") + + length_new_models = len(new_models) + # Careful, not validation of the model is done here (contrary to __init__) + self.models += new_models + self._n_arms += length_new_models + self._N = self._N + [0] * length_new_models + self._average_reward = self._average_reward + [0.0] * length_new_models + + def _learn_one(self, x, y): chosen_arm = self._pull_arm() chosen_model = self[chosen_arm] @@ -108,24 +123,7 @@ def learn_one(self, x, y): # Specific update of the arm for certain bandit class self._update_arm(chosen_arm, reward) - if self.save_percentage_pulled: - self.store_percentage_pulled += [self.percentage_pulled] - - if self.save_metric_values: - self.metric_values += [self.metric._eval(y_pred, y)] - - return self - - def add_models(self, new_models): - if not isinstance(new_models, list): - raise TypeError("Argument `new_models` must be of a list") - - length_new_models = len(new_models) - 
# Careful, not validation of the model is done here (contrary to __init__) - self.models += new_models - self._n_arms += length_new_models - self._N = self._N + [0] * length_new_models - self._average_reward = self._average_reward + [0.0] * length_new_models + return self, self.percentage_pulled, self.metric._eval(y_pred, y) def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): metric_value = self.metric._eval(y_pred, y_true) @@ -233,8 +231,8 @@ def _pull_arm(self): else: exploration_bonus = [math.sqrt(2 * math.log(self._n_iter) / n) for n in self._N] upper_bound = [ - avg_reward + exploration - for (avg_reward, exploration) + avg_reward + exploration + for (avg_reward, exploration) in zip(self._average_reward, exploration_bonus) ] chosen_arm = argmax(upper_bound) From 860e779f287891d6aed182889beaf7b99e4f1045 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Tue, 24 Nov 2020 18:25:15 +0100 Subject: [PATCH 09/14] forget to rm object tracing in class __init__ signature --- river/expert/bandit.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 29aebfbf01..8071966bab 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -23,8 +23,7 @@ def argmax(l): class Bandit(base.EnsembleMixin): - def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - save_metric_values=False, save_percentage_pulled=False): + def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer): if len(models) <= 1: raise ValueError(f"You supply {len(models)} models. 
At least 2 models should be supplied.") @@ -38,14 +37,6 @@ def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transform self.reward_scaler = copy.deepcopy(reward_scaler) self.metric = copy.deepcopy(metric) - self.save_metric_values = save_metric_values - if save_metric_values: - self.metric_values: typing.List = [] - - self.save_percentage_pulled = save_percentage_pulled - if save_percentage_pulled: - self.store_percentage_pulled: typing.List = [] - # Initializing bandits internals self._n_arms = len(models) self._n_iter = 0 # number of times learn_one is called @@ -139,10 +130,8 @@ def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): class EpsilonGreedyBandit(Bandit): def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - save_metric_values=False, save_percentage_pulled=False, epsilon=0.1, epsilon_decay=None): - super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, - save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler) self.epsilon = epsilon self.epsilon_decay = epsilon_decay if epsilon_decay: @@ -208,10 +197,8 @@ def _pred_func(self, model): class UCBBandit(Bandit): def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, - save_metric_values=False, save_percentage_pulled=False, delta=None, explore_each_arm=1): - super().__init__(models=models, metric=metric, reward_scaler=reward_scaler, - save_metric_values=save_metric_values, save_percentage_pulled=save_percentage_pulled) + super().__init__(models=models, metric=metric, reward_scaler=reward_scaler) if delta is not None and (delta >= 1 or delta <= 0): raise ValueError("The parameter delta should be comprised in ]0, 1[ (or set to None)") self.delta = delta @@ -270,6 +257,9 @@ class UCBRegressor(UCBBandit, base.Regressor): [^2]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. 
Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) [^3]: [Rivasplata, O. (2012). Subgaussian random variables: An expository note. Internet publication, PDF.]: (https://sites.ualberta.ca/~omarr/publications/subgaussians.pdf) """ + @classmethod + def _default_params(cls): + return {'regressor': linear_model.LogisticRegression()} def _pred_func(self, model): return model.predict_one From 9fe14f1de38e42fbe663f1de848a33bd571507e5 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Tue, 24 Nov 2020 18:44:03 +0100 Subject: [PATCH 10/14] add type to models, _default_params, use '+=' for list append --- river/expert/bandit.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 8071966bab..190af60b40 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -5,6 +5,8 @@ import typing from river import base +from river import compose +from river import linear_model from river import metrics from river import preprocessing @@ -16,14 +18,13 @@ # TODO: # Docstring -# Determine which object to store (rewards/percentages pulled/loss?) def argmax(l): return max(range(len(l)), key=l.__getitem__) class Bandit(base.EnsembleMixin): - def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer): + def __init__(self, models: typing.List[base.Estimator], metric: metrics.Metric, reward_scaler: base.Transformer): if len(models) <= 1: raise ValueError(f"You supply {len(models)} models. 
At least 2 models should be supplied.") @@ -85,16 +86,12 @@ def learn_one(self, x, y): self._learn_one(x, y) return self - def add_models(self, new_models): - if not isinstance(new_models, list): - raise TypeError("Argument `new_models` must be of a list") - + def add_models(self, new_models: typing.List[base.Estimator]): length_new_models = len(new_models) - # Careful, not validation of the model is done here (contrary to __init__) self.models += new_models self._n_arms += length_new_models - self._N = self._N + [0] * length_new_models - self._average_reward = self._average_reward + [0.0] * length_new_models + self._N += [0] * length_new_models + self._average_reward += [0.0] * length_new_models def _learn_one(self, x, y): chosen_arm = self._pull_arm() @@ -129,7 +126,7 @@ def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): class EpsilonGreedyBandit(Bandit): - def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, + def __init__(self, models: typing.List[base.Estimator], metric: metrics.Metric, reward_scaler: base.Transformer, epsilon=0.1, epsilon_decay=None): super().__init__(models=models, metric=metric, reward_scaler=reward_scaler) self.epsilon = epsilon @@ -189,6 +186,12 @@ class EpsilonGreedyRegressor(EpsilonGreedyBandit): [^2]: [Rivasplata, O. (2012). Subgaussian random variables: An expository note. Internet publication, PDF.]: (https://sites.ualberta.ca/~omarr/publications/subgaussians.pdf) [^3]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. 
Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) """ + @classmethod + def _default_params(cls): + return {'regressor': compose.Pipeline( + linear_model.LinearRegression() + )} + def _pred_func(self, model): return model.predict_one @@ -196,7 +199,7 @@ def _pred_func(self, model): class UCBBandit(Bandit): - def __init__(self, models, metric: metrics.Metric, reward_scaler: base.Transformer, + def __init__(self, models: typing.List[base.Estimator], metric: metrics.Metric, reward_scaler: base.Transformer, delta=None, explore_each_arm=1): super().__init__(models=models, metric=metric, reward_scaler=reward_scaler) if delta is not None and (delta >= 1 or delta <= 0): @@ -259,7 +262,9 @@ class UCBRegressor(UCBBandit, base.Regressor): """ @classmethod def _default_params(cls): - return {'regressor': linear_model.LogisticRegression()} + return {'regressor': compose.Pipeline( + linear_model.LinearRegression() + )} def _pred_func(self, model): return model.predict_one From f0b1b6c8cd878a26715f07a00f6bb3729c7bb5eb Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Wed, 25 Nov 2020 09:46:08 +0100 Subject: [PATCH 11/14] change parameters in _default_params --- river/expert/bandit.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 190af60b40..d89f57a7e8 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -22,6 +22,7 @@ def argmax(l): return max(range(len(l)), key=l.__getitem__) + class Bandit(base.EnsembleMixin): def __init__(self, models: typing.List[base.Estimator], metric: metrics.Metric, reward_scaler: base.Transformer): @@ -187,11 +188,12 @@ class EpsilonGreedyRegressor(EpsilonGreedyBandit): [^3]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. 
Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) """ @classmethod - def _default_params(cls): - return {'regressor': compose.Pipeline( - linear_model.LinearRegression() - )} - + def _default_params(cls): + return { + 'models': [linear_model.LinearRegression(lr=.1), linear_model.LinearRegression(lr=.01)], + 'metric': metrics.MSE(), + 'reward_scaler': preprocessing.StandardScaler() + } def _pred_func(self, model): return model.predict_one @@ -262,9 +264,11 @@ class UCBRegressor(UCBBandit, base.Regressor): """ @classmethod def _default_params(cls): - return {'regressor': compose.Pipeline( - linear_model.LinearRegression() - )} + return { + 'models': [linear_model.LinearRegression(lr=.1), linear_model.LinearRegression(lr=.01)], + 'metric': metrics.MSE(), + 'reward_scaler': preprocessing.StandardScaler() + } def _pred_func(self, model): return model.predict_one From 9cb720cbcfa667f9d3488113a74916fc2ae4c0b4 Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Wed, 25 Nov 2020 09:49:49 +0100 Subject: [PATCH 12/14] intercept_lr instead of lr in LinearRegression --- river/expert/bandit.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index d89f57a7e8..29d49cf264 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -188,9 +188,12 @@ class EpsilonGreedyRegressor(EpsilonGreedyBandit): [^3]: [Lattimore, T., & Szepesvári, C. (2020). Bandit algorithms. 
Cambridge University Press.](https://tor-lattimore.com/downloads/book/book.pdf) """ @classmethod - def _default_params(cls): + def _default_params(cls): return { - 'models': [linear_model.LinearRegression(lr=.1), linear_model.LinearRegression(lr=.01)], + 'models': [ + linear_model.LinearRegression(intercept_lr=.1), + linear_model.LinearRegression(intercept_lr=.01) + ], 'metric': metrics.MSE(), 'reward_scaler': preprocessing.StandardScaler() } @@ -265,7 +268,10 @@ class UCBRegressor(UCBBandit, base.Regressor): @classmethod def _default_params(cls): return { - 'models': [linear_model.LinearRegression(lr=.1), linear_model.LinearRegression(lr=.01)], + 'models': [ + linear_model.LinearRegression(intercept_lr=.1), + linear_model.LinearRegression(intercept_lr=.01) + ], 'metric': metrics.MSE(), 'reward_scaler': preprocessing.StandardScaler() } From d3169dc07b9b9f0e557cbf229c615df0bfc1fdfc Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Wed, 25 Nov 2020 17:03:31 +0100 Subject: [PATCH 13/14] mv argmax to utils.math --- river/expert/bandit.py | 14 ++++++-------- river/utils/math.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index 29d49cf264..eacff31e13 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -5,11 +5,10 @@ import typing from river import base -from river import compose from river import linear_model from river import metrics from river import preprocessing - +from river import utils __all__ = [ 'EpsilonGreedyRegressor', @@ -19,8 +18,7 @@ # TODO: # Docstring -def argmax(l): - return max(range(len(l)), key=l.__getitem__) + class Bandit(base.EnsembleMixin): @@ -67,7 +65,7 @@ def _pred_func(self, model): @property def _best_model_idx(self): # average reward instead of cumulated (otherwise favors arms which are pulled often) - return argmax(self._average_reward) + return utils.math.argmax(self._average_reward) @property def best_model(self): @@ -112,7 +110,7 @@ 
def _learn_one(self, x, y): # Specific update of the arm for certain bandit class self._update_arm(chosen_arm, reward) - return self, self.percentage_pulled, self.metric._eval(y_pred, y) + return self.metric._eval(y_pred, y) def _compute_scaled_reward(self, y_pred, y_true, update_scaler=True): metric_value = self.metric._eval(y_pred, y_true) @@ -139,7 +137,7 @@ def __init__(self, models: typing.List[base.Estimator], metric: metrics.Metric, def _pull_arm(self): if random.random() > self.epsilon: - chosen_arm = argmax(self._average_reward) + chosen_arm = utils.math.argmax(self._average_reward) else: chosen_arm = random.choice(range(self._n_arms)) @@ -230,7 +228,7 @@ def _pull_arm(self): for (avg_reward, exploration) in zip(self._average_reward, exploration_bonus) ] - chosen_arm = argmax(upper_bound) + chosen_arm = utils.math.argmax(upper_bound) return chosen_arm diff --git a/river/utils/math.py b/river/utils/math.py index 3a08448a58..d8625ddc41 100644 --- a/river/utils/math.py +++ b/river/utils/math.py @@ -12,6 +12,7 @@ __all__ = [ + 'argmax', 'chain_dot', 'clamp', 'dot', @@ -332,3 +333,14 @@ def sign(x: float): """ return -1 if x < 0 else (1 if x > 0 else 0) + + +def argmax(l: list): + """Argmax function. 
+ + Parameters + ---------- + l + + """ + return max(range(len(l)), key=l.__getitem__) From c7b20ca06d547838bfc823d9bba3133aa40e153d Mon Sep 17 00:00:00 2001 From: Etienne Kintzler Date: Wed, 25 Nov 2020 18:29:49 +0100 Subject: [PATCH 14/14] fix mistake: EpsilonGreedyRegressor didn't inherit from base.Regressor --- river/expert/bandit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/river/expert/bandit.py b/river/expert/bandit.py index eacff31e13..e71f8a3422 100644 --- a/river/expert/bandit.py +++ b/river/expert/bandit.py @@ -149,7 +149,7 @@ def _update_arm(self, arm, reward): self.epsilon = self._starting_epsilon * math.exp(-self._n_iter*self.epsilon_decay) -class EpsilonGreedyRegressor(EpsilonGreedyBandit): +class EpsilonGreedyRegressor(EpsilonGreedyBandit, base.Regressor): """Epsilon-greedy bandit algorithm for regression. This bandit selects the best arm (defined as the one with the highest average reward) with @@ -177,7 +177,7 @@ class EpsilonGreedyRegressor(EpsilonGreedyBandit): >>> from river import metrics - TODO: finish ex + TODO: Example References ---------- @@ -256,6 +256,7 @@ class UCBRegressor(UCBBandit, base.Regressor): delta For UCB(delta) implementation. Lower value means more exploration. + TODO: Example References ----------