In [92]:
import torch
import requests
from tqdm import tqdm
from torch import nn
from torch.nn import Embedding, Linear, Bilinear, BatchNorm1d, ReLU, Dropout, MarginRankingLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline 

In [84]:
class PosNegData:

    def __init__(self, pos_data, neg_data, weight):
        self.pos = pos_data
        self.neg = neg_data
        self.weight = weight


class Data:

    def __init__(self, user_id, item_id, metadata):
        self.user_id = torch.tensor(user_id)
        self.item_id = torch.tensor(item_id)
        self.metadata = torch.tensor(metadata)


class DataGenerator(Dataset):

    def __init__(self, state_history, reward_history, action_history):
        self.state_history = state_history
        self.reward_history = reward_history
        self.action_history = action_history
        self.data = []
        self._init_pos_neg()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def _init_pos_neg(self):
        for i, r in enumerate(self.reward_history):
            if r > 0:
                user_id = self.state_history[i][0][0]
                action = self.action_history[i]
                pos_data = Data(user_id=user_id, item_id=self.state_history[i][action][1],
                                metadata=self.state_history[i][action][2:])
                for j, state in enumerate(self.state_history[i]):
                    item_id = state[1]
                    metadata = state[2:]
                    data = Data(user_id=user_id, item_id=item_id, metadata=metadata)
                    if j != action:
                        self.data.append(PosNegData(pos_data, data, 1))

    def add_data(self, state, action, reward):
        if reward > 0:
            user_id = state[0][0]
            pos_data = Data(user_id=user_id, item_id=state[action][1], metadata=state[action][2:])
            for j, my_state in enumerate(state):
                item_id = my_state[1]
                metadata = my_state[2:]
                data = Data(user_id=user_id, item_id=item_id, metadata=metadata)
                if j != action:
                    self.data.append(PosNegData(pos_data, data, 1))


def collate_data_pos_neg(list_of_data):
    raw_data = [data for data in list_of_data]
    user_id_pos = torch.stack([data.pos.user_id for data in list_of_data])
    item_id_pos = torch.stack([data.pos.item_id for data in list_of_data])
    metadata_pos = torch.stack([data.pos.metadata for data in list_of_data])
    user_id_neg = torch.stack([data.neg.user_id for data in list_of_data])
    item_id_neg = torch.stack([data.neg.item_id for data in list_of_data])
    metadata_neg = torch.stack([data.neg.metadata for data in list_of_data])
    return {'user_id_pos': user_id_pos, 'item_id_pos': item_id_pos, 'metadata_pos': metadata_pos, 'raw_data': raw_data,
            'user_id_neg': user_id_neg, 'item_id_neg': item_id_neg, 'metadata_neg': metadata_neg}


def collate_data(list_of_data):
    user_id = torch.stack([data.user_id for data in list_of_data])
    item_id = torch.stack([data.item_id for data in list_of_data])
    metadata = torch.stack([data.metadata for data in list_of_data])
    return {'user_id': user_id, 'item_id': item_id, 'metadata': metadata}


In [85]:
class Interface:

    def __init__(self, args):
        self.base_url = 'http://{}'.format(args.ip_address_env_2)
        self.user_id = args.user_id
        self.url_reset = '{}/reset'.format(self.base_url)
        self.url_predict = '{}/predict'.format(self.base_url)

        r = requests.get(url=self.url_reset, params={'user_id': self.user_id})
        data = r.json()
        self.state_history = data['state_history']
        self.rewards_history = data['rewards_history']
        self.action_history = data['action_history']

        self.nb_items = data['nb_items']
        self.nb_users = data['nb_users']
        self.nb_variables = len(self.state_history[0][0]) - 2

        self.next_state = data['next_state']

    def reset(self):
        r = requests.get(url=self.url_reset, params={'user_id': self.user_id})
        data = r.json()

        self.state_history = data['state_history']
        self.rewards_history = data['rewards_history']
        self.action_history = data['action_history']

        self.nb_items = data['nb_items']
        self.nb_users = data['nb_users']

        self.next_state = data['next_state']

    def predict(self, recommended_item):
        r = requests.get(url=self.url_predict, params={'user_id': self.user_id, 'recommended_item': recommended_item})
        data = r.json()

        self.state_history.append(data['state'])
        self.rewards_history.append(data['reward'])
        self.action_history.append(recommended_item)

        self.next_state = data['state']
        return data['state'], data['reward']


In [95]:
class SiameseNetwork(nn.Module):

    def __init__(self, interface):
        super(SiameseNetwork, self).__init__()

        user_embedding_dim = 10
        item_embedding_dim = 10
        user_meta_dim = 30
        item_meta_dim = 30
        meta_meta_dim = 90
        dense_1_dim = 128
        dense_2_dim = 64
        out_dim = 1

        self.embedding_user = Embedding(num_embeddings=interface.nb_users, embedding_dim=user_embedding_dim)
        self.embedding_item = Embedding(num_embeddings=interface.nb_items, embedding_dim=item_embedding_dim)
        self.concat_user_meta = Bilinear(in1_features=user_embedding_dim, in2_features=interface.nb_variables, out_features=user_meta_dim)
        self.concat_item_meta = Bilinear(in1_features=item_embedding_dim, in2_features=interface.nb_variables, out_features=item_meta_dim)
        self.concat_meta_meta = Bilinear(in1_features=user_meta_dim, in2_features=item_meta_dim, out_features=meta_meta_dim)
        self.batch_norm_0 = BatchNorm1d(num_features=meta_meta_dim)
        self.dropout_0 = Dropout(0.5)
        self.dense_1 = Linear(in_features=meta_meta_dim, out_features=dense_1_dim)
        self.relu_1 = ReLU()
        self.dropout_1 = Dropout(0.5)
        self.batch_norm_1 = BatchNorm1d(num_features=dense_1_dim)
        self.dense_2 = Linear(in_features=dense_1_dim, out_features=dense_2_dim)
        self.relu_2 = ReLU()
        self.dropout_2 = Dropout(0.5)
        self.batch_norm_2 = BatchNorm1d(num_features=dense_2_dim)
        self.dense_3 = Linear(in_features=dense_2_dim, out_features=out_dim)

    def forward(self, user_id, item_id, metadata):
        user_embedded = self.embedding_user(user_id).squeeze(dim=1)
        item_embedded = self.embedding_item(item_id).squeeze(dim=1)
        user_and_meta = self.concat_user_meta(user_embedded, metadata)
        item_and_meta = self.concat_item_meta(item_embedded, metadata)
        meta_and_meta = self.concat_meta_meta(user_and_meta, item_and_meta)
        output = self.batch_norm_0(meta_and_meta)
        # output = self.dropout_0(output)
        output = self.dense_1(output)
        output = self.relu_1(output)
        output = self.batch_norm_1(output)
        # output = self.dropout_1(output)
        output = self.dense_2(output)
        output = self.relu_2(output)
        output = self.batch_norm_2(output)
        # output = self.dropout_2(output)
        output = self.dense_3(output)
        return output


In [98]:
class Trainer:

    def __init__(self, interface, learning_rate=1e-3, validation_split=0.2, batch_size=64, margin=10, min_weight=1,
                 num_samples=30):
        self.interface = interface
        self.network = SiameseNetwork(interface)
        self.dataset = DataGenerator(interface.state_history, interface.rewards_history, interface.action_history)
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.min_weight = min_weight
        self.num_samples = num_samples

        self.loss = MarginRankingLoss(margin=margin, reduction='none')

        self.optimizer = Adam(self.network.parameters(), lr=learning_rate)
        self.lr_scheduler = ReduceLROnPlateau(self.optimizer, factor=0.3, patience=3, threshold=1e-3, verbose=True)

    def reset(self):
        self.train()

    def train(self, n=600):
        for _ in range(n):
            weights = [data.weight for data in self.dataset]
            sampler = WeightedRandomSampler(weights=weights, num_samples=self.num_samples, replacement=True)
            data_loader = DataLoader(self.dataset, batch_size=self.batch_size, sampler=sampler,
                                     collate_fn=collate_data_pos_neg, drop_last=True)
            self.network.train()
            cumloss = 0
            for inputs in data_loader:
                self.optimizer.zero_grad()
                output_pos = self.network(inputs['user_id_pos'], inputs['item_id_pos'], inputs['metadata_pos'])
                output_neg = self.network(inputs['user_id_neg'], inputs['item_id_neg'], inputs['metadata_neg'])
                loss = self.loss(output_pos, output_neg, torch.ones(output_pos.shape))
                for j, data in enumerate(inputs['raw_data']):
                    data.weight = loss[j][0].item()
                cumloss += loss.sum().item()
                loss = loss.mean()
                loss.backward()
                self.optimizer.step()

    def online(self):
        self.network.eval()
        l = []
        my_state = self.interface.next_state
        for m in self.interface.next_state:
            data = Data(m[0], m[1], m[2:])
            l.append(data)
        input = collate_data(l)
        output = self.network(input['user_id'], input['item_id'], input['metadata']).squeeze()
        recommended_item = output.argmax().item()
        state, reward = self.interface.predict(recommended_item)
        self.dataset.add_data(my_state, recommended_item, reward)
        self.train(n=3)
        return reward


In [99]:
class Argument:
    pass


list_reward = []

for _ in range(100):
    args = Argument
    args.user_id = 'R3EIFXNYY6XMBXBR01BK'
    args.ip_address_env_0 = '52.47.62.31'
    args.ip_address_env_1 = '35.180.254.42'
    args.ip_address_env_2 = '35.180.178.243'

    interface = Interface(args)
    trainer = Trainer(interface)
    interface.reset()
    trainer.reset()

    reward = 0
    nb_iter = 100

    for k in range(nb_iter):
        if (k % 10 == 9):
            print("\rIteration {}\tTotal Reward {:5f}\tMean Reward {:5f}   ".format(
                k, reward, reward / k), end="")
        # makes a prediction, returns the reward and update the network
        reward += trainer.online()

    print("Total reward : " + str(reward))
    print("Mean reward : " + str(reward / nb_iter))
    list_reward.append(reward)
plt.hist(list_reward, bins=5)
print('mean: {}'.format(np.mean(list_reward)))
print('variance: {}'.format(np.std(list_reward)))
plt.show()

Iteration 9	Total Reward 3286.871150	Mean Reward 365.207906   

Iteration 19	Total Reward 5641.949094	Mean Reward 296.944689   

Iteration 29	Total Reward 7520.656052	Mean Reward 259.332967   

Iteration 39	Total Reward 8965.423747	Mean Reward 229.882660   

Iteration 49	Total Reward 10545.903939	Mean Reward 215.222529   

Iteration 59	Total Reward 13822.223236	Mean Reward 234.274970   

Iteration 69	Total Reward 15511.214460	Mean Reward 224.800210   Iteration 79	Total Reward 16950.806617	Mean Reward 214.567172   

Iteration 89	Total Reward 18758.860728	Mean Reward 210.773716   Iteration 99	Total Reward 21332.849527	Mean Reward 215.483329   

Total reward : 21332.849526721246
Mean reward : 213.32849526721245


Iteration 9	Total Reward 3403.208885	Mean Reward 378.134321   Iteration 19	Total Reward 6297.796791	Mean Reward 331.462989   

Iteration 29	Total Reward 7072.370903	Mean Reward 243.874859   Iteration 39	Total Reward 9211.693835	Mean Reward 236.197278   

Iteration 49	Total Reward 10951.130582	Mean Reward 223.492461   

Iteration 59	Total Reward 14810.581123	Mean Reward 251.026799   

Iteration 69	Total Reward 19164.067032	Mean Reward 277.740102   

Iteration 79	Total Reward 23069.980930	Mean Reward 292.025075   

Iteration 89	Total Reward 26001.174012	Mean Reward 292.148023   

Iteration 99	Total Reward 28054.126503	Mean Reward 283.375015   Total reward : 28769.622474909112
Mean reward : 287.69622474909113


Iteration 9	Total Reward 3611.522049	Mean Reward 401.280228   Iteration 19	Total Reward 4496.180981	Mean Reward 236.641104   

Iteration 29	Total Reward 4496.180981	Mean Reward 155.040723   

Iteration 39	Total Reward 8158.488266	Mean Reward 209.192007   

Iteration 49	Total Reward 9972.494015	Mean Reward 203.520286   Iteration 59	Total Reward 11725.322430	Mean Reward 198.734278   

Iteration 69	Total Reward 14473.723174	Mean Reward 209.764104   Iteration 79	Total Reward 18930.264450	Mean Reward 239.623601   

Iteration 89	Total Reward 19748.697870	Mean Reward 221.895482   

Iteration 99	Total Reward 20657.556233	Mean Reward 208.662184   Total reward : 20657.55623252623
Mean reward : 206.5755623252623


Iteration 9	Total Reward 3687.847310	Mean Reward 409.760812   Iteration 19	Total Reward 5547.552915	Mean Reward 291.976469   

Iteration 29	Total Reward 8093.085208	Mean Reward 279.071904   

Iteration 39	Total Reward 9865.913800	Mean Reward 252.972149   Iteration 49	Total Reward 14650.004611	Mean Reward 298.979686   

Iteration 59	Total Reward 17170.235267	Mean Reward 291.020937   Iteration 69	Total Reward 19969.035273	Mean Reward 289.406308   

Iteration 79	Total Reward 21884.053990	Mean Reward 277.013342   

Iteration 89	Total Reward 23712.195695	Mean Reward 266.429165   

Iteration 99	Total Reward 25299.710291	Mean Reward 255.552629   Total reward : 25299.710291105544
Mean reward : 252.99710291105544


Iteration 9	Total Reward 2415.703937	Mean Reward 268.411549   

Iteration 19	Total Reward 3169.962643	Mean Reward 166.840139   

Iteration 29	Total Reward 5915.362197	Mean Reward 203.978007   

Iteration 39	Total Reward 10844.865592	Mean Reward 278.073477   

Iteration 49	Total Reward 11599.124297	Mean Reward 236.716822   Iteration 59	Total Reward 12280.208851	Mean Reward 208.139133   

Iteration 69	Total Reward 14191.660261	Mean Reward 205.676236   

Iteration 79	Total Reward 16768.247789	Mean Reward 212.256301   

Iteration 89	Total Reward 22325.769994	Mean Reward 250.851348   Iteration 99	Total Reward 24286.491350	Mean Reward 245.318094   

Total reward : 24286.491349742828
Mean reward : 242.8649134974283


Iteration 9	Total Reward 1916.185683	Mean Reward 212.909520   

Iteration 19	Total Reward 2820.472490	Mean Reward 148.445921   Iteration 29	Total Reward 5453.869407	Mean Reward 188.064462   

Iteration 39	Total Reward 8729.796714	Mean Reward 223.840941   Iteration 49	Total Reward 10620.127147	Mean Reward 216.737289   

Iteration 59	Total Reward 11524.413953	Mean Reward 195.329050   Iteration 69	Total Reward 13393.570810	Mean Reward 194.109722   

Iteration 79	Total Reward 16214.043300	Mean Reward 205.241054   Iteration 89	Total Reward 16214.043300	Mean Reward 182.180262   

Iteration 99	Total Reward 19922.437658	Mean Reward 201.236744   Total reward : 19922.437657603197
Mean reward : 199.22437657603197


Iteration 9	Total Reward 1183.858107	Mean Reward 131.539790   

Iteration 19	Total Reward 2466.230834	Mean Reward 129.801623   Iteration 29	Total Reward 4595.665886	Mean Reward 158.471237   

Iteration 39	Total Reward 5873.750425	Mean Reward 150.608985   Iteration 49	Total Reward 7811.196810	Mean Reward 159.412180   

Iteration 59	Total Reward 10116.214871	Mean Reward 171.461269   

Iteration 69	Total Reward 11776.251356	Mean Reward 170.670310   Iteration 79	Total Reward 13436.287841	Mean Reward 170.079593   

Iteration 89	Total Reward 17200.205314	Mean Reward 193.260734   Iteration 99	Total Reward 18549.804752	Mean Reward 187.371765   

Total reward : 18549.804751585834
Mean reward : 185.49804751585833


Iteration 9	Total Reward 1450.562315	Mean Reward 161.173591   Iteration 19	Total Reward 3277.749294	Mean Reward 172.513121   

Iteration 29	Total Reward 6897.226961	Mean Reward 237.835412   Iteration 39	Total Reward 10692.563793	Mean Reward 274.168302   

Iteration 49	Total Reward 11671.131314	Mean Reward 238.186353   Iteration 59	Total Reward 13956.892222	Mean Reward 236.557495   

Iteration 69	Total Reward 16835.877171	Mean Reward 243.998220   

Iteration 79	Total Reward 19433.177818	Mean Reward 245.989593   Iteration 89	Total Reward 21170.550041	Mean Reward 237.871349   

Iteration 99	Total Reward 23248.733526	Mean Reward 234.835692   Total reward : 23248.733525890075
Mean reward : 232.48733525890074


Iteration 9	Total Reward 804.783257	Mean Reward 89.420362   Iteration 19	Total Reward 4731.271910	Mean Reward 249.014311   

Iteration 29	Total Reward 6143.478494	Mean Reward 211.844086   Iteration 39	Total Reward 10482.196686	Mean Reward 268.774274   

Iteration 49	Total Reward 12118.390490	Mean Reward 247.314092   

Iteration 59	Total Reward 15196.861765	Mean Reward 257.573928   

Iteration 69	Total Reward 17296.308717	Mean Reward 250.671141   

Iteration 79	Total Reward 20596.062068	Mean Reward 260.709646   

Iteration 89	Total Reward 23007.632892	Mean Reward 258.512729   Iteration 99	Total Reward 23933.692124	Mean Reward 241.754466   

Total reward : 23933.692124281795
Mean reward : 239.33692124281797


Iteration 9	Total Reward 2569.749506	Mean Reward 285.527723   Iteration 19	Total Reward 3457.267422	Mean Reward 181.961443   

Iteration 29	Total Reward 7558.717222	Mean Reward 260.645421   

Iteration 39	Total Reward 9308.409034	Mean Reward 238.677155   

Iteration 49	Total Reward 12774.128280	Mean Reward 260.696496   Iteration 59	Total Reward 14490.046601	Mean Reward 245.594010   

Iteration 69	Total Reward 18076.793169	Mean Reward 261.982510   

Iteration 79	Total Reward 23676.246046	Mean Reward 299.699317   Iteration 89	Total Reward 28536.678091	Mean Reward 320.636832   

Iteration 99	Total Reward 28536.678091	Mean Reward 288.249274   Total reward : 28536.678091375456
Mean reward : 285.36678091375455


Iteration 9	Total Reward 3284.259108	Mean Reward 364.917679   Iteration 19	Total Reward 5995.857074	Mean Reward 315.571425   

Iteration 29	Total Reward 8647.616988	Mean Reward 298.193689   Iteration 39	Total Reward 11239.934825	Mean Reward 288.203457   

Iteration 49	Total Reward 13852.434398	Mean Reward 282.702743   

Iteration 59	Total Reward 15468.621420	Mean Reward 262.180024   

Iteration 69	Total Reward 18988.363124	Mean Reward 275.193668   Iteration 79	Total Reward 22155.547776	Mean Reward 280.449972   

Iteration 89	Total Reward 23843.801598	Mean Reward 267.907883   Iteration 99	Total Reward 25331.454267	Mean Reward 255.873275   

Total reward : 26255.700018349045
Mean reward : 262.55700018349046


Iteration 9	Total Reward 530.219271	Mean Reward 58.913252   

Iteration 19	Total Reward 730.457420	Mean Reward 38.445127   

Iteration 29	Total Reward 730.457420	Mean Reward 25.188187   Iteration 39	Total Reward 830.576495	Mean Reward 21.296833   

Iteration 49	Total Reward 830.576495	Mean Reward 16.950541   Iteration 59	Total Reward 992.478781	Mean Reward 16.821674   

Iteration 69	Total Reward 1923.850341	Mean Reward 27.881889   

Iteration 79	Total Reward 2085.752627	Mean Reward 26.401932   Iteration 89	Total Reward 2927.160224	Mean Reward 32.889441   

Iteration 99	Total Reward 3651.481976	Mean Reward 36.883656   Total reward : 3651.4819763717046
Mean reward : 36.51481976371704


Iteration 9	Total Reward 3274.650453	Mean Reward 363.850050   Iteration 19	Total Reward 6550.155681	Mean Reward 344.745036   

Iteration 29	Total Reward 11142.810614	Mean Reward 384.234849   Iteration 39	Total Reward 15230.307901	Mean Reward 390.520715   

Iteration 49	Total Reward 20002.597539	Mean Reward 408.216276   

Iteration 59	Total Reward 23725.025620	Mean Reward 402.119078   Iteration 69	Total Reward 26458.981433	Mean Reward 383.463499   

Iteration 79	Total Reward 29307.580091	Mean Reward 370.982026   

Iteration 89	Total Reward 31207.931318	Mean Reward 350.650914   Iteration 99	Total Reward 33767.936510	Mean Reward 341.090268   

Total reward : 33767.93651034104
Mean reward : 337.67936510341036


Iteration 9	Total Reward 1895.498499	Mean Reward 210.610944   

Iteration 19	Total Reward 5754.759559	Mean Reward 302.882082   

Iteration 29	Total Reward 8477.586520	Mean Reward 292.330570   Iteration 39	Total Reward 8477.586520	Mean Reward 217.374013   

Iteration 49	Total Reward 11265.353913	Mean Reward 229.905182   

Iteration 59	Total Reward 13888.401262	Mean Reward 235.396632   Iteration 69	Total Reward 15761.274213	Mean Reward 228.424264   

Iteration 79	Total Reward 15761.274213	Mean Reward 199.509800   Iteration 89	Total Reward 16756.000224	Mean Reward 188.269665   

Iteration 99	Total Reward 23434.719275	Mean Reward 236.714336   Total reward : 24393.50669891984
Mean reward : 243.9350669891984


Iteration 9	Total Reward 663.990923	Mean Reward 73.776769   

Iteration 19	Total Reward 1567.651324	Mean Reward 82.507964   

Iteration 29	Total Reward 4865.830789	Mean Reward 167.787269   

Iteration 39	Total Reward 7559.105451	Mean Reward 193.823217   

Iteration 49	Total Reward 11148.570600	Mean Reward 227.521849   

Iteration 59	Total Reward 12026.394101	Mean Reward 203.837188   

Iteration 69	Total Reward 14659.864606	Mean Reward 212.461806   

Iteration 79	Total Reward 16665.064913	Mean Reward 210.950189   

Iteration 89	Total Reward 20264.017605	Mean Reward 227.685591   

Iteration 99	Total Reward 21167.678006	Mean Reward 213.814929   Total reward : 21167.678006035116
Mean reward : 211.67678006035115


Iteration 9	Total Reward 2875.057541	Mean Reward 319.450838   

Iteration 19	Total Reward 5750.115082	Mean Reward 302.637636   

Iteration 29	Total Reward 9998.048217	Mean Reward 344.760283   Iteration 39	Total Reward 13242.243379	Mean Reward 339.544702   

Iteration 49	Total Reward 15158.948406	Mean Reward 309.366294   

Iteration 59	Total Reward 16931.728313	Mean Reward 286.978446   

Iteration 69	Total Reward 18778.346776	Mean Reward 272.149953   

Iteration 79	Total Reward 19662.860734	Mean Reward 248.896971   

Iteration 89	Total Reward 23020.154440	Mean Reward 258.653421   

Iteration 99	Total Reward 25396.726756	Mean Reward 256.532593   Total reward : 26055.257001861828
Mean reward : 260.55257001861827


Iteration 9	Total Reward 775.292064	Mean Reward 86.143563   Iteration 19	Total Reward 2948.743989	Mean Reward 155.197052   

Iteration 29	Total Reward 3576.193010	Mean Reward 123.317000   Iteration 39	Total Reward 4203.642031	Mean Reward 107.785693   

Iteration 49	Total Reward 4203.642031	Mean Reward 85.788613   

Iteration 59	Total Reward 5606.383117	Mean Reward 95.023443   

Iteration 69	Total Reward 7651.035850	Mean Reward 110.884578   

Iteration 79	Total Reward 10159.136511	Mean Reward 128.596665   

Iteration 89	Total Reward 11648.002146	Mean Reward 130.876429   

Iteration 99	Total Reward 13086.543180	Mean Reward 132.187305   Total reward : 13086.5431795087
Mean reward : 130.865431795087


Iteration 9	Total Reward 848.035515	Mean Reward 94.226168   Iteration 19	Total Reward 1696.071030	Mean Reward 89.266896   

Iteration 29	Total Reward 2627.782588	Mean Reward 90.613193   

Iteration 39	Total Reward 5339.241220	Mean Reward 136.903621   

Iteration 49	Total Reward 7848.332570	Mean Reward 160.170052   

Iteration 59	Total Reward 11177.251356	Mean Reward 189.444938   

Iteration 69	Total Reward 14224.471638	Mean Reward 206.151763   

Iteration 79	Total Reward 15590.335895	Mean Reward 197.346024   

Iteration 89	Total Reward 17503.872593	Mean Reward 196.672726   

Iteration 99	Total Reward 20240.791182	Mean Reward 204.452436   Total reward : 20240.791181850847
Mean reward : 202.40791181850847


Iteration 9	Total Reward 1644.483551	Mean Reward 182.720395   

Iteration 19	Total Reward 4136.762144	Mean Reward 217.724323   

Iteration 29	Total Reward 5502.522239	Mean Reward 189.742146   Iteration 39	Total Reward 7135.544183	Mean Reward 182.962671   

Iteration 49	Total Reward 8545.447472	Mean Reward 174.396887   

Iteration 59	Total Reward 8545.447472	Mean Reward 144.838093   

Iteration 69	Total Reward 8545.447472	Mean Reward 123.847065   Iteration 79	Total Reward 13279.201516	Mean Reward 168.091158   

Iteration 89	Total Reward 16540.769471	Mean Reward 185.851342   

Iteration 99	Total Reward 19038.442366	Mean Reward 192.307499   Total reward : 19038.44236634741
Mean reward : 190.3844236634741


Iteration 9	Total Reward 2942.120165	Mean Reward 326.902241   Iteration 19	Total Reward 6413.686960	Mean Reward 337.562472   

Iteration 29	Total Reward 8328.633248	Mean Reward 287.194250   

Iteration 39	Total Reward 11198.327287	Mean Reward 287.136597   Iteration 49	Total Reward 16796.268730	Mean Reward 342.780994   

Iteration 59	Total Reward 19668.688162	Mean Reward 333.367596   Iteration 69	Total Reward 21352.561794	Mean Reward 309.457417   

Iteration 79	Total Reward 22217.381633	Mean Reward 281.232679   

Iteration 89	Total Reward 25480.695850	Mean Reward 286.299953   

Iteration 99	Total Reward 28350.389889	Mean Reward 286.367575   Total reward : 29186.91319103035
Mean reward : 291.8691319103035


Iteration 9	Total Reward 2440.683367	Mean Reward 271.187041   Iteration 19	Total Reward 5938.765481	Mean Reward 312.566604   

Iteration 29	Total Reward 8707.880872	Mean Reward 300.271754   Iteration 39	Total Reward 10431.477998	Mean Reward 267.473795   

Iteration 49	Total Reward 15656.154345	Mean Reward 319.513354   

Iteration 59	Total Reward 21689.721166	Mean Reward 367.622393   

Iteration 69	Total Reward 23472.922153	Mean Reward 340.187278   Iteration 79	Total Reward 25196.519279	Mean Reward 318.943282   

Iteration 89	Total Reward 28161.210055	Mean Reward 316.418091   

Iteration 99	Total Reward 31747.128952	Mean Reward 320.678070   Total reward : 31747.128951998624
Mean reward : 317.4712895199862


Iteration 9	Total Reward 1794.506233	Mean Reward 199.389581   Iteration 19	Total Reward 3101.565105	Mean Reward 163.240269   

Iteration 29	Total Reward 3535.838647	Mean Reward 121.925471   Iteration 39	Total Reward 4485.483394	Mean Reward 115.012395   

Iteration 49	Total Reward 5112.436610	Mean Reward 104.335441   Iteration 59	Total Reward 6488.311033	Mean Reward 109.971373   

Iteration 69	Total Reward 7355.746575	Mean Reward 106.605023   

Iteration 79	Total Reward 8949.615308	Mean Reward 113.286270   

Iteration 89	Total Reward 9241.719906	Mean Reward 103.839550   

Iteration 99	Total Reward 11680.418462	Mean Reward 117.984025   Total reward : 11680.41846204865
Mean reward : 116.80418462048651


Iteration 9	Total Reward 1315.350845	Mean Reward 146.150094   Iteration 19	Total Reward 2747.818421	Mean Reward 144.622022   

Iteration 29	Total Reward 4821.976567	Mean Reward 166.275054   

Iteration 39	Total Reward 7379.392283	Mean Reward 189.215187   

Iteration 49	Total Reward 8250.355517	Mean Reward 168.374602   

Iteration 59	Total Reward 8250.355517	Mean Reward 139.836534   Iteration 69	Total Reward 9965.233129	Mean Reward 144.423669   

Iteration 79	Total Reward 13422.037209	Mean Reward 169.899205   

Iteration 89	Total Reward 14279.476015	Mean Reward 160.443551   

Iteration 99	Total Reward 14966.660718	Mean Reward 151.178391   Total reward : 14966.66071757756
Mean reward : 149.6666071757756


Iteration 9	Total Reward 2856.819403	Mean Reward 317.424378   

Iteration 19	Total Reward 6616.130710	Mean Reward 348.217406   

Iteration 29	Total Reward 9673.922206	Mean Reward 333.583524   Iteration 39	Total Reward 11914.959443	Mean Reward 305.511781   

Iteration 49	Total Reward 13388.122081	Mean Reward 273.226981   

Iteration 59	Total Reward 16660.732452	Mean Reward 282.385296   

Iteration 69	Total Reward 18197.517313	Mean Reward 263.732135   

Iteration 79	Total Reward 19670.679951	Mean Reward 248.995949   Iteration 89	Total Reward 20439.590214	Mean Reward 229.658317   

Iteration 99	Total Reward 24061.101092	Mean Reward 243.041425   Total reward : 24939.62366895
Mean reward : 249.3962366895


Iteration 9	Total Reward 1316.919306	Mean Reward 146.324367   Iteration 19	Total Reward 3165.637630	Mean Reward 166.612507   

Iteration 29	Total Reward 5003.344592	Mean Reward 172.529124   Iteration 39	Total Reward 8165.666056	Mean Reward 209.376053   

Iteration 49	Total Reward 9747.648051	Mean Reward 198.931593   Iteration 59	Total Reward 11067.110702	Mean Reward 187.578147   

Iteration 69	Total Reward 14328.371174	Mean Reward 207.657553   Iteration 79	Total Reward 15313.578401	Mean Reward 193.842765   

Iteration 89	Total Reward 17379.773368	Mean Reward 195.278352   Iteration 99	Total Reward 19044.088056	Mean Reward 192.364526   

Total reward : 19044.088055771306
Mean reward : 190.44088055771306


Iteration 9	Total Reward 2549.165945	Mean Reward 283.240661   Iteration 19	Total Reward 4530.854413	Mean Reward 238.466022   

Iteration 29	Total Reward 6731.959381	Mean Reward 232.136530   Iteration 39	Total Reward 10393.358388	Mean Reward 266.496369   

Iteration 49	Total Reward 12618.158739	Mean Reward 257.513444   Iteration 59	Total Reward 12618.158739	Mean Reward 213.867097   

Iteration 69	Total Reward 16122.750121	Mean Reward 233.663045   

Iteration 79	Total Reward 18111.208610	Mean Reward 229.255805   Iteration 89	Total Reward 19655.571657	Mean Reward 220.849120   

Iteration 99	Total Reward 20178.294667	Mean Reward 203.821158   Total reward : 20178.294666866117
Mean reward : 201.78294666866117


Iteration 9	Total Reward 0.000000	Mean Reward 0.000000   

Iteration 19	Total Reward 2442.076869	Mean Reward 128.530362   Iteration 29	Total Reward 3896.987689	Mean Reward 134.378886   

Iteration 39	Total Reward 3896.987689	Mean Reward 99.922761   Iteration 49	Total Reward 6425.586289	Mean Reward 131.134414   

Iteration 59	Total Reward 8913.217844	Mean Reward 151.071489   Iteration 69	Total Reward 8913.217844	Mean Reward 129.177070   

Iteration 79	Total Reward 11605.656475	Mean Reward 146.907044   Iteration 89	Total Reward 13940.043190	Mean Reward 156.629699   

Iteration 99	Total Reward 17955.329959	Mean Reward 181.366969   Total reward : 17955.32995939338
Mean reward : 179.55329959393382


Iteration 9	Total Reward 1880.152338	Mean Reward 208.905815   

Iteration 19	Total Reward 2791.797270	Mean Reward 146.936698   Iteration 29	Total Reward 3642.231319	Mean Reward 125.594183   

Iteration 39	Total Reward 5522.383657	Mean Reward 141.599581   

Iteration 49	Total Reward 10076.259908	Mean Reward 205.637957   

Iteration 59	Total Reward 10076.259908	Mean Reward 170.784066   

Iteration 69	Total Reward 11979.280448	Mean Reward 173.612760   

Iteration 79	Total Reward 15625.860178	Mean Reward 197.795698   

Iteration 89	Total Reward 16282.905283	Mean Reward 182.953992   

Iteration 99	Total Reward 18758.891842	Mean Reward 189.483756   Total reward : 18758.891842160054
Mean reward : 187.58891842160054


Iteration 9	Total Reward 3995.857241	Mean Reward 443.984138   Iteration 19	Total Reward 6542.861692	Mean Reward 344.361142   

Iteration 29	Total Reward 9514.455221	Mean Reward 328.084663   Iteration 39	Total Reward 13215.025858	Mean Reward 338.846817   

Iteration 49	Total Reward 15997.704721	Mean Reward 326.483770   Iteration 59	Total Reward 17785.368839	Mean Reward 301.446929   

Iteration 69	Total Reward 17785.368839	Mean Reward 257.758969   

Iteration 79	Total Reward 20284.253860	Mean Reward 256.762707   

Iteration 89	Total Reward 22472.605491	Mean Reward 252.501185   

Iteration 99	Total Reward 25012.931021	Mean Reward 252.655869   Total reward : 25012.931021441032
Mean reward : 250.12931021441034


Iteration 9	Total Reward 1629.714115	Mean Reward 181.079346   

Iteration 19	Total Reward 2584.076743	Mean Reward 136.004039   Iteration 29	Total Reward 8057.366317	Mean Reward 277.840218   

Iteration 39	Total Reward 9870.941742	Mean Reward 253.101070   Iteration 49	Total Reward 9870.941742	Mean Reward 201.447791   

Iteration 59	Total Reward 14980.523459	Mean Reward 253.907177   Iteration 69	Total Reward 16174.070403	Mean Reward 234.406817   

Iteration 79	Total Reward 19435.532390	Mean Reward 246.019397   

Iteration 89	Total Reward 20412.430459	Mean Reward 229.353151   

Iteration 99	Total Reward 20692.204818	Mean Reward 209.012170   Total reward : 20692.204817881448
Mean reward : 206.92204817881446


Iteration 9	Total Reward 2691.928660	Mean Reward 299.103184   Iteration 19	Total Reward 3820.035102	Mean Reward 201.054479   

Iteration 29	Total Reward 5980.431876	Mean Reward 206.221789   Iteration 39	Total Reward 7457.604930	Mean Reward 191.220639   

Iteration 49	Total Reward 7457.604930	Mean Reward 152.196019   

Iteration 59	Total Reward 9859.654003	Mean Reward 167.112780   

Iteration 69	Total Reward 13261.343887	Mean Reward 192.193390   

Iteration 79	Total Reward 16472.196724	Mean Reward 208.508819   Iteration 89	Total Reward 17172.309160	Mean Reward 192.947294   

Iteration 99	Total Reward 17172.309160	Mean Reward 173.457668   Total reward : 17172.30915965353
Mean reward : 171.7230915965353


Iteration 9	Total Reward 802.132208	Mean Reward 89.125801   

Iteration 19	Total Reward 4019.642183	Mean Reward 211.560115   

Iteration 29	Total Reward 4949.292467	Mean Reward 170.665257   

Iteration 39	Total Reward 6526.208654	Mean Reward 167.338683   Iteration 49	Total Reward 10077.719751	Mean Reward 205.667750   

Iteration 59	Total Reward 12759.869776	Mean Reward 216.268979   Iteration 69	Total Reward 13636.119646	Mean Reward 197.624922   

Iteration 79	Total Reward 15163.588648	Mean Reward 191.944160   Iteration 89	Total Reward 16979.688240	Mean Reward 190.783014   

Iteration 99	Total Reward 19393.662959	Mean Reward 195.895585   Total reward : 19393.662959086803
Mean reward : 193.93662959086802


Iteration 9	Total Reward 1801.941330	Mean Reward 200.215703   Iteration 19	Total Reward 4510.852389	Mean Reward 237.413284   

Iteration 29	Total Reward 6243.480387	Mean Reward 215.292427   

Iteration 39	Total Reward 7786.215134	Mean Reward 199.646542   

Iteration 49	Total Reward 11585.931172	Mean Reward 236.447575   

Iteration 59	Total Reward 16450.194120	Mean Reward 278.816849   

Iteration 69	Total Reward 18863.795722	Mean Reward 273.388344   

Iteration 79	Total Reward 20622.622451	Mean Reward 261.045854   

Iteration 89	Total Reward 22398.365049	Mean Reward 251.667023   

Iteration 99	Total Reward 26839.904106	Mean Reward 271.110142   Total reward : 26839.90410622224
Mean reward : 268.3990410622224


Iteration 9	Total Reward 2363.683174	Mean Reward 262.631464   Iteration 19	Total Reward 3841.629164	Mean Reward 202.191009   

Iteration 29	Total Reward 4551.453614	Mean Reward 156.946676   Iteration 39	Total Reward 6947.375932	Mean Reward 178.137844   

Iteration 49	Total Reward 11264.965091	Mean Reward 229.897247   Iteration 59	Total Reward 12159.982644	Mean Reward 206.101401   

Iteration 69	Total Reward 15175.439126	Mean Reward 219.933900   Iteration 79	Total Reward 18579.583805	Mean Reward 235.184605   

Iteration 89	Total Reward 21186.840277	Mean Reward 238.054385   

Iteration 99	Total Reward 25242.026472	Mean Reward 254.969964   Total reward : 26049.98026921236
Mean reward : 260.4998026921236


Iteration 9	Total Reward 3708.424890	Mean Reward 412.047210   Iteration 19	Total Reward 8021.646772	Mean Reward 422.191935   

Iteration 29	Total Reward 13781.201766	Mean Reward 475.213854   

Iteration 39	Total Reward 16327.970808	Mean Reward 418.665918   

Iteration 49	Total Reward 18136.980494	Mean Reward 370.142459   

Iteration 59	Total Reward 19979.508843	Mean Reward 338.635743   

Iteration 69	Total Reward 23720.669094	Mean Reward 343.777813   

Iteration 79	Total Reward 26328.648044	Mean Reward 333.274026   

Iteration 89	Total Reward 28197.123886	Mean Reward 316.821617   

Iteration 99	Total Reward 30069.477635	Mean Reward 303.732097   Total reward : 30889.032505496525
Mean reward : 308.8903250549653


Iteration 9	Total Reward 325.403800	Mean Reward 36.155978   Iteration 19	Total Reward 1157.910233	Mean Reward 60.942644   

Iteration 29	Total Reward 4332.564272	Mean Reward 149.398768   Iteration 39	Total Reward 6447.488776	Mean Reward 165.320225   

Iteration 49	Total Reward 7681.720095	Mean Reward 156.769798   Iteration 59	Total Reward 9487.671634	Mean Reward 160.807994   

Iteration 69	Total Reward 11712.428611	Mean Reward 169.745342   Iteration 79	Total Reward 13386.673999	Mean Reward 169.451570   

Iteration 89	Total Reward 15639.754371	Mean Reward 175.727577   

Iteration 99	Total Reward 17217.781672	Mean Reward 173.916987   Total reward : 17217.781671584784
Mean reward : 172.17781671584783


Iteration 9	Total Reward 771.641467	Mean Reward 85.737941   Iteration 19	Total Reward 1512.151866	Mean Reward 79.586940   

Iteration 29	Total Reward 2447.814648	Mean Reward 84.407402   Iteration 39	Total Reward 5034.585532	Mean Reward 129.091937   

Iteration 49	Total Reward 5034.585532	Mean Reward 102.746644   Iteration 59	Total Reward 5954.888202	Mean Reward 100.930309   

Iteration 69	Total Reward 8790.760471	Mean Reward 127.402326   

Iteration 79	Total Reward 10207.335757	Mean Reward 129.206782   Iteration 89	Total Reward 11978.631981	Mean Reward 134.591371   

Iteration 99	Total Reward 14681.892619	Mean Reward 148.301946   Total reward : 14681.892619118204
Mean reward : 146.81892619118204


Iteration 9	Total Reward 2297.855806	Mean Reward 255.317312   Iteration 19	Total Reward 5073.909918	Mean Reward 267.047890   

Iteration 29	Total Reward 8785.894870	Mean Reward 302.961892   Iteration 39	Total Reward 11518.107851	Mean Reward 295.336099   

Iteration 49	Total Reward 14511.894555	Mean Reward 296.161113   Iteration 59	Total Reward 18134.422692	Mean Reward 307.363096   

Iteration 69	Total Reward 22286.814396	Mean Reward 322.997310   Iteration 79	Total Reward 24090.119688	Mean Reward 304.938224   

Iteration 89	Total Reward 26256.982293	Mean Reward 295.022273   Iteration 99	Total Reward 29749.366949	Mean Reward 300.498656   

Total reward : 29749.36694914978
Mean reward : 297.49366949149777


Iteration 9	Total Reward 0.000000	Mean Reward 0.000000   

Iteration 19	Total Reward 1834.230980	Mean Reward 96.538473   

Iteration 29	Total Reward 5164.711040	Mean Reward 178.093484   

Iteration 39	Total Reward 9810.553974	Mean Reward 251.552666   

Iteration 49	Total Reward 15393.834406	Mean Reward 314.159886   

Iteration 59	Total Reward 18936.144035	Mean Reward 320.951594   

Iteration 69	Total Reward 19902.377849	Mean Reward 288.440259   

Iteration 79	Total Reward 23600.580557	Mean Reward 298.741526   

Iteration 89	Total Reward 27393.396705	Mean Reward 307.790974   Iteration 99	Total Reward 29247.238004	Mean Reward 295.426647   

Total reward : 29247.23800350633
Mean reward : 292.4723800350633


Iteration 9	Total Reward 1549.328738	Mean Reward 172.147638   

Iteration 19	Total Reward 3789.151339	Mean Reward 199.429018   Iteration 29	Total Reward 6979.753617	Mean Reward 240.681159   

Iteration 39	Total Reward 11280.712573	Mean Reward 289.249040   Iteration 49	Total Reward 14637.274354	Mean Reward 298.719885   

Iteration 59	Total Reward 17099.524541	Mean Reward 289.822450   

Iteration 69	Total Reward 21261.665259	Mean Reward 308.140076   Iteration 79	Total Reward 23526.039868	Mean Reward 297.797973   

Iteration 89	Total Reward 26787.592359	Mean Reward 300.984184   Iteration 99	Total Reward 27784.363865	Mean Reward 280.650140   

Total reward : 27784.363865367402
Mean reward : 277.843638653674


Iteration 9	Total Reward 1652.220470	Mean Reward 183.580052   Iteration 19	Total Reward 3427.740777	Mean Reward 180.407409   

Iteration 29	Total Reward 4956.605540	Mean Reward 170.917432   Iteration 39	Total Reward 6402.030255	Mean Reward 164.154622   

Iteration 49	Total Reward 7216.513052	Mean Reward 147.275777   Iteration 59	Total Reward 10374.343407	Mean Reward 175.836329   

Iteration 69	Total Reward 12026.563877	Mean Reward 174.298027   Iteration 79	Total Reward 12841.046673	Mean Reward 162.544895   

Iteration 89	Total Reward 16245.588442	Mean Reward 182.534702   Iteration 99	Total Reward 19059.103950	Mean Reward 192.516202   

Total reward : 19059.103949762884
Mean reward : 190.59103949762886


Iteration 9	Total Reward 1583.744491	Mean Reward 175.971610   Iteration 19	Total Reward 5501.578777	Mean Reward 289.556778   

Iteration 29	Total Reward 7793.626349	Mean Reward 268.745736   Iteration 39	Total Reward 9541.083091	Mean Reward 244.643156   

Iteration 49	Total Reward 10415.647400	Mean Reward 212.564233   Iteration 59	Total Reward 13929.940667	Mean Reward 236.100689   

Iteration 69	Total Reward 16952.089796	Mean Reward 245.682461   Iteration 79	Total Reward 17676.186308	Mean Reward 223.749194   

Iteration 89	Total Reward 20134.495108	Mean Reward 226.230282   Iteration 99	Total Reward 21809.531039	Mean Reward 220.298293   

Total reward : 22584.10577839314
Mean reward : 225.8410577839314


Iteration 9	Total Reward 592.293552	Mean Reward 65.810395   Iteration 19	Total Reward 3369.248642	Mean Reward 177.328876   

Iteration 29	Total Reward 7403.794781	Mean Reward 255.303268   Iteration 39	Total Reward 8326.404621	Mean Reward 213.497554   

Iteration 49	Total Reward 10941.733623	Mean Reward 223.300686   Iteration 59	Total Reward 12721.934307	Mean Reward 215.626005   

Iteration 69	Total Reward 14346.378027	Mean Reward 207.918522   

Iteration 79	Total Reward 19325.804974	Mean Reward 244.630443   Iteration 89	Total Reward 21226.484316	Mean Reward 238.499824   

Iteration 99	Total Reward 22378.259358	Mean Reward 226.043024   Total reward : 22378.25935770323
Mean reward : 223.78259357703232


Iteration 9	Total Reward 855.689310	Mean Reward 95.076590   Iteration 19	Total Reward 2667.705335	Mean Reward 140.405544   

Iteration 29	Total Reward 5449.362189	Mean Reward 187.909041   Iteration 39	Total Reward 7160.740809	Mean Reward 183.608739   

Iteration 49	Total Reward 8916.481895	Mean Reward 181.969018   Iteration 59	Total Reward 12595.587910	Mean Reward 213.484541   

Iteration 69	Total Reward 16174.056520	Mean Reward 234.406616   

Iteration 79	Total Reward 19484.263881	Mean Reward 246.636252   Iteration 89	Total Reward 20283.678251	Mean Reward 227.906497   

Iteration 99	Total Reward 21250.716465	Mean Reward 214.653702   Total reward : 21250.716465048685
Mean reward : 212.50716465048686


Iteration 9	Total Reward 1749.621244	Mean Reward 194.402360   Iteration 19	Total Reward 5365.559815	Mean Reward 282.397885   

Iteration 29	Total Reward 9335.642405	Mean Reward 321.918704   

Iteration 39	Total Reward 12410.123461	Mean Reward 318.208294   Iteration 49	Total Reward 14277.637209	Mean Reward 291.380351   

Iteration 59	Total Reward 16109.503540	Mean Reward 273.042433   

Iteration 69	Total Reward 17909.573055	Mean Reward 259.559030   

Iteration 79	Total Reward 20330.169534	Mean Reward 257.343918   

Iteration 89	Total Reward 23838.372328	Mean Reward 267.846880   

Iteration 99	Total Reward 27065.567171	Mean Reward 273.389567   Total reward : 27065.567171279577
Mean reward : 270.65567171279577


Iteration 9	Total Reward 1353.399416	Mean Reward 150.377713   Iteration 19	Total Reward 2388.041053	Mean Reward 125.686371   

Iteration 29	Total Reward 3683.252040	Mean Reward 127.008691   Iteration 39	Total Reward 5951.713488	Mean Reward 152.608038   

Iteration 49	Total Reward 6639.094421	Mean Reward 135.491723   

Iteration 59	Total Reward 8414.447183	Mean Reward 142.617749   Iteration 69	Total Reward 10703.534652	Mean Reward 155.123691   

Iteration 79	Total Reward 10769.932525	Mean Reward 136.328260   Iteration 89	Total Reward 12036.879104	Mean Reward 135.245833   

Iteration 99	Total Reward 12872.199668	Mean Reward 130.022219   Total reward : 12872.199667791705
Mean reward : 128.72199667791705


Iteration 9	Total Reward 1904.630080	Mean Reward 211.625564   

Iteration 19	Total Reward 3117.601544	Mean Reward 164.084292   

Iteration 29	Total Reward 3900.882977	Mean Reward 134.513206   Iteration 39	Total Reward 6026.181300	Mean Reward 154.517469   

Iteration 49	Total Reward 6078.089713	Mean Reward 124.042647   Iteration 59	Total Reward 6361.624757	Mean Reward 107.824148   

Iteration 69	Total Reward 10380.874970	Mean Reward 150.447463   Iteration 79	Total Reward 12181.580446	Mean Reward 154.197221   

Iteration 89	Total Reward 12559.626451	Mean Reward 141.119398   Iteration 99	Total Reward 15236.438945	Mean Reward 153.903424   

Total reward : 15236.438945256043
Mean reward : 152.36438945256043


Iteration 9	Total Reward 5718.195882	Mean Reward 635.355098   

Iteration 19	Total Reward 7567.769524	Mean Reward 398.303659   

Iteration 29	Total Reward 12325.174737	Mean Reward 425.006025   

Iteration 39	Total Reward 15285.813684	Mean Reward 391.943941   

Iteration 49	Total Reward 19020.650108	Mean Reward 388.176533   

Iteration 59	Total Reward 19963.281498	Mean Reward 338.360703   

Iteration 69	Total Reward 23588.812373	Mean Reward 341.866846   

Iteration 79	Total Reward 27322.820749	Mean Reward 345.858490   

Iteration 89	Total Reward 31114.202728	Mean Reward 349.597783   

Iteration 99	Total Reward 33043.713768	Mean Reward 333.774887   Total reward : 33043.71376815137
Mean reward : 330.4371376815137


Iteration 9	Total Reward 3636.590109	Mean Reward 404.065568   

Iteration 19	Total Reward 10621.535752	Mean Reward 559.028197   

Iteration 29	Total Reward 11351.012407	Mean Reward 391.414221   

Iteration 39	Total Reward 12786.219368	Mean Reward 327.851779   

Iteration 49	Total Reward 16406.641248	Mean Reward 334.829413   

Iteration 59	Total Reward 19173.305042	Mean Reward 324.971272   

Iteration 69	Total Reward 23655.815923	Mean Reward 342.837912   

Iteration 79	Total Reward 28761.190836	Mean Reward 364.065707   

Iteration 89	Total Reward 28761.190836	Mean Reward 323.159448   

Iteration 99	Total Reward 28761.190836	Mean Reward 290.517079   Total reward : 28761.19083588535
Mean reward : 287.6119083588535


Iteration 9	Total Reward 1501.361465	Mean Reward 166.817941   

Iteration 19	Total Reward 3079.904617	Mean Reward 162.100243   Iteration 29	Total Reward 3818.893070	Mean Reward 131.685968   

Iteration 39	Total Reward 4761.271377	Mean Reward 122.083881   

Iteration 49	Total Reward 7269.863377	Mean Reward 148.364559   Iteration 59	Total Reward 8906.826227	Mean Reward 150.963156   

Iteration 69	Total Reward 12257.242252	Mean Reward 177.641192   

Iteration 79	Total Reward 14736.720489	Mean Reward 186.540766   

Iteration 89	Total Reward 17976.497565	Mean Reward 201.983119   

Iteration 99	Total Reward 19613.460415	Mean Reward 198.115762   Total reward : 19613.460414812664
Mean reward : 196.13460414812664


Iteration 9	Total Reward 0.000000	Mean Reward 0.000000   Iteration 19	Total Reward 3900.320762	Mean Reward 205.280040   

Iteration 29	Total Reward 4863.089532	Mean Reward 167.692742   

Iteration 39	Total Reward 5850.481142	Mean Reward 150.012337   

Iteration 49	Total Reward 9153.378296	Mean Reward 186.803639   

Iteration 59	Total Reward 13750.336919	Mean Reward 233.056558   

Iteration 69	Total Reward 15677.368681	Mean Reward 227.208242   

Iteration 79	Total Reward 19575.082356	Mean Reward 247.785853   

Iteration 89	Total Reward 21193.940881	Mean Reward 238.134167   

Iteration 99	Total Reward 22632.199822	Mean Reward 228.608079   Total reward : 22632.19982176107
Mean reward : 226.3219982176107


Iteration 9	Total Reward 2378.172503	Mean Reward 264.241389   Iteration 19	Total Reward 2378.172503	Mean Reward 125.166974   

Iteration 29	Total Reward 3788.432918	Mean Reward 130.635618   Iteration 39	Total Reward 4523.188282	Mean Reward 115.979187   

Iteration 49	Total Reward 7354.303969	Mean Reward 150.087836   Iteration 59	Total Reward 8237.594184	Mean Reward 139.620240   

Iteration 69	Total Reward 9705.680481	Mean Reward 140.662036   

Iteration 79	Total Reward 11919.487215	Mean Reward 150.879585   

Iteration 89	Total Reward 15161.807163	Mean Reward 170.357384   

Iteration 99	Total Reward 17020.805750	Mean Reward 171.927331   Total reward : 17020.805749699914
Mean reward : 170.20805749699915


Iteration 9	Total Reward 1809.463071	Mean Reward 201.051452   Iteration 19	Total Reward 4025.245184	Mean Reward 211.855010   

Iteration 29	Total Reward 7369.510955	Mean Reward 254.121067   Iteration 39	Total Reward 9036.047235	Mean Reward 231.693519   

Iteration 49	Total Reward 11428.597516	Mean Reward 233.236684   

Iteration 59	Total Reward 12847.558613	Mean Reward 217.755231   Iteration 69	Total Reward 14514.094893	Mean Reward 210.349201   

Iteration 79	Total Reward 18057.305638	Mean Reward 228.573489   Iteration 89	Total Reward 22459.380790	Mean Reward 252.352593   

Iteration 99	Total Reward 24305.719577	Mean Reward 245.512319   Total reward : 24305.719577254014
Mean reward : 243.05719577254015


Iteration 9	Total Reward 1928.662577	Mean Reward 214.295842   Iteration 19	Total Reward 3410.998618	Mean Reward 179.526243   

Iteration 29	Total Reward 3664.077627	Mean Reward 126.347504   

Iteration 39	Total Reward 5719.175459	Mean Reward 146.645525   Iteration 49	Total Reward 5965.524597	Mean Reward 121.745400   

Iteration 59	Total Reward 7553.195288	Mean Reward 128.020259   

Iteration 69	Total Reward 10614.284551	Mean Reward 153.830211   

Iteration 79	Total Reward 11773.628461	Mean Reward 149.033272   

Iteration 89	Total Reward 12958.981186	Mean Reward 145.606530   

Iteration 99	Total Reward 14268.417109	Mean Reward 144.125425   Total reward : 14268.417108832804
Mean reward : 142.68417108832804


Iteration 9	Total Reward 1492.083705	Mean Reward 165.787078   

Iteration 19	Total Reward 2588.863128	Mean Reward 136.255954   Iteration 29	Total Reward 4138.877151	Mean Reward 142.719902   

Iteration 39	Total Reward 6365.858458	Mean Reward 163.227140   

Iteration 49	Total Reward 9053.678264	Mean Reward 184.768944   Iteration 59	Total Reward 9827.309752	Mean Reward 166.564572   

Iteration 69	Total Reward 10554.201159	Mean Reward 152.959437   

Iteration 79	Total Reward 12011.906496	Mean Reward 152.049449   Iteration 89	Total Reward 13683.779733	Mean Reward 153.750334   

Iteration 99	Total Reward 16233.582399	Mean Reward 163.975580   Total reward : 16233.582399099289
Mean reward : 162.33582399099288


Iteration 9	Total Reward 828.946460	Mean Reward 92.105162   

Iteration 19	Total Reward 2412.263841	Mean Reward 126.961255   

Iteration 29	Total Reward 3114.493447	Mean Reward 107.396326   

Iteration 39	Total Reward 4081.181834	Mean Reward 104.645688   

Iteration 49	Total Reward 4230.547989	Mean Reward 86.337714   

Iteration 59	Total Reward 6179.946289	Mean Reward 104.744852   

Iteration 69	Total Reward 6967.592177	Mean Reward 100.979597   

Iteration 79	Total Reward 6967.592177	Mean Reward 88.197369   

Iteration 89	Total Reward 9072.384929	Mean Reward 101.936909   

Iteration 99	Total Reward 12128.914113	Mean Reward 122.514284   Total reward : 12128.914113173558
Mean reward : 121.28914113173558


Iteration 9	Total Reward 1377.215296	Mean Reward 153.023922   

Iteration 19	Total Reward 2845.064320	Mean Reward 149.740227   Iteration 29	Total Reward 4672.304868	Mean Reward 161.113961   

Iteration 39	Total Reward 8805.679848	Mean Reward 225.786663   

Iteration 49	Total Reward 9765.036105	Mean Reward 199.286451   

Iteration 59	Total Reward 11467.900546	Mean Reward 194.371196   

Iteration 69	Total Reward 15366.260109	Mean Reward 222.699422   

Iteration 79	Total Reward 17641.058889	Mean Reward 223.304543   

Iteration 89	Total Reward 17641.058889	Mean Reward 198.214145   

Iteration 99	Total Reward 20580.062195	Mean Reward 207.879416   Total reward : 21569.885719898186
Mean reward : 215.69885719898187


Iteration 9	Total Reward 2086.717979	Mean Reward 231.857553   

Iteration 19	Total Reward 2090.737952	Mean Reward 110.038840   Iteration 29	Total Reward 2497.067997	Mean Reward 86.105793   

Iteration 39	Total Reward 3897.207017	Mean Reward 99.928385   Iteration 49	Total Reward 5429.000241	Mean Reward 110.795923   

Iteration 59	Total Reward 6271.622839	Mean Reward 106.298692   

Iteration 69	Total Reward 8820.972547	Mean Reward 127.840182   

Iteration 79	Total Reward 11199.466678	Mean Reward 141.765401   

Iteration 89	Total Reward 13271.300013	Mean Reward 149.115730   

Iteration 99	Total Reward 15241.597455	Mean Reward 153.955530   Total reward : 15241.597454970502
Mean reward : 152.41597454970503


Iteration 9	Total Reward 1707.495280	Mean Reward 189.721698   Iteration 19	Total Reward 5238.486360	Mean Reward 275.709808   

Iteration 29	Total Reward 6916.371685	Mean Reward 238.495575   

Iteration 39	Total Reward 10057.088364	Mean Reward 257.874061   Iteration 49	Total Reward 11759.978557	Mean Reward 239.999562   

Iteration 59	Total Reward 15004.142633	Mean Reward 254.307502   

Iteration 69	Total Reward 16579.311995	Mean Reward 240.279884   Iteration 79	Total Reward 18846.329343	Mean Reward 238.561131   

Iteration 89	Total Reward 23803.575772	Mean Reward 267.455908   Iteration 99	Total Reward 25951.377141	Mean Reward 262.135123   

Total reward : 25951.377140836557
Mean reward : 259.5137714083656


Iteration 9	Total Reward 1645.531994	Mean Reward 182.836888   

Iteration 19	Total Reward 5710.802544	Mean Reward 300.568555   Iteration 29	Total Reward 9798.926671	Mean Reward 337.894023   

Iteration 39	Total Reward 12160.317186	Mean Reward 311.803005   Iteration 49	Total Reward 15365.843975	Mean Reward 313.588653   

Iteration 59	Total Reward 18129.856156	Mean Reward 307.285698   

Iteration 69	Total Reward 19714.415769	Mean Reward 285.716171   

Iteration 79	Total Reward 23328.963326	Mean Reward 295.303333   

Iteration 89	Total Reward 24908.830482	Mean Reward 279.874500   

Iteration 99	Total Reward 28272.570944	Mean Reward 285.581525   Total reward : 28272.570943601717
Mean reward : 282.72570943601715


Iteration 9	Total Reward 2624.346759	Mean Reward 291.594084   Iteration 19	Total Reward 5266.235050	Mean Reward 277.170266   

Iteration 29	Total Reward 7629.161983	Mean Reward 263.074551   Iteration 39	Total Reward 9901.848694	Mean Reward 253.893556   

Iteration 49	Total Reward 11599.683159	Mean Reward 236.728228   

Iteration 59	Total Reward 14426.166612	Mean Reward 244.511299   

Iteration 69	Total Reward 18533.507333	Mean Reward 268.601556   Iteration 79	Total Reward 20260.301224	Mean Reward 256.459509   

Iteration 89	Total Reward 21149.546897	Mean Reward 237.635358   Iteration 99	Total Reward 22776.265142	Mean Reward 230.063284   

Total reward : 22776.265141996064
Mean reward : 227.76265141996063


Iteration 9	Total Reward 2606.602701	Mean Reward 289.622522   

Iteration 19	Total Reward 2606.602701	Mean Reward 137.189616   

Iteration 29	Total Reward 7166.807562	Mean Reward 247.131295   

Iteration 39	Total Reward 9611.993867	Mean Reward 246.461381   Iteration 49	Total Reward 9611.993867	Mean Reward 196.163140   

Iteration 59	Total Reward 12142.093314	Mean Reward 205.798192   Iteration 69	Total Reward 14594.641062	Mean Reward 211.516537   

Iteration 79	Total Reward 16829.600542	Mean Reward 213.032918   Iteration 89	Total Reward 18477.654007	Mean Reward 207.614090   

Iteration 99	Total Reward 22471.763553	Mean Reward 226.987511   Total reward : 22471.76355336734
Mean reward : 224.7176355336734


Iteration 9	Total Reward 1429.855285	Mean Reward 158.872809   Iteration 19	Total Reward 6424.876820	Mean Reward 338.151412   

Iteration 29	Total Reward 8401.472709	Mean Reward 289.705955   

Iteration 39	Total Reward 9889.548977	Mean Reward 253.578179   Iteration 49	Total Reward 12133.805948	Mean Reward 247.628693   

Iteration 59	Total Reward 15089.640230	Mean Reward 255.756614   Iteration 69	Total Reward 15994.016067	Mean Reward 231.797334   

Iteration 79	Total Reward 17720.328056	Mean Reward 224.307950   Iteration 89	Total Reward 20338.173245	Mean Reward 228.518801   

Iteration 99	Total Reward 21710.098742	Mean Reward 219.293927   Total reward : 21710.098742373353
Mean reward : 217.10098742373353


Iteration 9	Total Reward 2735.807615	Mean Reward 303.978624   Iteration 19	Total Reward 4435.274952	Mean Reward 233.435524   

Iteration 29	Total Reward 5395.647552	Mean Reward 186.056812   

Iteration 39	Total Reward 6214.619051	Mean Reward 159.349206   

Iteration 49	Total Reward 7205.594451	Mean Reward 147.052948   Iteration 59	Total Reward 8187.314492	Mean Reward 138.768042   

Iteration 69	Total Reward 9147.687091	Mean Reward 132.575175   

Iteration 79	Total Reward 13897.102236	Mean Reward 175.912687   

Iteration 89	Total Reward 16830.170277	Mean Reward 189.103037   Iteration 99	Total Reward 20189.246792	Mean Reward 203.931786   

Total reward : 20189.24679158989
Mean reward : 201.8924679158989


Iteration 9	Total Reward 2505.964513	Mean Reward 278.440501   Iteration 19	Total Reward 4853.027836	Mean Reward 255.422518   

Iteration 29	Total Reward 6836.647760	Mean Reward 235.746474   

Iteration 39	Total Reward 7829.898310	Mean Reward 200.766623   

Iteration 49	Total Reward 8450.171218	Mean Reward 172.452474   

Iteration 59	Total Reward 10252.398288	Mean Reward 173.769463   Iteration 69	Total Reward 11251.270773	Mean Reward 163.061895   

Iteration 79	Total Reward 13998.345937	Mean Reward 177.194252   

Iteration 89	Total Reward 14721.753513	Mean Reward 165.412961   Iteration 99	Total Reward 15981.981353	Mean Reward 161.434155   

Total reward : 15981.981353280895
Mean reward : 159.81981353280895


Iteration 9	Total Reward 2395.767676	Mean Reward 266.196408   

Iteration 19	Total Reward 5785.020348	Mean Reward 304.474755   

Iteration 29	Total Reward 8079.462463	Mean Reward 278.602154   Iteration 39	Total Reward 10619.730141	Mean Reward 272.300773   

Iteration 49	Total Reward 11416.653027	Mean Reward 232.992919   

Iteration 59	Total Reward 14579.568338	Mean Reward 247.111328   

Iteration 69	Total Reward 17225.543612	Mean Reward 249.645560   Iteration 79	Total Reward 18170.382137	Mean Reward 230.004837   

Iteration 89	Total Reward 21559.634809	Mean Reward 242.243088   

Iteration 99	Total Reward 24789.075712	Mean Reward 250.394704   Total reward : 24789.075711534937
Mean reward : 247.89075711534937


Iteration 9	Total Reward 2584.105853	Mean Reward 287.122873   Iteration 19	Total Reward 3517.376869	Mean Reward 185.125098   

Iteration 29	Total Reward 7192.663212	Mean Reward 248.022869   Iteration 39	Total Reward 8093.805945	Mean Reward 207.533486   

Iteration 49	Total Reward 9274.959871	Mean Reward 189.284895   Iteration 59	Total Reward 10636.651221	Mean Reward 180.282224   

Iteration 69	Total Reward 13397.877720	Mean Reward 194.172141   

Iteration 79	Total Reward 16645.293601	Mean Reward 210.699919   

Iteration 89	Total Reward 19758.757016	Mean Reward 222.008506   

Iteration 99	Total Reward 21437.144083	Mean Reward 216.536809   Total reward : 22129.03788758498
Mean reward : 221.2903788758498


Iteration 9	Total Reward 3503.001406	Mean Reward 389.222378   Iteration 19	Total Reward 6740.574065	Mean Reward 354.767056   

Iteration 29	Total Reward 8492.074768	Mean Reward 292.830164   Iteration 39	Total Reward 10925.025163	Mean Reward 280.128850   

Iteration 49	Total Reward 14768.443964	Mean Reward 301.396816   Iteration 59	Total Reward 18244.685854	Mean Reward 309.231964   

Iteration 69	Total Reward 20993.906538	Mean Reward 304.259515   

Iteration 79	Total Reward 23451.138132	Mean Reward 296.849850   

Iteration 89	Total Reward 25237.135420	Mean Reward 283.563319   

Iteration 99	Total Reward 26234.855402	Mean Reward 264.998539   Total reward : 27228.323529919515
Mean reward : 272.28323529919516


Iteration 9	Total Reward 2809.846645	Mean Reward 312.205183   

Iteration 19	Total Reward 4711.957279	Mean Reward 247.997752   Iteration 29	Total Reward 6616.546436	Mean Reward 228.156774   

Iteration 39	Total Reward 10423.246227	Mean Reward 267.262724   Iteration 49	Total Reward 13362.180740	Mean Reward 272.697566   

Iteration 59	Total Reward 16181.084207	Mean Reward 274.255665   Iteration 69	Total Reward 18060.353185	Mean Reward 261.744249   

Iteration 79	Total Reward 20879.256652	Mean Reward 264.294388   

Iteration 89	Total Reward 24651.579441	Mean Reward 276.984039   

Iteration 99	Total Reward 26553.690076	Mean Reward 268.219092   Total reward : 26553.690075541628
Mean reward : 265.5369007554163


Iteration 9	Total Reward 2677.764454	Mean Reward 297.529384   Iteration 19	Total Reward 5027.042409	Mean Reward 264.581179   

Iteration 29	Total Reward 6653.076791	Mean Reward 229.416441   Iteration 39	Total Reward 13595.008184	Mean Reward 348.589953   

Iteration 49	Total Reward 15359.896700	Mean Reward 313.467280   

Iteration 59	Total Reward 16451.015801	Mean Reward 278.830776   

Iteration 69	Total Reward 18982.592527	Mean Reward 275.110037   

Iteration 79	Total Reward 19865.102866	Mean Reward 251.456998   

Iteration 89	Total Reward 22353.926999	Mean Reward 251.167719   

Iteration 99	Total Reward 24129.592688	Mean Reward 243.733259   Total reward : 25088.985723832615
Mean reward : 250.88985723832616


Iteration 9	Total Reward 1585.405490	Mean Reward 176.156166   

Iteration 19	Total Reward 1585.405490	Mean Reward 83.442394   Iteration 29	Total Reward 3429.834520	Mean Reward 118.270156   

Iteration 39	Total Reward 7159.705699	Mean Reward 183.582197   Iteration 49	Total Reward 8056.152467	Mean Reward 164.411275   

Iteration 59	Total Reward 9004.134730	Mean Reward 152.612453   

Iteration 69	Total Reward 12705.929858	Mean Reward 184.143911   

Iteration 79	Total Reward 14601.894383	Mean Reward 184.834106   Iteration 89	Total Reward 15498.341151	Mean Reward 174.138665   

Iteration 99	Total Reward 17394.305675	Mean Reward 175.700057   Total reward : 17394.3056752272
Mean reward : 173.94305675227199


Iteration 9	Total Reward 2556.562781	Mean Reward 284.062531   Iteration 19	Total Reward 5526.391485	Mean Reward 290.862710   

Iteration 29	Total Reward 7181.813162	Mean Reward 247.648730   Iteration 39	Total Reward 7181.813162	Mean Reward 184.149055   

Iteration 49	Total Reward 9453.196139	Mean Reward 192.922370   Iteration 59	Total Reward 11050.957118	Mean Reward 187.304358   

Iteration 69	Total Reward 11050.957118	Mean Reward 160.158799   Iteration 79	Total Reward 14950.075907	Mean Reward 189.241467   

Iteration 89	Total Reward 16389.361627	Mean Reward 184.150131   

Iteration 99	Total Reward 18067.583438	Mean Reward 182.500843   Total reward : 18067.58343802862
Mean reward : 180.6758343802862


Iteration 9	Total Reward 3425.030920	Mean Reward 380.558991   Iteration 19	Total Reward 5097.760754	Mean Reward 268.303198   

Iteration 29	Total Reward 7586.510671	Mean Reward 261.603816   Iteration 39	Total Reward 11631.634227	Mean Reward 298.247031   

Iteration 49	Total Reward 11631.634227	Mean Reward 237.380290   

Iteration 59	Total Reward 11631.634227	Mean Reward 197.146343   

Iteration 69	Total Reward 14788.885659	Mean Reward 214.331676   

Iteration 79	Total Reward 19542.539195	Mean Reward 247.373914   Iteration 89	Total Reward 20420.266083	Mean Reward 229.441192   

Iteration 99	Total Reward 22909.016000	Mean Reward 231.404202   Total reward : 22909.015999831317
Mean reward : 229.09015999831317


Iteration 9	Total Reward 5258.522085	Mean Reward 584.280232   Iteration 19	Total Reward 8758.232769	Mean Reward 460.959619   

Iteration 29	Total Reward 10379.668391	Mean Reward 357.919600   Iteration 39	Total Reward 13386.826605	Mean Reward 343.251964   

Iteration 49	Total Reward 15792.352783	Mean Reward 322.292914   Iteration 59	Total Reward 20379.887564	Mean Reward 345.421823   

Iteration 69	Total Reward 24935.277210	Mean Reward 361.380829   

Iteration 79	Total Reward 26549.987318	Mean Reward 336.075789   Iteration 89	Total Reward 27575.853577	Mean Reward 309.841051   

Iteration 99	Total Reward 28956.666088	Mean Reward 292.491577   Total reward : 28956.66608822884
Mean reward : 289.56666088228843


Iteration 9	Total Reward 3685.594313	Mean Reward 409.510479   

Iteration 19	Total Reward 8333.743149	Mean Reward 438.618060   Iteration 29	Total Reward 13396.242405	Mean Reward 461.939393   

Iteration 39	Total Reward 17135.947586	Mean Reward 439.383271   Iteration 49	Total Reward 18932.770698	Mean Reward 386.383075   

Iteration 59	Total Reward 21468.089770	Mean Reward 363.865928   

Iteration 69	Total Reward 23264.912882	Mean Reward 337.172650   

Iteration 79	Total Reward 25844.848829	Mean Reward 327.149985   Iteration 89	Total Reward 27481.756344	Mean Reward 308.783779   

Iteration 99	Total Reward 30862.976112	Mean Reward 311.747233   Total reward : 30862.9761115728
Mean reward : 308.629761115728


Iteration 9	Total Reward 2172.759298	Mean Reward 241.417700   

Iteration 19	Total Reward 2172.759298	Mean Reward 114.355753   

Iteration 29	Total Reward 3815.807385	Mean Reward 131.579565   

Iteration 39	Total Reward 5455.684639	Mean Reward 139.889350   

Iteration 49	Total Reward 6201.824029	Mean Reward 126.567837   Iteration 59	Total Reward 7651.139928	Mean Reward 129.680338   

Iteration 69	Total Reward 8487.918014	Mean Reward 123.013305   Iteration 79	Total Reward 10259.120705	Mean Reward 129.862287   

Iteration 89	Total Reward 11769.028812	Mean Reward 132.236279   Iteration 99	Total Reward 13407.285454	Mean Reward 135.427126   

Total reward : 13407.285453613868
Mean reward : 134.07285453613866


Iteration 9	Total Reward 3473.055489	Mean Reward 385.895054   

Iteration 19	Total Reward 3473.055489	Mean Reward 182.792394   

Iteration 29	Total Reward 7192.624887	Mean Reward 248.021548   Iteration 39	Total Reward 7192.624887	Mean Reward 184.426279   

Iteration 49	Total Reward 9068.832678	Mean Reward 185.078218   Iteration 59	Total Reward 10859.508776	Mean Reward 184.059471   

Iteration 69	Total Reward 11797.612672	Mean Reward 170.979894   Iteration 79	Total Reward 13651.601752	Mean Reward 172.805085   

Iteration 89	Total Reward 13651.601752	Mean Reward 153.388784   

Iteration 99	Total Reward 14504.173953	Mean Reward 146.506808   Total reward : 14504.173953494092
Mean reward : 145.04173953494092


Iteration 9	Total Reward 2972.912852	Mean Reward 330.323650   Iteration 19	Total Reward 5400.272986	Mean Reward 284.224894   

Iteration 29	Total Reward 8182.530195	Mean Reward 282.156214   

Iteration 39	Total Reward 8182.530195	Mean Reward 209.808467   

Iteration 49	Total Reward 10584.529131	Mean Reward 216.010799   

Iteration 59	Total Reward 14405.203373	Mean Reward 244.155989   

Iteration 69	Total Reward 17857.177999	Mean Reward 258.799681   

Iteration 79	Total Reward 21327.435854	Mean Reward 269.967542   

Iteration 89	Total Reward 23073.102853	Mean Reward 259.248347   

Iteration 99	Total Reward 23073.102853	Mean Reward 233.061645   Total reward : 23073.10285340719
Mean reward : 230.7310285340719


Iteration 9	Total Reward 3292.015641	Mean Reward 365.779516   Iteration 19	Total Reward 8165.352536	Mean Reward 429.755397   

Iteration 29	Total Reward 8782.984241	Mean Reward 302.861526   

Iteration 39	Total Reward 12050.252899	Mean Reward 308.980844   

Iteration 49	Total Reward 16194.550035	Mean Reward 330.501021   

Iteration 59	Total Reward 17826.434872	Mean Reward 302.142964   

Iteration 69	Total Reward 18690.187733	Mean Reward 270.872286   

Iteration 79	Total Reward 20968.162716	Mean Reward 265.419781   

Iteration 89	Total Reward 24042.515641	Mean Reward 270.140625   

Iteration 99	Total Reward 26296.973100	Mean Reward 265.625991   Total reward : 27140.86729472006
Mean reward : 271.4086729472006


Iteration 9	Total Reward 1819.397071	Mean Reward 202.155230   Iteration 19	Total Reward 3638.794142	Mean Reward 191.515481   

Iteration 29	Total Reward 5764.829535	Mean Reward 198.787225   

Iteration 39	Total Reward 8203.240215	Mean Reward 210.339493   

Iteration 49	Total Reward 10029.510419	Mean Reward 204.683886   

Iteration 59	Total Reward 12456.218001	Mean Reward 211.122339   

Iteration 69	Total Reward 14886.668695	Mean Reward 215.748822   

Iteration 79	Total Reward 16993.989738	Mean Reward 215.113794   

Iteration 89	Total Reward 20380.315926	Mean Reward 228.992314   

Iteration 99	Total Reward 20795.714207	Mean Reward 210.057719   Total reward : 20795.71420665019
Mean reward : 207.95714206650192


Iteration 9	Total Reward 1651.813205	Mean Reward 183.534801   

Iteration 19	Total Reward 4265.530047	Mean Reward 224.501581   

Iteration 29	Total Reward 6178.943925	Mean Reward 213.067032   

Iteration 39	Total Reward 9084.740939	Mean Reward 232.942075   

Iteration 49	Total Reward 10894.170279	Mean Reward 222.330006   

Iteration 59	Total Reward 13844.328281	Mean Reward 234.649632   Iteration 69	Total Reward 15546.190796	Mean Reward 225.307113   

Iteration 79	Total Reward 17269.356393	Mean Reward 218.599448   

Iteration 89	Total Reward 19432.402002	Mean Reward 218.341596   

Iteration 99	Total Reward 22338.199016	Mean Reward 225.638374   Total reward : 22338.199015956932
Mean reward : 223.3819901595693


Iteration 9	Total Reward 1326.342015	Mean Reward 147.371335   

Iteration 19	Total Reward 3512.324945	Mean Reward 184.859208   

Iteration 29	Total Reward 5016.608918	Mean Reward 172.986514   Iteration 39	Total Reward 7426.650783	Mean Reward 190.426943   

Iteration 49	Total Reward 10017.411282	Mean Reward 204.436965   

Iteration 59	Total Reward 13192.633067	Mean Reward 223.603950   

Iteration 69	Total Reward 13704.913591	Mean Reward 198.621936   

Iteration 79	Total Reward 16002.261019	Mean Reward 202.560266   

Iteration 89	Total Reward 16646.847866	Mean Reward 187.043234   

Iteration 99	Total Reward 18977.767882	Mean Reward 191.694625   Total reward : 18977.76788206368
Mean reward : 189.7776788206368


Iteration 9	Total Reward 1865.598938	Mean Reward 207.288771   

Iteration 19	Total Reward 6068.312639	Mean Reward 319.384876   Iteration 29	Total Reward 10945.461675	Mean Reward 377.429713   

Iteration 39	Total Reward 13292.539822	Mean Reward 340.834354   

Iteration 49	Total Reward 16407.749267	Mean Reward 334.852026   

Iteration 59	Total Reward 23211.436836	Mean Reward 393.414184   

Iteration 69	Total Reward 25510.878645	Mean Reward 369.722879   

Iteration 79	Total Reward 31054.478768	Mean Reward 393.094668   

Iteration 89	Total Reward 32808.671117	Mean Reward 368.636754   

Iteration 99	Total Reward 33512.963504	Mean Reward 338.514783   Total reward : 34334.35638376391
Mean reward : 343.34356383763907


Iteration 9	Total Reward 2417.446840	Mean Reward 268.605204   Iteration 19	Total Reward 3971.015194	Mean Reward 209.000800   

Iteration 29	Total Reward 4773.091306	Mean Reward 164.589355   Iteration 39	Total Reward 6515.521317	Mean Reward 167.064649   

Iteration 49	Total Reward 8130.892045	Mean Reward 165.936572   

Iteration 59	Total Reward 11623.779611	Mean Reward 197.013214   Iteration 69	Total Reward 13344.324171	Mean Reward 193.396002   

Iteration 79	Total Reward 16155.948855	Mean Reward 204.505682   

Iteration 89	Total Reward 19528.023190	Mean Reward 219.415991   

Iteration 99	Total Reward 20505.502495	Mean Reward 207.126288   Total reward : 20505.502495015742
Mean reward : 205.05502495015742


Iteration 9	Total Reward 1778.767692	Mean Reward 197.640855   

Iteration 19	Total Reward 4951.864443	Mean Reward 260.624444   

Iteration 29	Total Reward 6260.567404	Mean Reward 215.881635   

Iteration 39	Total Reward 8996.565278	Mean Reward 230.681161   

Iteration 49	Total Reward 12981.083515	Mean Reward 264.920072   

Iteration 59	Total Reward 15647.007291	Mean Reward 265.203513   Iteration 69	Total Reward 18467.893255	Mean Reward 267.650627   

Iteration 79	Total Reward 20443.781371	Mean Reward 258.782043   

Iteration 89	Total Reward 22805.802974	Mean Reward 256.244977   

Iteration 99	Total Reward 26427.939296	Mean Reward 266.948882   Total reward : 26427.939296048236
Mean reward : 264.27939296048237


Iteration 9	Total Reward 3687.899783	Mean Reward 409.766643   

Iteration 19	Total Reward 4687.799199	Mean Reward 246.726274   Iteration 29	Total Reward 8337.662182	Mean Reward 287.505592   

Iteration 39	Total Reward 11702.150760	Mean Reward 300.055148   

Iteration 49	Total Reward 14835.763669	Mean Reward 302.770687   

Iteration 59	Total Reward 16811.209257	Mean Reward 284.935750   

Iteration 69	Total Reward 21683.551942	Mean Reward 314.254376   

Iteration 79	Total Reward 24380.668467	Mean Reward 308.616057   

Iteration 89	Total Reward 25110.971168	Mean Reward 282.145743   

Iteration 99	Total Reward 27062.063510	Mean Reward 273.354177   Total reward : 28007.86826474672
Mean reward : 280.0786826474672


Iteration 9	Total Reward 2244.687895	Mean Reward 249.409766   

Iteration 19	Total Reward 5970.329831	Mean Reward 314.227886   Iteration 29	Total Reward 7291.191380	Mean Reward 251.420392   

Iteration 39	Total Reward 7985.249861	Mean Reward 204.749996   Iteration 49	Total Reward 10335.259640	Mean Reward 210.923666   

Iteration 59	Total Reward 12296.840707	Mean Reward 208.421029   

Iteration 69	Total Reward 15140.145667	Mean Reward 219.422401   

Iteration 79	Total Reward 18793.468842	Mean Reward 237.892011   Iteration 89	Total Reward 21988.012843	Mean Reward 247.056324   

Iteration 99	Total Reward 23844.978153	Mean Reward 240.858365   Total reward : 23844.978153240136
Mean reward : 238.44978153240135


Iteration 9	Total Reward 2284.027495	Mean Reward 253.780833   Iteration 19	Total Reward 3963.527226	Mean Reward 208.606696   

Iteration 29	Total Reward 6329.926829	Mean Reward 218.273339   Iteration 39	Total Reward 9538.980330	Mean Reward 244.589239   

Iteration 49	Total Reward 12369.095588	Mean Reward 252.430522   Iteration 59	Total Reward 15306.040889	Mean Reward 259.424422   

Iteration 69	Total Reward 17614.765549	Mean Reward 255.286457   

Iteration 79	Total Reward 20623.967956	Mean Reward 261.062886   

Iteration 89	Total Reward 21332.653960	Mean Reward 239.692741   

Iteration 99	Total Reward 22846.739387	Mean Reward 230.775145   Total reward : 22846.739387043068
Mean reward : 228.46739387043067


Iteration 9	Total Reward 1594.902749	Mean Reward 177.211417   

Iteration 19	Total Reward 5332.415366	Mean Reward 280.653440   

Iteration 29	Total Reward 8900.787357	Mean Reward 306.923702   

Iteration 39	Total Reward 11662.345881	Mean Reward 299.034510   

Iteration 49	Total Reward 14476.388248	Mean Reward 295.436495   

Iteration 59	Total Reward 18579.344776	Mean Reward 314.904149   Iteration 69	Total Reward 21811.347406	Mean Reward 316.106484   

Iteration 79	Total Reward 24583.252784	Mean Reward 311.180415   

Iteration 89	Total Reward 26336.615181	Mean Reward 295.917025   

Iteration 99	Total Reward 28204.108618	Mean Reward 284.889986   Total reward : 29125.053123675465
Mean reward : 291.2505312367546


Iteration 9	Total Reward 2142.976664	Mean Reward 238.108518   

Iteration 19	Total Reward 4984.308523	Mean Reward 262.332028   

Iteration 29	Total Reward 6006.224213	Mean Reward 207.111180   Iteration 39	Total Reward 7350.291104	Mean Reward 188.469003   

Iteration 49	Total Reward 8079.921503	Mean Reward 164.896357   

Iteration 59	Total Reward 9309.384844	Mean Reward 157.786184   

Iteration 69	Total Reward 10760.858833	Mean Reward 155.954476   

Iteration 79	Total Reward 11475.467624	Mean Reward 145.259084   

Iteration 89	Total Reward 12768.347180	Mean Reward 143.464575   

Iteration 99	Total Reward 16092.612205	Mean Reward 162.551638   Total reward : 16092.612204728983
Mean reward : 160.92612204728982


Iteration 9	Total Reward 2728.230094	Mean Reward 303.136677   

Iteration 19	Total Reward 5348.998447	Mean Reward 281.526234   

Iteration 29	Total Reward 8166.568120	Mean Reward 281.605797   

Iteration 39	Total Reward 10078.324591	Mean Reward 258.418579   

Iteration 49	Total Reward 14159.946174	Mean Reward 288.978493   

Iteration 59	Total Reward 14159.946174	Mean Reward 239.999088   

Iteration 69	Total Reward 16685.526875	Mean Reward 241.819230   

Iteration 79	Total Reward 18271.593061	Mean Reward 231.285988   Iteration 89	Total Reward 18646.510635	Mean Reward 209.511355   

Iteration 99	Total Reward 22189.213127	Mean Reward 224.133466   Total reward : 22189.213127465802
Mean reward : 221.89213127465803


Iteration 9	Total Reward 902.313770	Mean Reward 100.257086   Iteration 19	Total Reward 3593.259288	Mean Reward 189.118910   

Iteration 29	Total Reward 5397.886828	Mean Reward 186.134029   Iteration 39	Total Reward 7112.074339	Mean Reward 182.360880   

Iteration 49	Total Reward 9629.410124	Mean Reward 196.518574   

Iteration 59	Total Reward 9629.410124	Mean Reward 163.210341   

Iteration 69	Total Reward 13670.971367	Mean Reward 198.130020   Iteration 79	Total Reward 15558.268088	Mean Reward 196.940102   

Iteration 89	Total Reward 17745.273774	Mean Reward 199.385099   

Iteration 99	Total Reward 22182.852482	Mean Reward 224.069217   Total reward : 22846.398671163857
Mean reward : 228.46398671163857


Iteration 9	Total Reward 861.942843	Mean Reward 95.771427   Iteration 19	Total Reward 3521.328998	Mean Reward 185.333105   

Iteration 29	Total Reward 5281.993497	Mean Reward 182.137707   Iteration 39	Total Reward 9401.856612	Mean Reward 241.073246   

Iteration 49	Total Reward 12015.796576	Mean Reward 245.220338   

Iteration 59	Total Reward 14261.969275	Mean Reward 241.728293   

Iteration 69	Total Reward 16454.410869	Mean Reward 238.469723   

Iteration 79	Total Reward 20663.526151	Mean Reward 261.563622   

Iteration 89	Total Reward 22387.411837	Mean Reward 251.543953   

Iteration 99	Total Reward 24741.156329	Mean Reward 249.910670   Total reward : 24741.156328728925
Mean reward : 247.41156328728925


Iteration 9	Total Reward 77.716591	Mean Reward 8.635177   

Iteration 19	Total Reward 406.746308	Mean Reward 21.407700   Iteration 29	Total Reward 562.179490	Mean Reward 19.385500   

Iteration 39	Total Reward 1056.258570	Mean Reward 27.083553   

Iteration 49	Total Reward 1676.275142	Mean Reward 34.209697   

Iteration 59	Total Reward 2005.304859	Mean Reward 33.988218   

Iteration 69	Total Reward 2005.304859	Mean Reward 29.062389   Iteration 79	Total Reward 2308.784549	Mean Reward 29.225121   

Iteration 89	Total Reward 2824.219262	Mean Reward 31.732801   Iteration 99	Total Reward 2979.652445	Mean Reward 30.097499   

Total reward : 2979.652444858364
Mean reward : 29.79652444858364


Iteration 9	Total Reward 2694.894544	Mean Reward 299.432727   Iteration 19	Total Reward 4552.687636	Mean Reward 239.615139   

Iteration 29	Total Reward 7921.911949	Mean Reward 273.169378   

Iteration 39	Total Reward 9712.189102	Mean Reward 249.030490   

Iteration 49	Total Reward 13716.389722	Mean Reward 279.926321   Iteration 59	Total Reward 14696.533257	Mean Reward 249.093784   

Iteration 69	Total Reward 17465.516902	Mean Reward 253.123433   

Iteration 79	Total Reward 19425.803973	Mean Reward 245.896253   

Iteration 89	Total Reward 20281.756314	Mean Reward 227.884902   

Iteration 99	Total Reward 24904.357380	Mean Reward 251.559165   Total reward : 24904.35737953922
Mean reward : 249.04357379539218


Iteration 9	Total Reward 1352.166817	Mean Reward 150.240757   Iteration 19	Total Reward 3210.671143	Mean Reward 168.982692   

Iteration 29	Total Reward 5255.723868	Mean Reward 181.231858   Iteration 39	Total Reward 7307.276559	Mean Reward 187.366066   

Iteration 49	Total Reward 8841.877132	Mean Reward 180.446472   Iteration 59	Total Reward 10425.451355	Mean Reward 176.702565   

Iteration 69	Total Reward 14119.674588	Mean Reward 204.632965   Iteration 79	Total Reward 15692.942515	Mean Reward 198.644842   

Iteration 89	Total Reward 19518.777112	Mean Reward 219.312102   Iteration 99	Total Reward 21235.972450	Mean Reward 214.504772   

Total reward : 21235.972449661363
Mean reward : 212.35972449661364


Iteration 9	Total Reward 3083.171744	Mean Reward 342.574638   Iteration 19	Total Reward 6564.678657	Mean Reward 345.509403   

Iteration 29	Total Reward 7169.336870	Mean Reward 247.218513   Iteration 39	Total Reward 11077.448682	Mean Reward 284.037146   

Iteration 49	Total Reward 14117.786993	Mean Reward 288.118102   

Iteration 59	Total Reward 15738.492001	Mean Reward 266.754102   

Iteration 69	Total Reward 18178.171332	Mean Reward 263.451758   

Iteration 79	Total Reward 18918.215913	Mean Reward 239.471088   Iteration 89	Total Reward 22462.064372	Mean Reward 252.382746   

Iteration 99	Total Reward 23084.935403	Mean Reward 233.181166   Total reward : 23084.935402878476
Mean reward : 230.84935402878477


Iteration 9	Total Reward 1634.046944	Mean Reward 181.560772   Iteration 19	Total Reward 3780.585957	Mean Reward 198.978208   

Iteration 29	Total Reward 4469.775417	Mean Reward 154.130187   

Iteration 39	Total Reward 5464.518891	Mean Reward 140.115869   

Iteration 49	Total Reward 6245.453406	Mean Reward 127.458233   

Iteration 59	Total Reward 9260.050102	Mean Reward 156.950002   

Iteration 69	Total Reward 12512.024272	Mean Reward 181.333685   

Iteration 79	Total Reward 16665.387408	Mean Reward 210.954271   

Iteration 89	Total Reward 18258.099941	Mean Reward 205.147190   

Iteration 99	Total Reward 21722.273616	Mean Reward 219.416905   Total reward : 21722.273615904287
Mean reward : 217.22273615904288


Iteration 9	Total Reward 3386.848979	Mean Reward 376.316553   

Iteration 19	Total Reward 7443.682304	Mean Reward 391.772753   

Iteration 29	Total Reward 8311.535274	Mean Reward 286.604665   

Iteration 39	Total Reward 12260.218748	Mean Reward 314.364583   

Iteration 49	Total Reward 13575.174400	Mean Reward 277.044376   

Iteration 59	Total Reward 15310.880340	Mean Reward 259.506446   