In [2]:
import numpy as np
import random
import json

In [3]:
class GenerateJSON:

    def __init__(self, numberOfRegs, typesOfPlaces, T, upperBound, maxNumberCourts):
        """
        :param numberOfRegs: -> int, number of regions
        :param typesOfPlaces: -> int, number of basketball courts types
        :param T: -> int, total number of years
        :param upperBound: -> int, maximum number of projects for each regions
        :param maxNumberCourts: -> int, maximum number of basketball courts all types for each region
        """
        self.numberOfRegs = numberOfRegs
        self.typesOfPlaces = typesOfPlaces
        self.namesOfRegs = ['Region' + '_' + str(i) for i in range(1, numberOfRegs + 1)]
        self.w = np.zeros((self.numberOfRegs, self.typesOfPlaces)).astype(np.int64)
        self.b = np.zeros((self.numberOfRegs,)).astype(np.int64)
        self.T = T
        self.upperBound = upperBound
        self.totalBudget = random.randint(3, 10)
        self.totalProjPerYear = random.randint(1, 4)
        self.maxNumberCourts = maxNumberCourts

    def gap(self):

        # Зполняем матрицу приоритетностей площадок
        for i in range(self.numberOfRegs):
            for j in range(self.typesOfPlaces):
                self.w[i][j] = int(np.random.randint(1, 18, (1, 1))[0][0])

        # Заполняем матрицу количесвта баскетболистов
        for i in range(self.numberOfRegs):
            self.b[i] = int(np.random.randint(3245, 10001, (1, 1))[0][0])

        # Словарь стоимостей типов площадок
        # _cost = {'Type of basketball court' + '_' + str(i): random.randint(12000000, 20000000) for i in
        #          range(1, self.typesOfPlaces + 1)}

        # Словарь рагов регионов
        _p = {self.namesOfRegs[j]: {'Rank': random.randint(1, 11)} for j in range(len(self.namesOfRegs))}

        # Бюджет региона на строительство объектов
        _a = {self.namesOfRegs[j]: {'Regs budget': random.randint(5000000, 7500000)} for j in range(len(self.namesOfRegs))}

        # Словарь вмещаемости количества людей для каждого типа площадки
        _e = {'Type of basketball court' + '_' + str(e): random.randint(51, 142) for e in range(1, self.typesOfPlaces + 1)}

        # Словарь приориттностей для каждого региона
        _w = {name: {'Type of basketball court': {}} for name in self.namesOfRegs}
        w_keys = list(_w.keys())
        squared = ['Type of basketball court' + '_' + str(j) for j in range(1, self.typesOfPlaces + 1)]

        for i in range(len(w_keys)):
            for j in range(len(squared)):
                _w[w_keys[i]]['Type of basketball court'][squared[j]] = {'Priority': int(self.w[i][j]), 'cost': random.randint(12000000, 20000000),
                                                                         'Regional costs': random.randint(3000000, 5200000)}

        # Словарь количества баскетболистов для каждого региона
        _b = {name: {'Number of players': None} for name in self.namesOfRegs}
        b_keys = list(_b.keys())
        for ind in range(len(b_keys)):
            _b[b_keys[ind]]['Number of players'] = int(self.b[ind])

        for key in list(_w.keys()):
            _w[key].update(_b[key])
            _w[key].update(_p[key])
            _w[key].update(_a[key])

        cost_and_capacity = {i: {"Capacity": _e[i]} for i in list(_e.keys())}

        mean_cost = []
        for i in list(_w.keys()):
            for j in list(_w[i]["Type of basketball court"].keys()):
                mean_cost.append(_w[i]["Type of basketball court"][j]['cost'])

        mean_cost = sum(mean_cost) / len(mean_cost)
        totalBudget = int(self.upperBound * mean_cost * self.T * (0.4 + self.totalBudget * 0.2))
        totalProjPerYear = int((self.numberOfRegs / self.T) * (0.7 + self.totalProjPerYear * 0.2))

        data = {'Periods': self.T,
                'Limit on the number of projects per year': totalProjPerYear,
                'Total budget': totalBudget,
                'The maximum number of basketball courts in the region per year': self.upperBound,
                'The maximum number of basketball courts all types for each region': self.maxNumberCourts,
                'Regions': _w,
                'Types of basketball courts': cost_and_capacity}

        with open('data.json', 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4, ensure_ascii=False)


numberOfRegs = 2
typesOfPlaces = 2
T = 3
upperBound, maxNumberCourts = 1, 3

obj = GenerateJSON(numberOfRegs, typesOfPlaces, T, upperBound, maxNumberCourts)
obj.gap()

In [4]:
from ortools.linear_solver import pywraplp


def LinearProgrammingExample(w, b, cost, p, e, T, w_dict, a, u, upperBound, totalBudget, totalProjPerYear, numberOfRegs,
                             typesOfPlaces, maxNumberCourts):
    years = ['year' + '_' + str(i) for i in range(1, T + 1)]
    res = []

    solver = pywraplp.Solver.CreateSolver('SCIP')
    solver.EnableOutput()
    variables = []

    # Создаем переменные
    for t in range(1, T + 1):
        for key in list(w_dict.keys()):
            for k in list(w_dict[key].keys()):
                variables.append(solver.IntVar(0, upperBound, (key + '.' + k + '.' + years[t - 1])))

    # Ограничения на количесвто объектов в год
    for i in range(0, len(variables), numberOfRegs * typesOfPlaces):
        solver.Add(sum(variables[i:i + (numberOfRegs * typesOfPlaces)]) <= totalProjPerYear)
        print(variables[i:i + (numberOfRegs * typesOfPlaces)])

    # Ограничения на стоиммость объектов за T лет
    tot = []
    for i in range(0, len(variables), typesOfPlaces):
        l = variables[i: i + typesOfPlaces]
        for j in range(len(l)):
            tot.append(l[j] * cost[int(i / typesOfPlaces) % numberOfRegs][j])

    solver.Add(sum(tot) <= totalBudget)

    vars = dict(sorted({str(var): var for var in variables}.items(), key=lambda x: x[0]))

    # Ограничение на максимальное количесвто площадок в каждом регионе
    for i in range(0, len(list(vars.values())), T * typesOfPlaces):
        solver.Add(sum(list(vars.values())[i: i + (T * typesOfPlaces)]) <= maxNumberCourts)

    # Ограничение на колиество баскетболистов
    for i in range(0, len(list(vars.values())), T * typesOfPlaces):
        regs = []
        v = list(vars.values())[i: i + (T * typesOfPlaces)]
        for j in range(0, len(v), T):
            regs.append(sum(v[j:j + T]) * e[int(j / T) % T])

        solver.Add(sum(regs) <= b[int(i / (T * typesOfPlaces))])

    # Ограничение на затраты регионов
    for i in range(0, len(list(vars.values())), T * typesOfPlaces):
        regs = []
        v = list(vars.values())[i: i + (T * typesOfPlaces)]
        for j in range(0, len(v), T):
            regs.append(sum(v[j:j + T]) * u[int(i / (T * typesOfPlaces) % (T * typesOfPlaces))][int(j / T)])

        solver.Add(sum(regs) <= a[int(i / (T * typesOfPlaces))])
    
    print('Number of constraints =', solver.NumConstraints())

    obj = []

    for i in range(0, len(list(vars.values())), T * typesOfPlaces):
        v = list(vars.values())[i: i + (T * typesOfPlaces)]
        for j in range(0, len(v), T):
            coef = (w[int(i / (T * typesOfPlaces))][int(j / T) % T] + p[int(i / (T * typesOfPlaces))])
            for t in range(T):
                obj.append(coef * v[j:j + T][t] * ((T + 1 - (t + 1)) / T))

    solver.Maximize(sum(obj))
    status = solver.Solve()

    if status == pywraplp.Solver.OPTIMAL:
        for key in list(vars.keys()):
            if int(vars[key].solution_value()) > 0:
                reg, court, year = tuple(key.split('.'))
                res.append((reg, court, year, int(vars[key].solution_value())))

        ans = {i[0]: {i[1]: {i[2]: i[3]}} for i in res}
        print('Objective value =', solver.Objective().Value())

    return ans

In [5]:
def get_data(json_path):
    with open(json_path) as file:
        data = json.load(file)

    T = data["Periods"]
    maxNumberCourts = data['The maximum number of basketball courts all types for each region']

    w_dict = {i: {
        j: data["Regions"][i]["Type of basketball court"][j]["Priority"]
        for
        j in list(data["Regions"][i]["Type of basketball court"].keys())
    } for i in
        list(data["Regions"].keys())}
    b = {i: data["Regions"][i]["Number of players"] for i in
         list(data["Regions"].keys())}
    a = {i: data["Regions"][i]["Regs budget"] for i in
         list(data["Regions"].keys())}
    u = {
        i: {j: data["Regions"][i]["Type of basketball court"][j]["Regional costs"]
            for
            j in list(data["Regions"][i]["Type of basketball court"].keys())
            } for i in
        list(data["Regions"].keys())}
    cost = {
        i: {j: data["Regions"][i]["Type of basketball court"][j]["cost"]
            for
            j in list(data["Regions"][i]["Type of basketball court"].keys())
            } for i in
        list(data["Regions"].keys())}
    e = {i: data["Types of basketball courts"][i]["Capacity"] for i in list(data["Types of basketball courts"].keys())}
    p = {i: data["Regions"][i]["Rank"] for i in
         list(data["Regions"].keys())}

    # Сортируем ключи json, чтобы учесть неупорядоченность введенных данных
    b = dict(sorted(b.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    a = dict(sorted(a.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    cost = dict(sorted({i: dict(sorted(cost[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                        list(cost.keys())}.items(),
                       key=lambda x: int(x[0][x[0].index('_') + 1])))
    u = dict(sorted({i: dict(sorted(u[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                        list(u.keys())}.items(),
                       key=lambda x: int(x[0][x[0].index('_') + 1])))
    p = dict(sorted(p.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    e = dict(sorted(e.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    w_dict = dict(sorted({i: dict(sorted(w_dict[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                          list(w_dict.keys())}.items(),
                         key=lambda x: int(x[0][x[0].index('_') + 1])))

    cost = [[*list((list(cost[key].values())))] for key in list(cost.keys())]
    w = [[*list((list(w_dict[key].values())))] for key in list(w_dict.keys())]
    u = [[*list((list(u[key].values())))] for key in list(u.keys())]
    e = [e[i] for i in list(e.keys())]
    p = [p[i] for i in list(p.keys())]
    b = [b[i] for i in list(b.keys())]
    a = [a[i] for i in list(a.keys())]
    upperBound, totalBudget, totalProjPerYear = data["The maximum number of basketball courts in the region per year"], \
                                                data["Total budget"], data["Limit on the number of projects per year"]

    numberOfRegs, typesOfPlaces = len(data["Regions"]), len(data["Types of basketball courts"])

    return w, b, cost, p, e, T, w_dict, a, u, upperBound, totalBudget, totalProjPerYear, numberOfRegs, typesOfPlaces, maxNumberCourts

In [6]:
path = 'data.json'
transformFunction = get_data(path)
solverFunction = LinearProgrammingExample(*transformFunction)
print(json.dumps(solverFunction, indent=4, ensure_ascii=False))

[Region_1.Type of basketball court_1.year_1, Region_1.Type of basketball court_2.year_1, Region_2.Type of basketball court_1.year_1, Region_2.Type of basketball court_2.year_1]
[Region_1.Type of basketball court_1.year_2, Region_1.Type of basketball court_2.year_2, Region_2.Type of basketball court_1.year_2, Region_2.Type of basketball court_2.year_2]
[Region_1.Type of basketball court_1.year_3, Region_1.Type of basketball court_2.year_3, Region_2.Type of basketball court_1.year_3, Region_2.Type of basketball court_2.year_3]
Number of constraints = 10
Objective value = 0.0
{}
feasible solution found by trivial heuristic after 0.0 seconds, objective value 0.000000e+00
presolving:
   Deactivated symmetry handling methods, since SCIP was built without symmetry detector (SYM=none).
presolving (1 rounds: 1 fast, 1 medium, 1 exhaustive):
 12 deleted vars, 10 deleted constraints, 0 added constraints, 12 tightened bounds, 0 added holes, 0 changed sides, 0 changed coefficients
 0 implications, 

In [68]:
import numpy as np
import ray.rllib.agents.ppo as ppo
import ray.rllib.agents.dqn as dqn
import ray.rllib.agents.a3c as a3c
from ray.tune.logger import pretty_print
import gym

In [69]:
import random
class PrepData:

  def __init__(self, json_path):
    self.json_path = json_path
    self.vars = []
    self.coefs = []
    self.upperBound = None

    with open(self.json_path) as file:
      self.data = json.load(file)

  
  def set_goal_coef(self):
    ex = {}
    T = self.data["Periods"]
    years = ['year' + '_' + str(i) for i in range(1, T + 1)]
    self.upperBound = self.data["The maximum number of basketball courts in the region per year"]

    w_dict = {i: {
        j: self.data["Regions"][i]["Type of basketball court"][j]["Priority"]
        for
        j in list(self.data["Regions"][i]["Type of basketball court"].keys())
    } for i in
        list(self.data["Regions"].keys())}

    p = {i: self.data["Regions"][i]["Rank"] for i in
         list(self.data["Regions"].keys())}

    # Sorting
    p = dict(sorted(p.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    w_dict = dict(sorted({i: dict(sorted(w_dict[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                          list(w_dict.keys())}.items(),
                         key=lambda x: int(x[0][x[0].index('_') + 1])))
    
    # Creating vars
    for t in range(1, T + 1):
      for key in list(w_dict.keys()):
        for k in list(w_dict[key].keys()):
          self.vars.append(key + '.' + k + '.' + years[t - 1])
          self.coefs.append((p[key] + w_dict[key][k]) * ((T + 1 - t) / T))
          ex[key + '.' + k + '.' + years[t - 1]] = (p[key] + w_dict[key][k]) * ((T + 1 - t) / T)
    
    return ex

  def get_constraints(self):
    constraits = []
    numberOfRegs, typesOfPlaces = len(self.data["Regions"]), len(self.data["Types of basketball courts"])

    maxNumberCourts = self.data["The maximum number of basketball courts all types for each region"]

    cost = {
        i: {j: self.data["Regions"][i]["Type of basketball court"][j]["cost"]
            for
            j in list(self.data["Regions"][i]["Type of basketball court"].keys())
            } for i in
        list(self.data["Regions"].keys())}

    b = {i: self.data["Regions"][i]["Number of players"] for i in
         list(self.data["Regions"].keys())}
    
    e = {i: self.data["Types of basketball courts"][i]["Capacity"] for i in list(self.data["Types of basketball courts"].keys())}

    u = {
        i: {j: self.data["Regions"][i]["Type of basketball court"][j]["Regional costs"]
            for
            j in list(self.data["Regions"][i]["Type of basketball court"].keys())
            } for i in
        list(self.data["Regions"].keys())}
    
    a = {i: self.data["Regions"][i]["Regs budget"] for i in
         list(self.data["Regions"].keys())}
    
    cost = dict(sorted({i: dict(sorted(cost[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                        list(cost.keys())}.items(),
                       key=lambda x: int(x[0][x[0].index('_') + 1])))
    
    b = dict(sorted(b.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    e = dict(sorted(e.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    a = dict(sorted(a.items(), key=lambda x: int(x[0][x[0].index('_') + 1])))
    u = dict(sorted({i: dict(sorted(u[i].items(), key=lambda x: int(x[0][x[0].index('_') + 1]))) for i in
                        list(u.keys())}.items(),
                       key=lambda x: int(x[0][x[0].index('_') + 1])))
    
    cost = [[*list((list(cost[key].values())))] for key in list(cost.keys())]
    b = [b[i] for i in list(b.keys())]
    e = [e[i] for i in list(e.keys())]
    u = [[*list((list(u[key].values())))] for key in list(u.keys())]
    a = [a[i] for i in list(a.keys())]

    x, X = {i: 0 for i in self.vars}, []

    totalProjPerYear = self.data["Limit on the number of projects per year"]
    totalBudget = self.data["Total budget"]

    # Ограничения на количесвто объектов в год
    for i in range(0, len(self.vars), numberOfRegs * typesOfPlaces):
      for j in self.vars[i:i + (numberOfRegs * typesOfPlaces)]:
        x[j] = 1
      X.append(x)
      constraits.append(totalProjPerYear)
      
      x = {i: 0 for i in self.vars}

    # Ограничения на стоиммость объектов за T лет
    for i in range(0, len(self.vars), typesOfPlaces):
      l = self.vars[i: i + typesOfPlaces]
      for j in range(len(l)):
        x[l[j]] = cost[int(i / typesOfPlaces) % numberOfRegs][j]
    
    X.append(x)
    constraits.append(totalBudget)
    x = {i: 0 for i in self.vars}

    # Ограничение на максимальное количесвто площадок в каждом регионе
    vars = sorted(self.vars)
    for i in range(0, len(vars), T * typesOfPlaces):
      for j in vars[i: i + (T * typesOfPlaces)]:
        x[j] = 1
      X.append(x)
      constraits.append(maxNumberCourts)

      x = {i: 0 for i in self.vars}
    
    # Ограничение на колиество баскетболистов
    for i in range(0, len(vars), T * typesOfPlaces):
      v = vars[i: i + (T * typesOfPlaces)]
      for j in range(0, len(v), T):
        for k in v[j:j + T]:
          x[k] = e[int(j / T) % T]

      X.append(x)
      constraits.append(b[int(i / (T * typesOfPlaces))])
      x = {i: 0 for i in self.vars}
    
    # Ограничение на затраты регионов
    for i in range(0, len(vars), T * typesOfPlaces):
      v = vars[i: i + (T * typesOfPlaces)]
      for j in range(0, len(v), T):
        for k in v[j:j + T]:
          x[k] = u[int(i / (T * typesOfPlaces) % (T * typesOfPlaces))][int(j / T)]

      X.append(x)
      constraits.append(a[int(i / (T * typesOfPlaces))])
      x = {i: 0 for i in self.vars}
    
    result = [list(d.values()) for d in X]

      
    return X, constraits

In [70]:
obj = PrepData('data.json')
goal_coef = obj.set_goal_coef()

In [71]:
constraits = obj.get_constraints()[0]
bounds = obj.get_constraints()[1]

In [72]:
p = []
for k in sorted(list(goal_coef.keys())):
  p.append(goal_coef[k])
p = np.array(p)

In [73]:
c = []
for d in constraits:
  r = []
  for k in sorted(list(d.keys())):
    r.append(d[k])
  c.append(r)
c = np.array(c)

In [74]:
b = np.array(bounds).astype(np.float64)

In [75]:
ubound = upperBound
m = len(p)
n = len(c)

In [76]:
solver = pywraplp.Solver.CreateSolver('SCIP')
x = {}
for j in range(m):
    x[j] = solver.IntVar(0, ubound, f"x[{j}]")
for i in range(n):
    constraint_expr = [c[i,j] * x[j] for j in range(m)]
    solver.Add(sum(constraint_expr) <= b[i])
obj_expr = [p[j] * x[j] for j in range(m)]
solver.Maximize(solver.Sum(obj_expr))

In [77]:
status = solver.Solve()

In [78]:
if status == pywraplp.Solver.OPTIMAL:
    print('Objective value =', solver.Objective().Value())
    for j in range(m):
        print(x[j].name(), ' = ', x[j].solution_value())
    print()
    print(f"Problem solved in {solver.wall_time()} milliseconds")
    print(f"Problem solved in {solver.iterations()} iterations")
    print(f"Problem solved in {solver.nodes()} branch-and-bound nodes")
else:
    print("The problem does not have an optimal solution.")

Objective value = 21.0
x[0]  =  0.0
x[1]  =  0.0
x[2]  =  1.0
x[3]  =  -0.0
x[4]  =  0.0
x[5]  =  0.0
x[6]  =  0.0
x[7]  =  0.0

Problem solved in 4215 milliseconds
Problem solved in 0 iterations
Problem solved in 1 branch-and-bound nodes


In [79]:
import ray
ray.shutdown()
ray.init()

{'node_ip_address': '127.0.0.1',
 'raylet_ip_address': '127.0.0.1',
 'redis_address': '127.0.0.1:62265',
 'object_store_address': '/tmp/ray/session_2022-04-02_10-04-03_717765_1536/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2022-04-02_10-04-03_717765_1536/sockets/raylet',
 'webui_url': None,
 'session_dir': '/tmp/ray/session_2022-04-02_10-04-03_717765_1536',
 'metrics_export_port': 55586,
 'gcs_address': '127.0.0.1:51589',
 'node_id': 'bff2feeed564237d07b2d9105fab01dcdb122d2e8fd1eee6ae74a2ae'}

In [80]:
config =  ppo.DEFAULT_CONFIG.copy()
config["num_gpus"] = 0
config["num_workers"] = 1
config["framework"] = "torch"
config["env_config"] = {}
# config['kl_coeff'] = 0.0

In [86]:
class MyEnv(gym.Env):
    def __init__(self, env_config):
        self.action_space = gym.spaces.Discrete(upperBound + 1)
        self.observation_space = gym.spaces.Dict({
            'rem': gym.spaces.Box(low=np.zeros(len(b)), high=b, dtype=np.float64), 
            'j': gym.spaces.Discrete(c.shape[1] + 1)})
        self.state = {'rem': b, 'j': 0}
        self.done = False

    def reset(self):
        self.state = {'rem': b, 'j': 0}
        self.done = False
        return self.state

    def step(self, action):
        #print('current state:', self.state)   
        #print('action taken:', action)
        j = self.state['j']
        rem = self.state['rem'] - c[:,j] * action
        if np.any(rem < 0):
            self.reward = -1
        else:
            self.reward = action * p[j]
            j += 1
            self.state = {'rem': rem, 'j': j}
            
        # print('reward:', self.reward)
        # print('next state:', self.state)
        
        if j == c.shape[1]: 
            self.done = True
        else:
            self.done = False
            
        return self.state, self.reward, self.done, {}

In [87]:
agent3 = ppo.PPOTrainer(config=config, env=MyEnv)
# agent2 = dqn.DQNTrainer(config=config, env=MyEnv)
#agent3 = a3c.A3CTrainer(config=config, env=MyEnv)



In [88]:
for i in range(50):
   # Perform one iteration of training the policy with PPO
   result = agent3.train()
   if i % 10 == 0:
       #print(pretty_print(result))
       print('i: ', i)
       print('mean episode length:', result['episode_len_mean'])
       print('max episode reward:', result['episode_reward_max'])
       print('mean episode reward:', result['episode_reward_mean'])
       print('min episode reward:', result['episode_reward_min'])
       print('total episodes:', result['episodes_total'])
       print()

       checkpoint = agent3.save()
       #print("checkpoint saved at", checkpoint)

i:  0
mean episode length: 13.32
max episode reward: 20.9
mean episode reward: 9.304666666666666
min episode reward: 1.3999999999999995
total episodes: 300

i:  10
mean episode length: 12.072289156626505
max episode reward: 21.0
mean episode reward: 9.824698795180723
min episode reward: 0.0
total episodes: 3579

i:  20
mean episode length: 11.658892128279883
max episode reward: 21.0
mean episode reward: 9.314868804664723
min episode reward: 0.0
total episodes: 7123

i:  30
mean episode length: 12.256880733944953
max episode reward: 21.0
mean episode reward: 9.629357798165138
min episode reward: 0.0
total episodes: 10736

i:  40
mean episode length: 12.0
max episode reward: 21.0
mean episode reward: 9.23963963963964
min episode reward: 0.0
total episodes: 14249



In [89]:
env = MyEnv(config)
state = env.reset()
g = 0
done = False
reward = 0
while not done:
  action = agent3.compute_action(state, explore = False)
  state, reward, done, info = env.step(action)
  print(f"j = {state['j']} action = {action} reward = {reward}")
  g += reward
print(g)

j = 1 action = 1 reward = 6.0
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 action = 1 reward = -0.1
j = 1 act

KeyboardInterrupt: 