In [46]:
import matplotlib.pyplot as plt
import heapq
import random
import numpy as np
import pandas as pd
from tqdm import tqdm


In [None]:
# generate original model


In [95]:

class BikeNet():
    def __init__(self, N, A, R, Q, repair, warmup_time, run_time, start_position=0, n_epi=5):
        self.N = N
        self.A = A
        self.R = R
        self.Q = Q
        self.repair = repair
        self.warmup_time = warmup_time
        self.run_time = run_time
        self.time_limit = warmup_time + run_time
        self.car = start_position
        self.edge = int(self.A**0.5)
        self.areas = list(range(A + 1))

        self.n_epi = n_epi

        self.reset()

        self.create_instances()

    def reset(self):
        self.T = 0
        self.carrier_position = self.car
        self.state = [int(self.N / self.A)] * self.A + [0] * self.A
        self.scheduler = []
        heapq.heapify(self.scheduler)
        for i in range(self.A):
            heapq.heappush(self.scheduler, [random.expovariate(self.R[i][0]), -1, i])
        heapq.heapify(self.scheduler)
        return np.array(self.state.copy())

    def warmup(self):
        self.reset()

        while self.T < self.warmup_time:
            self.step(random.randint(0,self.A-1), 0)

        self.T = self.warmup_time
        # return np.array(s.copy())

    def create_instances(self):
        log_by_step = []
        log_by_epi = []
        for i in tqdm(range(self.n_epi)):
            self.warmup()
            step_count = 0
            sum_loss, sum_cus = 0,0
            while step_count < MAX_STEP:
                s = self.state
                action = random.randint(0,self.A-1)
                re = self.step(action, 1)
                cus, done, loss = re[1], re[2], re[3]
                rate_broken = sum(self.state[4:8])/self.N
                log_by_step.append([i+1]+s+[action]+self.state+[cus]+[loss]+[rate_broken] + [self.T])
                sum_loss += loss
                sum_cus += cus
                step_count += 1
            log_by_epi.append([sum_loss, sum_cus])
        pd.DataFrame(log_by_step).to_csv('C:/Rebalancing/data/result/pytorchmodel/log_by_step_4_80part1.csv')
        pd.DataFrame(log_by_epi).to_csv('C:/Rebalancing/data/result/pytorchmodel/log_by_epi_4_80part1.csv')


        # self.T = self.warmup_time
        # return np.array(s.copy())


    def get_distance(self, start, end):
        dist = abs(start % (self.edge) - end % (self.edge)) + abs(
            start // (self.edge) - end // (self.edge))
        return dist
    
    def step(self, action, not_warm):
        loss = 0
        cus = 0
        # time for carrier to take the action and repair one bicycle
        dist = self.get_distance(self.carrier_position, action)
        t = dist * 2
        if self.state[action + self.A] > 0:
            t_cursor = self.T + t + self.repair
        else:
            t_cursor = self.T + t

        event = self.scheduler[0]
        self.T, kind, location = event[0], event[1], event[2]

        # update the atate of QN during the tansformation time
        while self.T < t_cursor:
            # 车到达
            if kind == 1:
                self.state[location] += 1
                heapq.heappop(self.scheduler)
            else:# 顾客到达
                if self.state[location] == 0:  # 但没车
                    loss += 1
                    #rewards -= 1
                    heapq.heappop(self.scheduler)
                else:
                    target = np.random.choice(self.areas, 1, p=self.Q[location])[0]
                    if target == self.A:  # 顾客到达，发现是坏车
                        self.state[location] -= 1
                        self.state[location + self.A] += 1
                        continue
                    else:  # 顾客到达，顺利骑行
                        cus += 1
                        self.state[location] -= 1
                        heapq.heappop(self.scheduler)
                        next_time = random.expovariate(self.R[location][1]) + self.T
                        heapq.heappush(self.scheduler, [next_time, 1, target])
                next_time = random.expovariate(self.R[location][0]) + self.T
                heapq.heappush(self.scheduler, [next_time, -1, location])

            if self.scheduler:
                event = self.scheduler[0]
                self.T, kind, location = event[0], event[1], event[2]
            else:
                break

        if not_warm == 1:
            if self.state[action + self.A] > 0:
                self.state[action] += 1
                self.state[action + self.A] -= 1

        self.carrier_position = action
        self.T = t_cursor

        s_ = np.array(self.state)

        #if self.T <= self.time_limit and self.scheduler:
        return s_, cus, 0, loss
        #else:
        #    return s_, cus, 1, t

        
MAX_STEP = 120
        
if __name__ == '__main__':
    random.seed(0)
    N = 400  # total number of bikes in the QN
    A = 4  # A for areas, indicates the number of areas and the action space
    R = {}  # [customer_arrval, ride]
    for i in range(A): R[i] = [0.5, 0.2]
    rate = 0.073
    Q = np.array([[0.15,0.3,0.3,0.15],
             [0.3,0.15,0.15,0.3],
             [0.3,0.15,0.15,0.3],
             [0.15,0.3,0.3,0.15]])
    Q = [np.append(x/sum(x)*(1-rate), rate) for x in Q]
    #Q = [[0.15,0.3,0.3,0.15,0.1],
    #     [0.3,0.15,0.15,0.3,0.1],
    #     [0.3,0.15,0.15,0.3,0.1],
    #     [0.15,0.3,0.3,0.15,0.1]]
    # Q = [[0,0.9,0.1], [0.9,0,0.1]]
    t_repair = 16
    warmup_time = 60
    run_time = 180

    env = BikeNet(N=N,
                  A=A,
                  R=R,
                  Q=Q,
                  repair=t_repair,
                  warmup_time=warmup_time,
                  run_time=run_time,
                  start_position=0,
                  n_epi=500)

    # print(env.warmup(RL))
    # print(RL.memory)
    # r = 0
    # while env.T<680:
    #     r += env.step(random.randint(0, 3))[1]
    # print(env.state)
    # print(env.c)
    # print(env.T)
    # # print(env.step(1))
    # print(env.scheduler)



100%|████████████████████████████████████████| 500/500 [02:34<00:00,  3.16it/s]


In [96]:
df1 = pd.read_csv('C:/Rebalancing/data/result/pytorchmodel/log_by_step_4_80part1.csv', index_col=0)
df1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,1,62,56,5,3,1,2,4,7,1,...,5,3,1,2,4,7,4,0,0.0875,78
1,1,72,60,2,0,1,3,4,8,2,...,2,0,1,3,4,8,56,25,0.1,98
2,1,72,50,1,0,0,6,5,8,0,...,1,0,0,6,5,8,56,30,0.11875,116
3,1,74,46,5,0,0,8,5,8,2,...,5,0,0,8,5,8,50,14,0.13125,134
4,1,77,45,3,2,0,7,6,9,1,...,3,2,0,7,6,9,55,17,0.1375,154


In [97]:
df1['20'].mean()

0.43672635416666666

In [98]:
result = []
for i in range(1,501):
    temp = df1[df1['0']==i]
    tm = temp['21'].max()
    temp['seg'] = temp['21'].diff()
    temp.iloc[0,22] = 0
    result.append(sum(temp.seg*temp['20'])/tm)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [99]:
np.average(result)

0.4265774381166899

In [11]:
df1.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
119995,2000,111,0,1,2,9,20,0,4,3,111,0,1,2,9,20,0,4,6,7
119996,2000,109,1,0,0,10,19,2,4,1,109,1,0,0,10,19,2,4,14,4
119997,2000,108,1,2,3,10,19,2,4,1,108,1,2,3,10,19,2,4,4,1
119998,2000,108,3,4,2,10,18,2,4,1,108,3,4,2,10,18,2,4,3,0
119999,2000,113,1,1,1,10,18,2,4,3,113,1,1,1,10,18,2,4,10,3


In [37]:
df2 = pd.read_csv('C:/Rebalancing/data/result/pytorchmodel/log_by_epi_4_80part1.csv', index_col=0)
df2.head()

Unnamed: 0,0,1
0,304,442
1,233,577
2,335,361
3,248,506
4,290,438


In [38]:
df2['0'].mean(), df2['1'].mean()

(296.5055, 485.884)

In [19]:
re = df.groupby(by='0').sum()

In [20]:
re['19'].mean()

345.2475