In [504]:
import numpy as np
class TComEnv:
    """Toy multi-user / multi-channel scheduling environment.

    ``nUEs`` users share ``nCHs`` channels. An integer action encodes a binary
    allocation matrix (one bit per channel/UE pair). Each step converts the
    allocation into a per-UE transmittable data size, drains the buffers,
    draws new random channel gains and announces periodic traffic arrivals.

    The observation (``self.state``) is a dict with three entries:

    * ``'buffer_size'``  -- shape ``(nUEs,)``, data waiting per UE
    * ``'channel_gain'`` -- shape ``(nCHs, nUEs)``, power gain per channel/UE
    * ``'new_traffic'``  -- shape ``(nUEs,)``, traffic generated in the last step

    NOTE(review): ``step`` publishes ``new_traffic`` but never adds it to
    ``buffer_size``; the caller is presumably expected to merge the two --
    confirm against the agent code.
    """

    def __init__(self):
        """Set the default scenario parameters and build the initial state."""
        self.nUEs = 2
        self.nCHs = 5
        self.iTTI = 1
        self.trafficPeriod = [10, 20]  # in TTI
        self.trafficSize = [50000, 30000]  # in KByte
        self.txPow = np.ones((self.nUEs, 1))
        self.noisePow = np.ones((self.nUEs, 1)) / 3
        # reset() also initialises self.throughput and stores self.state.
        self.state = self.reset()

    def reset(self):
        """Restore the initial observation and counters.

        Returns:
            dict: the fresh state, which is also stored in ``self.state``.
        """
        state = {
            'buffer_size': 100 * np.ones((self.nUEs,)),
            'channel_gain': np.ones((self.nCHs, self.nUEs)),
            'new_traffic': np.zeros((self.nUEs,)),
        }
        self.throughput = 0
        self.iTTI = 1
        # Store the state on the instance so that a bare ``env.reset()`` call
        # really resets the environment (previously only the return value
        # carried the new state).
        self.state = state
        return state

    def step(self, action):
        """Advance the environment by one TTI.

        Args:
            action: non-negative integer whose binary digits form the
                allocation matrix (see ``compute_alloc_data_size``).

        Returns:
            tuple: ``(state, reward)`` where ``state`` is ``self.state``
            (the same dict object, updated in place) and ``reward`` is a
            scalar.
        """
        # Data each UE could transmit under this allocation.
        allocated_data_size = self.compute_alloc_data_size(action)

        # Score the allocation and get the buffers left after transmission.
        reward, remain_buffer_size = self.compute_reward(allocated_data_size)

        # Throughput counts only data that actually left the buffers.
        self.throughput += np.sum(self.state['buffer_size'] - remain_buffer_size)

        # Update the buffers.
        self.state['buffer_size'] = remain_buffer_size

        # Draw new channels: complex Gaussian coefficients, gain = |h|^2 / 2
        # (unit mean, since real and imaginary parts each have variance 1).
        shape = (self.nCHs, self.nUEs)
        channel = np.random.normal(0, 1, size=shape) + np.random.normal(0, 1, size=shape) * 1j
        self.state['channel_gain'] = np.square(np.abs(channel)) / 2

        # Generate new traffic for every UE whose period divides the current TTI.
        new_traffic = np.zeros((self.nUEs,))
        for iUE in range(self.nUEs):
            if self.iTTI % self.trafficPeriod[iUE] == 0:
                new_traffic[iUE] = self.trafficSize[iUE]
        self.state['new_traffic'] = new_traffic

        self.iTTI += 1

        return self.state, reward

    def compute_alloc_data_size(self, action):
        """Translate an action into the data size each UE can transmit.

        The action's binary representation (most significant bit first,
        left-padded with zeros to ``nUEs * nCHs`` bits) is reshaped row-major
        into an ``(nCHs, nUEs)`` 0/1 matrix: entry ``[c, u]`` is 1 when UE
        ``u`` is allocated channel ``c``. UEs sharing a channel interfere
        with each other.

        Args:
            action: integer in ``[0, 2 ** (nUEs * nCHs))``.

        Returns:
            numpy.ndarray: shape ``(nUEs,)``, transmittable data per UE.

        Raises:
            ValueError: if ``action`` is negative or needs more bits than
                there are channel/UE pairs.
        """
        n_bits = self.nUEs * self.nCHs
        if not 0 <= action < 2 ** n_bits:
            raise ValueError(
                "action must be in [0, {}), got {!r}".format(2 ** n_bits, action)
            )

        # Decode the action number into its binary digits, right-aligned.
        alloc_arr = np.zeros((n_bits,))
        alloc_list = [int(x) for x in bin(action)[2:]]
        alloc_arr[-len(alloc_list):] = alloc_list

        # Reshape the bit string into the (nCHs, nUEs) allocation matrix.
        alloc_arr = alloc_arr.reshape((self.nCHs, self.nUEs))

        # Gain seen on each allocated channel/UE pair (0 where not allocated).
        alloc_gain_arr = np.multiply(alloc_arr, self.state['channel_gain'])

        # Desired signal power: allocated gain scaled by each UE's tx power.
        des_pow_arr = np.multiply(alloc_gain_arr, self.txPow.reshape((1, -1)))

        # Total power on each channel, shape (nCHs, 1).
        ch_pow_arr = np.matmul(alloc_gain_arr, self.txPow)

        # Interference = everything on the channel except the UE's own signal.
        int_pow_arr = ch_pow_arr - des_pow_arr

        # Signal-to-interference-plus-noise ratio per channel/UE pair.
        sinr_arr = np.divide(des_pow_arr, int_pow_arr + self.noisePow.reshape((1, -1)))

        # Transmittable data per channel/UE pair. The factor 180000 is
        # presumably bandwidth x TTI duration in bits and /8 the conversion
        # to bytes -- TODO confirm.
        bytes_arr = 180000 * np.log2(1 + sinr_arr) / 8

        # Total transmittable data per UE (sum over channels).
        alloc_data_size = np.sum(bytes_arr, axis=0)

        return alloc_data_size

    def compute_reward(self, allocated_data_size):
        """Score an allocation against the current buffers.

        The reward has two parts:

        1. a per-UE term -- the closer the allocated capacity is to the
           buffered amount, the better (relative error; for an empty buffer
           the raw allocated size is the penalty);
        2. a fairness term -- the closer the UEs' remaining buffers are to
           each other, the better.

        Args:
            allocated_data_size: array-like of length ``nUEs`` with the data
                size each UE could transmit.

        Returns:
            tuple: ``(reward, remain_buffer_size)`` where
            ``remain_buffer_size`` has shape ``(nUEs,)`` and is clipped at 0.
        """
        buffer_size = self.state['buffer_size']

        # Per-UE term.
        UE_reward = np.zeros((self.nUEs,))
        for i in range(self.nUEs):
            if buffer_size[i] == 0:
                UE_reward[i] = -allocated_data_size[i]
            else:
                UE_reward[i] = -abs(buffer_size[i] - allocated_data_size[i]) / buffer_size[i]

        # Buffers left after transmission; a UE cannot send more than it holds.
        remain_buffer_size = (buffer_size - allocated_data_size).clip(min=0)

        # Fairness term. When nothing is left in any buffer the UEs are
        # trivially equal; returning 0 also avoids a 0/0 division.
        max_remain = np.max(remain_buffer_size)
        if np.sum(buffer_size) == 0 or max_remain == 0:
            fair_reward = 0
        else:
            fair_reward = -(max_remain - np.min(remain_buffer_size)) / max_remain

        reward = np.sum(UE_reward) + fair_reward

        return reward, remain_buffer_size

  


In [505]:
# Build a fresh environment and list every instance attribute with its value.
env = TComEnv()
for attr_name, attr_value in vars(env).items():
    print(attr_name, ':', attr_value)

nUEs : 2
nCHs : 5
iTTI : 1
trafficPeriod : [10, 20]
trafficSize : [50000, 30000]
txPow : [[1.]
 [1.]]
noisePow : [[0.33333333]
 [0.33333333]]
throughput : 0
state : {'buffer_size': array([100., 100.]), 'channel_gain': array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]]), 'new_traffic': array([0., 0.])}


In [506]:
# Hand-pick a scenario: UE 0 transmits at twice the power, both buffers are
# large, and every channel of UE 1 has gain 2. Then list the attributes again.
env.txPow = np.array([[2], [1]])
env.state['buffer_size'][0], env.state['buffer_size'][1] = 100000, 50000
env.state['channel_gain'][:, 1] = 2
for attr_name, attr_value in vars(env).items():
    print(attr_name, ':', attr_value)

nUEs : 2
nCHs : 5
iTTI : 1
trafficPeriod : [10, 20]
trafficSize : [50000, 30000]
txPow : [[2]
 [1]]
noisePow : [[0.33333333]
 [0.33333333]]
throughput : 0
state : {'buffer_size': array([100000.,  50000.]), 'channel_gain': array([[1., 2.],
       [1., 2.],
       [1., 2.],
       [1., 2.],
       [1., 2.]]), 'new_traffic': array([0., 0.])}


In [507]:
# 1013 == 0b1111110101: the ten bits are the flattened channel/UE allocation
# that the following cells decode step by step.
action = 1013
env.compute_alloc_data_size(action)

array([ 60283.22373564, 186614.19522823])

In [508]:
# Allocation matrix: decode the action's binary digits, right-aligned into
# a 10-element 0/1 vector.
alloc_list = list(map(int, bin(action)[2:]))
alloc_arr = np.zeros((10,))
alloc_arr[-len(alloc_list):] = alloc_list
print(alloc_arr)
# Fold the flat vector into 5 rows by 2 columns.
alloc_arr = alloc_arr.reshape((5, 2))
print(alloc_arr)

[1. 1. 1. 1. 1. 1. 0. 1. 0. 1.]
[[1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 1.]
 [0. 1.]]


In [509]:
# Signal power matrix: allocated channel gain scaled by each UE's tx power.
des_pow_arr = alloc_arr * env.state['channel_gain'] * env.txPow.reshape((1, 2))
print(des_pow_arr)

[[2. 2.]
 [2. 2.]
 [2. 2.]
 [0. 2.]
 [0. 2.]]


In [510]:
# Total signal strength on each channel (summed over the UEs allocated to it).
ch_pow_arr = np.matmul(alloc_arr * env.state['channel_gain'], env.txPow)
print(ch_pow_arr)

[[4.]
 [4.]
 [4.]
 [2.]
 [2.]]


In [511]:
# Interference matrix: channel total minus the UE's own signal
# (the (5, 1) totals broadcast across the UE columns).
int_pow_arr = np.subtract(ch_pow_arr, des_pow_arr)
print(int_pow_arr)

[[2. 2.]
 [2. 2.]
 [2. 2.]
 [2. 0.]
 [2. 0.]]


In [512]:
# Signal-to-interference-plus-noise ratio matrix.
noise_row = env.noisePow.reshape((1, 2))
sinr_arr = des_pow_arr / (int_pow_arr + noise_row)
print(sinr_arr)

[[0.85714286 0.85714286]
 [0.85714286 0.85714286]
 [0.85714286 0.85714286]
 [0.         6.        ]
 [0.         6.        ]]


In [513]:
# Transmittable data size for every channel/UE pair.
bytes_arr = np.log2(sinr_arr + 1) * 180000 / 8
print(bytes_arr)

[[20094.40791188 20094.40791188]
 [20094.40791188 20094.40791188]
 [20094.40791188 20094.40791188]
 [    0.         63165.4857463 ]
 [    0.         63165.4857463 ]]


In [514]:
# Transmittable data size allocated to each user: column sums over channels.
alloc_data_size = bytes_arr.sum(axis=0)
print(alloc_data_size)

[ 60283.22373564 186614.19522823]


In [515]:
# Score the hand-computed allocation: returns the scalar reward and the
# per-UE buffer left after transmission (clipped at 0).
reward, remain_buffer_size = env.compute_reward(alloc_data_size)
print([reward,remain_buffer_size])

[-4.1294516672081985, array([39716.77626436,     0.        ])]


In [516]:
    	# Record throughput: the amount of data that actually left the buffers.
    	# NOTE(review): env.step() performs this same accumulation, so running
    	# this cell and then env.step() counts the transmission twice.
    	env.throughput += sum(env.state['buffer_size'] - remain_buffer_size)

In [517]:
env.throughput

110283.22373563545

In [518]:
env.state['buffer_size']

array([100000.,  50000.])

In [519]:
env.state['buffer_size'] - alloc_data_size

array([  39716.77626436, -136614.19522823])

In [520]:
		# The reward has two parts: 1) a per-UE term, 2) a fairness term.
		# Per-UE term: the closer the allocated capacity is to the buffered amount, the better.
		UE_reward = np.zeros((2,))

		for i in range(env.nUEs):
			# Empty buffer: any allocation is wasted, penalise by its raw size.
			if env.state['buffer_size'][i] == 0:
				UE_reward[i] = - alloc_data_size[i]
			else:
				# Otherwise penalise by the mismatch relative to the buffer size.
				UE_reward[i] = - abs(env.state['buffer_size'][i] - alloc_data_size[i])/env.state['buffer_size'][i]
      

In [521]:
  print(UE_reward)

[-0.39716776 -2.7322839 ]


In [522]:
print(-abs(100000-60283.22373564)/100000)

-0.3971677626436


In [523]:
print(-abs(50000-186614.19522823)/50000)

-2.7322839045646


In [524]:
		# Fairness term: the closer the users' remaining buffers are to each other, the better.
		remain_buffer_size = env.state['buffer_size']  - alloc_data_size
		# A UE cannot send more than it holds, so negative remainders become 0.
		remain_buffer_size = remain_buffer_size.clip(min = 0)

		if sum(env.state['buffer_size']) == 0:
			fair_reward = 0
		else:
			# NOTE(review): if every remaining buffer is 0 while the current buffers are not, this is 0/0.
			fair_reward = - (np.max(remain_buffer_size)-np.min(remain_buffer_size))/np.max(remain_buffer_size)

In [525]:
print(fair_reward)

-1.0


In [526]:
# Run one full environment step with the same action and show the resulting
# state (with freshly drawn channel gains) and reward.
state, reward = env.step(action)
print([state, reward])

[{'buffer_size': array([39716.77626436,     0.        ]), 'channel_gain': array([[0.10520588, 0.85185558],
       [1.48454148, 0.96114967],
       [1.27155577, 0.24922676],
       [1.73962611, 0.93870394],
       [0.70824596, 1.742404  ]]), 'new_traffic': array([0., 0.])}, -4.1294516672081985]


In [527]:
    	# Generate new traffic: a UE receives trafficSize[iUE] whenever the
    	# current TTI index is a multiple of its trafficPeriod[iUE].
    	env.state['new_traffic'] = np.zeros((env.nUEs,))
    	for iUE in range(env.nUEs):
    		if env.iTTI % env.trafficPeriod[iUE] == 0:
    			env.state['new_traffic'][iUE] = env.trafficSize[iUE]

In [528]:
print(env.state['new_traffic'])

[0. 0.]


In [529]:
print(env.throughput)

220566.4474712709


In [530]:
print(100000-39716.77626436 + 50000)

110283.22373564


In [531]:
# Start again from a clean environment. reset() returns the fresh state, so
# assign it explicitly: discarding the return value would leave the previous
# step's random channel gains in column 0.
env.state = env.reset()
# Same hand-picked scenario as before.
env.txPow = np.array([[2],[1]])
env.state['buffer_size'][0] = 100000
env.state['buffer_size'][1] = 50000
env.state['channel_gain'][:,1]=2
state, reward = env.step(action)
print([state, reward])

[{'buffer_size': array([39716.77626436,     0.        ]), 'channel_gain': array([[2.05566784, 0.97480668],
       [0.36011713, 0.9026307 ],
       [0.19416032, 0.46125527],
       [0.74765196, 1.14091687],
       [0.93748898, 1.31320764]]), 'new_traffic': array([0., 0.])}, -4.1294516672081985]


In [532]:
print(env.throughput)

110283.22373563545


In [533]:
100000-39716.77626436+50000

110283.22373564

In [534]:
sum(np.array([100000,50000]) - env.state['buffer_size'])

110283.22373563545

In [535]:
2**3


8

In [537]:
list(env.state.values())

[array([39716.77626436,     0.        ]),
 array([[2.05566784, 0.97480668],
        [0.36011713, 0.9026307 ],
        [0.19416032, 0.46125527],
        [0.74765196, 1.14091687],
        [0.93748898, 1.31320764]]),
 array([0., 0.])]

In [538]:
{k:v.tolist() for k,v in env.state.items()}

{'buffer_size': [39716.776264364555, 0.0],
 'channel_gain': [[2.0556678398047628, 0.9748066802887989],
  [0.3601171285525231, 0.9026307000057159],
  [0.1941603186996454, 0.46125527200471034],
  [0.747651962426482, 1.1409168656542037],
  [0.9374889772307515, 1.313207642918771]],
 'new_traffic': [0.0, 0.0]}

In [543]:
# Flat observation vector: buffers including the newly arrived traffic,
# followed by the channel gains flattened row by row.
new_buffer_size = env.state['buffer_size'] + env.state['new_traffic']
flat_channel_gain = env.state['channel_gain'].reshape((env.nUEs*env.nCHs,))
np.concatenate((new_buffer_size, flat_channel_gain))

array([3.97167763e+04, 0.00000000e+00, 2.05566784e+00, 9.74806680e-01,
       3.60117129e-01, 9.02630700e-01, 1.94160319e-01, 4.61255272e-01,
       7.47651962e-01, 1.14091687e+00, 9.37488977e-01, 1.31320764e+00])

In [544]:
list(env.state['buffer_size'])

[39716.776264364555, 0.0]

In [545]:
2+3

5