In [2]:
# Environment setup: upgrade IPython through the shell, then turn on the
# autoreload extension so edited project modules are re-imported automatically.
!pip install Ipython --upgrade
%load_ext autoreload
# Mode 2 = reload all modules before each execution (per the IPython autoreload docs).
%autoreload 2
# NOTE(review): the captured output of this cell reports the extension as
# already loaded and suggests %reload_ext, which is what the next line does.
%reload_ext autoreload

Requirement already up-to-date: Ipython in /usr/local/lib/python3.7/dist-packages (7.25.0)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Mount Google Drive at /content/drive so the shared project folder is reachable.
from google.colab import drive, files
drive.mount('/content/drive')
# Root folder of the project on the shared drive; the import cell below appends
# '/package' to this path to locate the project modules.
rootpath = '/content/drive/Shareddrives/KW phase1 repo'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [12]:
import sys
# Put <rootpath>/package on the module search path; presumably this is where the
# utils / service / model packages imported below live (requires `rootpath`
# from the Drive-mount cell to be defined first).
sys.path.append(rootpath+'/package')
from utils.env import env
from service.combinationset import groupingFromSet
from model.sdp import calculateSDP
from model.pathlossmodel import PathLossModel, NoiseMaker

In [14]:
import numpy as np
from scipy.special import comb, perm

In [10]:
def MSE(a, b):
  """Return the mean squared error between two array-likes.

  Both arguments are converted to NumPy arrays, subtracted element-wise
  (with NumPy broadcasting), squared, and averaged over every element.
  """
  residual = np.asarray(a) - np.asarray(b)
  squaredResidual = residual * residual
  return np.mean(squaredResidual)

In [15]:
def swapFunc(j, X, shapeOfX, targetReformSlice):
  """Append, for every sample, the j-th entries gathered across the first axis of X.

  For each `sample` in `range(shapeOfX[1])` a list
  `[X[0][sample][j], X[1][sample][j], ..., X[shapeOfX[0]-1][sample][j]]`
  is appended to `targetReformSlice`. The list is modified in place and
  nothing is returned.
  """
  targetReformSlice.extend(
    [X[outer][sampleIdx][j] for outer in range(shapeOfX[0])]
    for sampleIdx in range(shapeOfX[1])
  )

In [16]:
class QAgent():
  """Q-table agent whose reward is derived from a VAE reconstruction check.

  Hyper-parameters (learning rate, discount factor, exploration rate) and the
  noise settings of the path-loss model are read from the module-level `env`.
  States, actions, the Q-table and the UAV samples start out empty and are
  expected to be supplied through the property setters before `step`,
  `chooseAction` or `updateQtable` are used.

  NOTE(review): `step` treats the current state as a sized collection
  (`len(...)`, `groupingFromSet(...)`), whereas `chooseAction` and
  `updateQtable` use it directly as a row index of the Q-table. Confirm how
  states are meant to map onto table rows.
  NOTE(review): assigning `states` / `actions` does not resize the Q-table;
  the caller has to assign a matching `table` as well.
  """

  def __init__(self):
    """Initialise hyper-parameters from `env` with empty states/actions."""
    self._learningRate = env.learningRate
    self._discountLambda = env.discountLambda
    self._epsilon = env.explorationRate
    self._states = []
    self._actions = []
    # np.zeros expects the shape as ONE tuple; a second positional argument
    # would be interpreted as the dtype and raise a TypeError.
    self._table = np.zeros((len(self._states), len(self._actions)))
    # The state list is always empty at this point and np.random.choice raises
    # on an empty sequence, so start without a current state instead.
    self._currentState = None
    self._currentAction = None
    self._nextState = None
    self._UAVsSamples = None

    self._pathLossModel = PathLossModel()
    self._pathLossModel.noiseMaker = NoiseMaker(mean = env.noiseMean, deviation = env.noiseDeviation)


  def step(self, vae, action):
    """Evaluate the current state and return its reward.

    Args:
      vae: object exposing `predict(array)`; it is fed the normalised,
        flattened test data and its output is compared with that input.
      action: currently unused (see the planned steps 9-10 below).

    Returns:
      `len(current state)` plus a penalty of -10 when `detectAnomaly` flags
      the reconstruction MSE, otherwise plus 0.

    NOTE(review): `detectAnomaly` is not defined in the visible part of this
    notebook; it has to exist at module level when this method runs.
    """
    # Planned procedure (steps 9 and 10 are not implemented yet):
    # 1. Confirm the current state (the group left after a member was kicked out)
    # 2. From the members of the group in the state, regroup into all possible groups
    # 3. Compute the SDP
    # 4. Read the SDP results and reshape them to the VAE input size
    # 5. testResult = vae.predict(test)
    # 6. mse = MSE(testResult, test)
    # 7. if (detectAnomaly(mse)) -> penalty
    # 8. reward = rewardFunc(len(current state), penalty)
    # 9. Based on the action (the member kicked out) list all groups; pick one via
    #    the Q table, or with probability self._epsilon pick a different group;
    #    that group becomes the next state
    # 10. Update self._nextState
    # 11. return reward

    groupListSamples = {0: groupingFromSet(self._currentState)}
    sdpResultsDatas = calculateSDP(groupListSamples, self._UAVsSamples, self._pathLossModel)

    # Sum of C(N_ngps, k) for k = groupSizeFrom .. N_ngps; used as the number of
    # SDP results read per sample.
    groupSize = int(sum(
      comb(env.N_ngps, size)
      for size in np.arange(env.groupSizeFrom, env.N_ngps + 1, 1)
    ))

    readSampleSize = 1

    # One row per non-GPS node: for every sample, that sample's slice of SDP results.
    rawDatas = [
      [
        sdpResultsDatas[node][sample * groupSize : sample * groupSize + groupSize]
        for sample in range(readSampleSize)
      ]
      for node in range(env.N_ngps)
    ]

    # For every sample, each UAV that has an observed position contributes that
    # position repeated groupSize times, so it lines up with the SDP rows.
    # `is not None` (identity) is used because `== None` on an array-like
    # position would be evaluated element-wise.
    anchorsPerSample = [
      [
        [uav.observedPosition for _ in range(groupSize)]
        for _, uav in self._UAVsSamples[sample].items()
        if uav.observedPosition is not None
      ]
      for sample in range(readSampleSize)
    ]
    # Re-order from sample-major to anchor-major and append after the SDP rows.
    for anchor in range(env.N_gps):
      rawDatas.append([anchorsPerSample[sample][anchor] for sample in range(readSampleSize)])

    # Every ordered pair of distinct entries within each (node, sample) cell.
    # The original code read an undefined name `rawTestingData` here; the data
    # assembled above in `rawDatas` is what this stage consumes.
    # NOTE(review): assumes env.N == env.N_ngps + env.N_gps so that every row of
    # rawDatas is visited - confirm against utils.env.
    dualGroupingTestingData = [
      [
        [
          [first, second]
          for firstIdx, first in enumerate(rawDatas[node][sample])
          for secondIdx, second in enumerate(rawDatas[node][sample])
          if firstIdx != secondIdx
        ]
        for sample in range(readSampleSize)
      ]
      for node in range(env.N)
    ]

    _testingDataShape = np.array(dualGroupingTestingData).shape
    _reshapeTestingData = []
    # swapFunc appends, per pair index, one row per sample gathering that pair
    # across all nodes.
    for pairIdx in range(_testingDataShape[2]):
      swapFunc(pairIdx, dualGroupingTestingData, _testingDataShape, _reshapeTestingData)

    # Flatten everything but the leading axis into one feature vector per row.
    stacked = np.array(_reshapeTestingData)
    VAETestingData = stacked.reshape(len(_reshapeTestingData), -1)

    # Min-max scaling with 0 as the minimum and the largest env range as the maximum.
    data_min = 0
    data_max = max(max(env.X_RANGE, env.Y_RANGE), env.Z_RANGE)
    normalizeVAETestingData = (VAETestingData - data_min) / (data_max - data_min)
    normalizeVAETestingData = normalizeVAETestingData.astype('float32')

    vaeOutput = vae.predict(normalizeVAETestingData)

    mse = MSE(vaeOutput, normalizeVAETestingData)
    penalty = -10 if detectAnomaly(mse) else 0
    reward = len(self._currentState)*1 + penalty*1

    return reward


  def chooseAction(self):
    """Pick an action epsilon-greedily from the Q-table row of the current state.

    With probability `self._epsilon` a random element of `self._actions` is
    returned; otherwise the action at the arg-max position of the current
    state's row is returned. The chosen action is not stored on the agent.
    """
    if np.random.rand() < self._epsilon:
      return np.random.choice(self._actions)
    stateRow = self._table[self._currentState]
    return self._actions[np.argmax(stateRow, axis=0)]


  def updateQtable(self, reward):
    """Apply one Q-learning update to the (current state, current action) cell.

    Q(s,a) := Q(s,a) + lr * [reward + gamma * max_a' Q(s',a') - Q(s,a)]
    where s' is `self._nextState`.
    """
    q_value = self._table[self._currentState][self._currentAction]
    bestNext = max(self._table[self._nextState][:])
    tdTarget = reward + self._discountLambda * bestNext
    self._table[self._currentState][self._currentAction] = q_value + self._learningRate * (tdTarget - q_value)


  # ---- plain accessors: each property simply exposes the private attribute ----

  @property
  def learningRate(self):
    return self._learningRate

  @learningRate.setter
  def learningRate(self, learningRate):
    self._learningRate = learningRate

  @property
  def discountLambda(self):
    return self._discountLambda

  @discountLambda.setter
  def discountLambda(self, discountLambda):
    self._discountLambda = discountLambda

  @property
  def actions(self):
    return self._actions

  @actions.setter
  def actions(self, actions):
    self._actions = actions

  @property
  def currentState(self):
    return self._currentState

  @currentState.setter
  def currentState(self, currentState):
    self._currentState = currentState

  @property
  def currentAction(self):
    return self._currentAction

  @currentAction.setter
  def currentAction(self, currentAction):
    self._currentAction = currentAction

  @property
  def nextState(self):
    return self._nextState

  @nextState.setter
  def nextState(self, nextState):
    self._nextState = nextState

  @property
  def table(self):
    return self._table

  @table.setter
  def table(self, table):
    self._table = table

  @property
  def states(self):
    return self._states

  @states.setter
  def states(self, states):
    self._states = states

  @property
  def UAVsSamples(self):
    return self._UAVsSamples

  @UAVsSamples.setter
  def UAVsSamples(self, UAVsSamples):
    self._UAVsSamples = UAVsSamples