### Exploring Dynamic Programing



In [1]:
# the required imports
import numpy as np
import pandas as pd
# from linear_aproximation import Model
# from environment import network
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# additional import for the Neural Network
from keras.layers import Dense
from keras.models import Sequential, load_model
from keras import optimizers
import tensorflow as tf


Using TensorFlow backend.


In [2]:
# loading the original dataset
path = '/home/mawongh/ws/datasets/'
dataset = pd.read_pickle(path + 'full_dataset.pickle')
dataset.tail(3)


Unnamed: 0,action,reward,s_next,state
299475,121.0,-2.915486,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250..."
299476,121.0,-3.054551,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250..."
299477,121.0,-2.65302,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250..."


In [3]:
# this section:
# 1) add the stateid columns to filter the unique states later
# 2) normalizes the state vectors

# will convert to state ids
def state2stateid(s):
    return ''.join(str for str in s.astype(str).tolist())

state_id = [state2stateid(s) for s in dataset.state.tolist()]

# normalize the states
S_woScale = dataset.state.tolist()
Snext_woScale = dataset.s_next.tolist()
scaler = StandardScaler()
scaler.fit(S_woScale)

S = scaler.transform(S_woScale)
Snext = scaler.transform(Snext_woScale)

dataset['state_norm'] = S.tolist()
dataset['s_next_norm'] = Snext.tolist()
dataset['state_id'] = state_id
dataset.head(3)

Unnamed: 0,action,reward,s_next,state,state_norm,s_next_norm,state_id
0,34.0,-5.789736,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2733273327333200320032002500250025004266426642...
1,0.0,-5.105602,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2733273327333200320032002500250025004266426642...
2,69.0,-5.131072,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",2733273327333200320032002500250025004266426642...


In [4]:
# creates a new dataframe with the unique states
unique_states_df = dataset.groupby('state_id').first().reset_index()
unique_states_df.head(3)

Unnamed: 0,state_id,action,reward,s_next,state,state_norm,s_next_norm
0,2733273327333200320032002500250025004266426642...,63.0,-2.60591,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,2733273327333200320032002500250025004266426642...,85.0,-1.627199,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,2733273327333200320032002500250025004266426642...,105.0,-1.764777,"[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[2733, 2733, 2733, 3200, 3200, 3200, 2500, 250...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."


In [5]:
# load the MDP model from file and initializes the Q value function
from keras.models import load_model
modelfilename = 'MDP_model6.h5'

MDP_model = load_model(path + modelfilename)

# Q model
Q_model = Sequential()
n_cols = 105

Q_model.add(Dense(400, activation = 'relu', input_shape=(n_cols,)))
Q_model.add(Dense(400, activation = 'relu'))
Q_model.add(Dense(400, activation = 'relu'))
Q_model.add(Dense(126, activation = 'linear'))
Q_model.compile(optimizer=optimizers.Adam(), loss='mean_squared_error')


In [6]:
# functions that returns all the posibles next states
def next_state(s,action_code):
        s_next = s.copy()
        cell = int(action_code / 6)
        # this operations gives the cell on the action will be executed
        subaction = action_code % 6
        attribute = int(subaction / 2) + 2
        
        index = attribute * 21 + cell
#         print(index)
        action_dict = {0:'incr20', 1:'decr20', 2:'incr2', 3:'decr2', 4:'on', 5:'off'}
        action = action_dict[subaction]
#         print(action)
        if action == 'incr20':
#             print(s_next[index])
            s_next[index] += 20
#             print(s_next[index])
        if action == 'decr20':
            s_next[index] -= 20
        if action == 'incr2':
            s_next[index] += 2
        if action == 'decr2':
            s_next[index] -= 2
        if action == 'on':
            s_next[index] = 1
        if action == 'off':
            s_next[index] = 0
        # check conf
        # tx_powers
        txpower_idx = np.arange(3*21, 3*21+21)
        for idx in txpower_idx:
            if s_next[idx] < 36:
                s_next[idx] = 36
            if s_next[idx] > 46:
                s_next[idx] = 46
        # azimuths
        cell1_idx = np.arange(44,44+21,3)
        cell2_idx = np.arange(45,45+21,3)
        cell3_idx = np.arange(46,46+21,3)
        for idx in cell1_idx:
            if s_next[idx] < 0:
                s_next[idx] = 0
            if s_next[idx] > 100:
                s_next[idx] = 100
        for idx in cell2_idx:
            if s_next[idx] < 0:
                s_next[idx] = 0
            if s_next[idx] > 120:
                s_next[idx] = 220
        for idx in cell3_idx:
            if s_next[idx] < 240:
                s_next[idx] = 240
            if s_next[idx] > 340:
                s_next[idx] = 340
        return s_next

def next_states(s):
    next_states_array = [next_state(s,a) for a in np.arange(126)]
    return np.array(next_states_array)

In [9]:
M = 3
gamma = 0.5
Qmodelfilename = 'Q_model_DP_g05.h5'
Qmodel_weigths_filename = 'Q_model_DP_g05_weights.h5'
Pr = 1 / 126 # uniform probability for all actions

for k in np.arange(M):
    print('iteraction: {}'.format(k+1))
    
    vk = []
    print('setting target...')
    for i in np.arange(len(unique_states_df)):
        # determine all the posibles next states
        s_norm = unique_states_df.state_norm[i]
        s = unique_states_df.state[i]
        s_next = next_states(s)
        s_next_norm = scaler.transform(s_next)
        Rsa = MDP_model.predict(np.array(s_norm).reshape(1,-1))[0]
        term = gamma * Pr * np.sum(Q_model.predict(s_next_norm),axis = 1)
        vk += [Rsa + term]
    
    X = np.array([np.array(unique_states_df.state_norm[i])
                  for i in np.arange(len(unique_states_df))])
    y = np.array(vk)

    Q_model.fit(X,y)

print('training completed!')
Q_model.save(path + Qmodelfilename)
Q_model.save_weights(path + Qmodel_weigths_filename)

iteraction: 1
setting target...




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
iteraction: 2
setting target...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
iteraction: 3
setting target...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
training completed!


In [None]:

rhat = [np.argmax(MDP_model.predict(np.array(s).reshape(1,-1))[0])
        for s in unique_states_df.state_norm.tolist()]



In [None]:
ticks = np.unique(rhat)
plt.figure(figsize = (6,1), dpi = 200)
plt.hist(rhat, bins=126)
plt.xticks(ticks)
plt.show()

In [None]:
Rhat = [MDP_model.predict(np.array(s).reshape(1,-1))[0]
        for s in unique_states_df.state_norm.tolist()]



In [None]:
# np.array(Rhat).shape
data = pd.DataFrame(data = np.array(Rhat),columns = np.arange(126).astype(str))


In [None]:
import seaborn as sns
# sns.set(font_scale=0.5)

plt.figure(figsize=(2.5,5), dpi = 200)
sns.heatmap(data = data, cmap = 'RdBu', vmin=-10, vmax = +5, center = -4)

plt.xlabel('Action')
plt.ylabel('Sampled states')
plt.xticks()
# plt.yticks(fontsize = 6)
# cbar.set_label("predicted SINR (db)", size=6)
# cbar.ax.tick_params(labelsize=6)
plt.show()

In [None]:

qhat = [np.argmax(Q_model.predict(np.array(s).reshape(1,-1))[0])
        for s in unique_states_df.state_norm.tolist()]



In [None]:
ticks = np.unique(qhat)
plt.figure(figsize = (6,1), dpi = 200)
plt.hist(qhat, bins=126)
plt.xticks(ticks)
plt.show()

In [None]:
Qhat = [Q_model.predict(np.array(s).reshape(1,-1))[0]
        for s in unique_states_df.state_norm.tolist()]


In [None]:
test = np.array(Qhat).flatten()
plt.hist(test)
plt.show()

In [None]:
Qdata = pd.DataFrame(data = np.array(Qhat),columns = np.arange(126).astype(str))

In [None]:
import seaborn as sns
# sns.set(font_scale=0.5)

plt.figure(figsize=(2.5,5), dpi = 200)
sns.heatmap(data = Qdata, cmap = 'RdBu', vmin = -50, vmax = 10)

plt.xlabel('Action')
plt.ylabel('Sampled states')
plt.xticks()
# plt.yticks(fontsize = 6)
# cbar.set_label("predicted SINR (db)", size=6)
# cbar.ax.tick_params(labelsize=6)
plt.show()

## Playground