In [2]:
# import optimizer as op

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.externals import joblib

# import plotly
# import plotly.plotly as py
# import plotly.graph_objs as go
# import plotly.figure_factory as ff

np.set_printoptions(precision=3, suppress=True)

pandas_dataframe_styles = {
    'font-family': 'monospace',
    'white-space': 'pre'
}

In [3]:
inputCSVFile   = 'data/grafana_data_export_long_running_test.csv'

targetVariable = 'avg latency (quantile 0.5)'


scaler_min = -1                     # 0
scaler_max = 1                      # 1
train_test_ratio = 0.3              # 0.3
activation_function = 'tanh'        # tanh, relu, logistic
neuronsWhole = 10                   # 10
neuronsTrainTest = 4                # 4
cutFirstCases = 10                  # 10

lead = 1                            # 1 default

showPlots = True                    # True

In [4]:
def readCSV(filename):
    df = pd.read_csv(filename, sep=";", header="infer", skiprows=0, na_values="null" )

    # Return DataFrame
    return df


In [5]:
# Read DataFrame
df = readCSV(inputCSVFile)


In [6]:
def removeMissingData(df):
    cleanDF = df.dropna(axis=0)
    return cleanDF


def dropVariable(df, column):
    del df[column]
    return df


def preProcessing(df):
    df = df.copy()
    
    # Drop Time
    df = dropVariable(df, 'Time')
    df = dropVariable(df, 'avg latency (quantile 0.9)')

    # Debug
    # printDF(df)

    # Remove cases with missing values
    df = removeMissingData(df)
    return df


In [7]:

# Preprecess DataFrame
preProcessedDF = preProcessing(df)


In [8]:
targetVariable = targetVariable

In [9]:
def renameVariable(df, old_var_name, new_var_name):
    new_df = df.copy()
    new_df.rename(columns={old_var_name: new_var_name}, inplace=True)
    return new_df


In [10]:
preProcessedDF = renameVariable(preProcessedDF, 'Worker count', 'WorkerCount')

In [11]:
def setMetricNames(names):
    new_metricNames = names.copy()
    return new_metricNames


In [12]:
metricNames = setMetricNames(['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut'])

In [13]:
def setExtendedMetricNames(names):
    new_extendedMetricNames = names.copy()
    return new_extendedMetricNames


In [14]:
extendedMetricNames = setExtendedMetricNames(['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut', 'WorkerCount'])

extendedMetricNames

['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut', 'WorkerCount']

In [15]:
def dropFirstCases(df, n):
    new_df = df.copy()
    filteredDF = new_df[new_df.index > n]
    return filteredDF


In [16]:
# because in the begining of the samples have a lot of outliers

filteredDF = dropFirstCases(preProcessedDF, cutFirstCases)

In [17]:
preProcessedDF = filteredDF

### Correlation Matrix

In [18]:
from visualizerlinux import CorrelationMatrixSave

In [19]:
CorrelationMatrixSave(preProcessedDF)

Loading page (1/2)


In [20]:
from visualizerlinux import ScatterPlots

In [21]:
if showPlots : ScatterPlots(preProcessedDF, preProcessedDF[targetVariable], extendedMetricNames, targetVariable)

In [22]:
from visualizerlinux import TimeLinePlot

In [23]:
if showPlots : TimeLinePlot(preProcessedDF, targetVariable)

In [24]:
from visualizerlinux import TimeLinePlots

In [25]:
if showPlots : TimeLinePlots(preProcessedDF, extendedMetricNames)

In [26]:

n = 1
for i in preProcessedDF.columns:
    print('AC(1)      ', i, '\t= ', np.round(preProcessedDF[i].autocorr(lag=1), 2))
    n = n+1
    if( n == 10 ):
        break


AC(1)       AVG RR 	=  0.71
AC(1)       SUM RR 	=  0.82
AC(1)       CPU 	=  0.77
AC(1)       Inter 	=  0.76
AC(1)       CTXSW 	=  0.75
AC(1)       KBIn 	=  0.4
AC(1)       PktIn 	=  0.8
AC(1)       KBOut 	=  0.76
AC(1)       PktOut 	=  0.78


## Create a whole new DataFrame for Before After Data

In [27]:
def createBeforeafterDF(df, lag):
    beforeafterDF = df.copy()
    inputVariables = np.flip(beforeafterDF.columns[0:9].ravel(), axis=-1)
    print('Input Variablels : ', inputVariables)

    index = 9
    for i in inputVariables:
        new_column = beforeafterDF[i].shift(lag)
        new_column_name = (i + str(1)) # Todo: rename str(lag)
        beforeafterDF.insert(loc=index, column=new_column_name, value=new_column)
    
    beforeafterDF = beforeafterDF[lag:]
    
    print('Before After DF columns: ', beforeafterDF.columns)
    
    return beforeafterDF

In [28]:
beforeafterDF = createBeforeafterDF(preProcessedDF, 1)

Input Variablels :  ['PktOut' 'KBOut' 'PktIn' 'KBIn' 'CTXSW' 'Inter' 'CPU' 'SUM RR' 'AVG RR']
Before After DF columns:  Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'AVG RR1', 'SUM RR1', 'CPU1', 'Inter1', 'CTXSW1', 'KBIn1',
       'PktIn1', 'KBOut1', 'PktOut1', 'WorkerCount',
       'avg latency (quantile 0.5)'],
      dtype='object')


### Set Features for Neural Network - these are the input variables

In [29]:
def setFeaturesAndTheirLags(df):
    X = df.iloc[:,0:9]
    return X


In [30]:
X = setFeaturesAndTheirLags(beforeafterDF)

### Set Target Variable for Neural Network - this is the target variable

In [31]:
def setTarget(df, targetVariable):
    y = df[targetVariable]
    return y


In [32]:
y = setTarget(beforeafterDF, targetVariable)

In [33]:
y.values[0:10]

array([ 1498608.567,  1499530.468,  2170280.382,  1602826.789,
        1679743.324,    24193.916,    32378.571,    50828.776,
        2760269.008,  2760269.008])

In [34]:
y.head()

13    1.498609e+06
14    1.499530e+06
15    2.170280e+06
16    1.602827e+06
17    1.679743e+06
Name: avg latency (quantile 0.5), dtype: float64

In [35]:
y.describe()

count    6.730000e+02
mean     3.619691e+06
std      3.217239e+06
min      2.419392e+04
25%      2.479101e+06
50%      3.207532e+06
75%      4.119402e+06
max      4.203820e+07
Name: avg latency (quantile 0.5), dtype: float64

### Normalize the whole X

In [36]:
def normalizeX(df):
    """Return a normalized value of df.
    Save MinMaxScaler normalizer for X variable"""
    
    scaler = MinMaxScaler(feature_range=(scaler_min, scaler_max))
    # scaler.fit(df)
    scaler.fit(df.astype(np.float64))
    # normalized = scaler.transform(df)
    normalized = scaler.transform(df.astype(np.float64))
    
    # store MinMaxScaler for X
    joblib.dump(scaler, 'models/scaler_normalizeX.save') 

    return normalized, scaler


In [37]:
X_normalized, X_normalized_MinMaxScaler = normalizeX(X)

### Load MinMaxScalerXFull


In [38]:
def loadMinMaxScalerXFull():
    X_normalized_MinMaxScaler = joblib.load('models/scaler_normalizeX.save')
    
    return X_normalized_MinMaxScaler


In [39]:
X_normalized_MinMaxScaler = loadMinMaxScalerXFull()

In [40]:
def printNormalizedX(X_normalized):
    print("X_normalized type        = ", type(X_normalized))
    print("X_normalizde dtype       = ", X_normalized.dtype)
    print("X_normalized shape       = ", X_normalized.shape)
    print("X_normalized ndim        = ", X_normalized.ndim)
    print("X_normalized[:,0].max()  = ", X_normalized[:,0].max())
    print("X_normalized[:,0].min()  = ", X_normalized[:,0].min())

In [41]:
printNormalizedX(X_normalized)

X_normalized type        =  <class 'numpy.ndarray'>
X_normalizde dtype       =  float64
X_normalized shape       =  (673, 9)
X_normalized ndim        =  2
X_normalized[:,0].max()  =  1.0
X_normalized[:,0].min()  =  -1.0


In [42]:
X_normalized[1]

array([-0.495, -0.586,  0.437, -0.143,  0.272, -0.627, -0.37 , -0.222,
       -0.229])

In [43]:
X_denormalized = X_normalized_MinMaxScaler.inverse_transform(X_normalized)

In [44]:
X_denormalized[1]

array([    43.837,     87.674,     87.675,   6773.008,  10261.946,
         1086.267,   1985.507,    205.35 ,   1670.186])

In [45]:
X_denormalized[-1]

array([   56.849,   113.698,    84.537,  6142.203,  9008.754,   981.49 ,
        1888.163,   201.356,  1698.07 ])

### Normalize the whole y

In [46]:
def normalizeY(df):
    """Return a normalized value of df.
    Save MinMaxScaler normalizer for Y variable"""
    
    new_df = df.copy()
    new_df_reshaped = new_df.values.reshape(-1,1)
    scaler = MinMaxScaler(feature_range=(scaler_min, scaler_max))
    scaler.fit(new_df_reshaped.astype(np.float64))
    normalizedY = scaler.transform(new_df_reshaped.astype(np.float64))
    normalizedY = normalizedY.flatten()
    
    # store MinMaxScaler for Y
    joblib.dump(scaler, 'models/scaler_normalizeY.save') 

    return normalizedY, scaler

In [47]:
y_normalized, y_normalized_MinMaxScaler = normalizeY(y)

In [48]:
def printNormalizedY(y_normalized):
    """Void. Print normalizeY(df) values"""
    
    print("y_normalized type        = ", type(y_normalized))
    print("y_normalized dtype       = ", y_normalized.dtype)
    print("y_normalized shape       = ", y_normalized.shape)
    print("y_normalized ndim        = ", y_normalized.ndim)
    print("y_normalized[:].max()    = ", y_normalized[:].max())
    print("y_normalized[:].min()    = ", y_normalized[:].min())

In [49]:
printNormalizedY(y_normalized)

y_normalized type        =  <class 'numpy.ndarray'>
y_normalized dtype       =  float64
y_normalized shape       =  (673,)
y_normalized ndim        =  1
y_normalized[:].max()    =  1.0
y_normalized[:].min()    =  -1.0


In [50]:
y_normalized[0:3]

array([-0.93 , -0.93 , -0.898])

### Load MinMaxScalerYFull

In [51]:
def loadMinMaxScalerYFull():
    y_normalized_MinMaxScaler = joblib.load('models/scaler_normalizeY.save')
    
    return y_normalized_MinMaxScaler


In [52]:
y_normalized_MinMaxScaler = loadMinMaxScalerYFull()

In [53]:
y_denormalized = y_normalized_MinMaxScaler.inverse_transform(y_normalized.reshape(y_normalized.shape[0],1))

In [54]:
y_denormalized[0:3]

array([[ 1498608.567],
       [ 1499530.468],
       [ 2170280.382]])

In [55]:
y_denormalized[-3:]

array([[ 4974982.36 ],
       [ 4081220.691],
       [ 4553424.952]])

## Train Neural Network with Optimizer Class, trainMultiLayerRegressor method

In [56]:
def trainMultiLayerRegressor(X_normalized, y_normalized, activation, neuronsWhole):

    # Train Neural Network
    mlp = MLPRegressor(hidden_layer_sizes=neuronsWhole, \
                       max_iter=250, \
                       activation=activation, \
                       solver="lbfgs", \
                       learning_rate="constant", \
                       learning_rate_init=0.01, \
                       alpha=0.01, \
                       verbose=False, \
                       momentum=0.9, \
                       early_stopping=False, \
                       tol=0.00000001, \
                       shuffle=False, \
                       # n_iter_no_change=20, \
                       random_state=1234)

    mlp.fit(X_normalized, y_normalized)
    
    # ide kéne beilleszteni a modell elmentését
    joblib.dump(mlp, 'models/saved_mlp_model.pkl')

    return mlp


In [57]:
# Train Neural Network
mlp = trainMultiLayerRegressor(X_normalized, y_normalized, activation_function, neuronsWhole)

In [58]:
def predictMultiLayerRegressor(mlp, X_normalized):
    y_predicted = mlp.predict(X_normalized)

    return y_predicted

In [59]:
# Create prediction
y_predicted = predictMultiLayerRegressor(mlp, X_normalized)

In [60]:
from utils import evaluateGoodnessOfPrediction

In [61]:
evaluateGoodnessOfPrediction(y_normalized, y_predicted)

Correlation           = 0.605
Explained variance    = 0.366
Mean Absolute Error   = 0.067
Mean Squared Error    = 0.015
R2 Score              = 0.366


### Visualize Data

In [62]:
from visualizerlinux import VisualizePredictedYScatter

In [63]:
VisualizePredictedYScatter(y_normalized, y_predicted, targetVariable)

In [64]:
from visualizerlinux import VisualizePredictedYLine, VisualizePredictedYLineWithValues

In [65]:
VisualizePredictedYLineWithValues(y_normalized, y_predicted, targetVariable, 'Normalized')

### De-normlaize

I want to see the result in original scale. I don't care about the X but the y_normalized and y_predcited.



In [66]:
y_denormalized = y_normalized_MinMaxScaler.inverse_transform(y_normalized.reshape(y_normalized.shape[0],1))

y_predicted_denormalized = y_normalized_MinMaxScaler.inverse_transform(y_predicted.reshape(y_predicted.shape[0],1))

### Can I visualize the de-normalized data as well?

In [67]:
VisualizePredictedYLineWithValues(y_denormalized, y_predicted_denormalized, targetVariable, 'Denormalized')

### Compare the Original Target Variable and the mean of its Predicted Values

In [68]:
meanOfOriginalPandasDataframe = y.values.mean()
meanOfOriginalTargetVariable  = y_denormalized.mean()
meanOfPredictedTargetVariable = y_predicted_denormalized.mean()

print('mean original pandas dataframe = ', meanOfOriginalPandasDataframe)
print('mean original target variable  = ', meanOfOriginalTargetVariable)
print('mean predicted target variable = ', meanOfPredictedTargetVariable)

mean original pandas dataframe =  3619691.41872
mean original target variable  =  3619691.41872
mean predicted target variable =  3627819.30705


### De-normalizer function

In [69]:
def denormalizeX(X_normalized, X_normalized_MinMaxScaler):
    X_denormalized = X_normalized_MinMaxScaler.inverse_transform(X_normalized)
    return X_denormalized


In [70]:
X_denormalized = denormalizeX(X_normalized, X_normalized_MinMaxScaler)

In [71]:
X_denormalized[1]

array([    43.837,     87.674,     87.675,   6773.008,  10261.946,
         1086.267,   1985.507,    205.35 ,   1670.186])

In [72]:
X_normalized[1]

array([-0.495, -0.586,  0.437, -0.143,  0.272, -0.627, -0.37 , -0.222,
       -0.229])

In [73]:
X_denormalized[-1]

array([   56.849,   113.698,    84.537,  6142.203,  9008.754,   981.49 ,
        1888.163,   201.356,  1698.07 ])

In [74]:
X_normalized[-1]

array([ 0.074, -0.383,  0.294, -0.471, -0.318, -0.733, -0.462, -0.263,
       -0.197])

In [75]:
def denormalizeY(y_normalized, y_normalized_MinMaxScaler):
    y_denormalized = y_normalized_MinMaxScaler.inverse_transform(y_normalized.reshape(y_normalized.shape[0],1))
    return y_denormalized


In [76]:
y_denormalized = denormalizeY(y_normalized, y_normalized_MinMaxScaler)

y_predicted_denormalized = denormalizeY(y_predicted, y_normalized_MinMaxScaler)

In [77]:
VisualizePredictedYLineWithValues(y_denormalized, y_predicted_denormalized, targetVariable, 'Denormalized')

## Create Train-Test-Validation set

### Split Data

In [78]:
def splitDataFrame(X, y, testSize):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize, \
                                                        random_state=12345, \
                                                        shuffle=False, \
                                                        stratify=None)

    return X_train, X_test, y_train, y_test

In [79]:
# Split DataFrame
X_train, X_test, y_train, y_test = splitDataFrame(X, y, train_test_ratio)

In [80]:
print(X_train.count())

AVG RR    471
SUM RR    471
CPU       471
Inter     471
CTXSW     471
KBIn      471
PktIn     471
KBOut     471
PktOut    471
dtype: int64


In [81]:
print(X_test.count())

AVG RR    202
SUM RR    202
CPU       202
Inter     202
CTXSW     202
KBIn      202
PktIn     202
KBOut     202
PktOut    202
dtype: int64


In [82]:
print("y_train.count() = ", y_train.count())
print("y_test.count()  = ", y_test.count())

y_train.count() =  471
y_test.count()  =  202


In [83]:
from utils import compareTwoVariables

In [84]:
compareTwoVariables(X_train, X_test, 'CPU')

Unnamed: 0,CPU,CPUT,Difference in percent
count,471.0,202.0,57.11
mean,95.16,90.99,4.39
std,7.33,7.46,-1.75
min,56.22,75.43,-34.18
50%,98.93,85.55,13.52
max,100.0,100.0,0.0


In [85]:
compareTwoVariables(X_train, X_test, 'CTXSW')

Unnamed: 0,CTXSW,CTXSWT,Difference in percent
count,471.0,202.0,57.11
mean,10126.84,9508.34,6.11
std,494.67,680.0,-37.47
min,7558.23,8088.82,-7.02
50%,10214.3,9311.8,8.84
max,11810.59,11551.91,2.19


In [86]:
compareTwoVariables(X_train, X_test, 'AVG RR')

Unnamed: 0,AVG RR,AVG RRT,Difference in percent
count,471.0,202.0,57.11
mean,51.78,52.19,-0.8
std,8.85,6.34,28.42
min,32.28,34.73,-7.6
50%,50.54,50.51,0.05
max,78.05,76.33,2.2


In [87]:
VisualizePredictedYLineWithValues(y.values, y_train.values, targetVariable, 'Denormalized')

In [88]:
VisualizePredictedYLineWithValues(y, y_train, targetVariable, 'Denormalized')

In [89]:
VisualizePredictedYLineWithValues(0, y_test, targetVariable, 'Denormalized')

In [90]:
VisualizePredictedYLineWithValues(0, y_test.values, targetVariable, 'Denormalized')

In [91]:
VisualizePredictedYLineWithValues(y_train, y_train.values, targetVariable, 'Denormalized')

### Train Test set comparison

In [92]:
from utils import printInfoTrainTestSet

In [93]:
printInfoTrainTestSet(y_train, y_test)

y_train.min()             =  24193.916
y_test.min()              =  229587.881
------------------------------------------
type(y_train)             =  <class 'pandas.core.series.Series'>
type(y_test)              =  <class 'pandas.core.series.Series'>
------------------------------------------
type(y_train.values)      =  <class 'numpy.ndarray'>
type(y_test.values)       =  <class 'numpy.ndarray'>
------------------------------------------
y_train.values.min()      =  24193.916
y_test.values.min()       =  229587.881
------------------------------------------
y_train.values.max()      =  32912000.374
y_test.values.max()       =  42038198.617
------------------------------------------
y_train.values.argmin()   =  5
y_test.values.argmin()    =  19
------------------------------------------
y_train.values.argmax()   =  240
y_test.values.argmax()    =  28
------------------------------------------
y_train.idxmin()          =  18
y_test.idxmin()           =  588
----------------------------

In [94]:
y_train.values[0:10]

array([ 1498608.567,  1499530.468,  2170280.382,  1602826.789,
        1679743.324,    24193.916,    32378.571,    50828.776,
        2760269.008,  2760269.008])

In [95]:
y_test.values[10:20]

array([ 5594757.536,  5547258.272,  3778227.519,  2343328.501,
        3842487.763,  5217770.458,  4943885.104,  2584097.733,
        2083820.325,   229587.881])

### Normalize Data - based on Train set - It is forbiden to use Test set

In [96]:
def normalizeXTrainTest(X_train, X_test):
    scaler = MinMaxScaler(feature_range=(scaler_min, scaler_max))
    scaler.fit(X_train)
    X_train_normalized = scaler.transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    return X_train_normalized, X_test_normalized, scaler

In [97]:
X_train_normalized, X_test_normalized, X_normalized_MinMaxScalerTrainTest = normalizeXTrainTest(X_train, X_test)

In [98]:
print("X_train_normalized max = ", X_train_normalized.max())
print("X_train_normalized min = ", X_train_normalized.min())
print("X_test_normalized max  = ", X_test_normalized.max())
print("X_test_normalized min  = ", X_test_normalized.min())

X_train_normalized max =  1.0
X_train_normalized min =  -1.0
X_test_normalized max  =  1.0
X_test_normalized min  =  -1.02083227423


In [99]:
def normalizeYTrainTest(y_train, y_test):
    # Create numpy.array from pandas.series then reshape numpy array
    y_train_input = y_train.values.reshape(-1, 1)
    y_test_input = y_test.values.reshape(-1, 1)
    # Scaler
    scaler = MinMaxScaler(feature_range=(scaler_min, scaler_max))
    scaler.fit(y_train_input)
    # Scale
    y_train_normalized_tmp = scaler.transform(y_train_input)
    y_test_normalized_tmp = scaler.transform(y_test_input)
    # Flat numpy.array
    y_train_normalized = y_train_normalized_tmp.flatten()
    y_test_normalized = y_test_normalized_tmp.flatten()

    return y_train_normalized, y_test_normalized, scaler

In [100]:
y_train_normalized, y_test_normalized, y_normalized_MinMaxScalerTrainTest = normalizeYTrainTest(y_train, y_test)

In [101]:
print("y_train_normalized max = ", y_train_normalized.max())
print("y_train_normalized min = ", y_train_normalized.min())
print("y_test_normalized max  = ", y_test_normalized.max())
print("y_test_normalized min  = ", y_test_normalized.min())

y_train_normalized max =  1.0
y_train_normalized min =  -1.0
y_test_normalized max  =  1.55498978046
y_test_normalized min  =  -0.987509415366


In [102]:
from utils import printInfoNumpyArrays

In [103]:
printInfoNumpyArrays(y_train_normalized, y_test_normalized)

type(y_train)             =  <class 'numpy.ndarray'>
type(y_test)              =  <class 'numpy.ndarray'>
------------------------------------------
len(y_train)              =  471
len(y_test)               =  202
------------------------------------------
y_train.size              =  471
y_test.size               =  202
------------------------------------------
y_train.shape             =  (471,)
y_test.shape              =  (202,)
------------------------------------------
y_train.min               =  -1.0
y_test.min                =  -0.987509415366
------------------------------------------
y_train.max               =  1.0
y_test.max                =  1.55498978046


In [104]:
printInfoNumpyArrays(y_train, y_test)

type(y_train)             =  <class 'pandas.core.series.Series'>
type(y_test)              =  <class 'pandas.core.series.Series'>
------------------------------------------
len(y_train)              =  471
len(y_test)               =  202
------------------------------------------
y_train.size              =  471
y_test.size               =  202
------------------------------------------
y_train.shape             =  (471,)
y_test.shape              =  (202,)
------------------------------------------
y_train.min               =  24193.916
y_test.min                =  229587.881
------------------------------------------
y_train.max               =  32912000.374
y_test.max                =  42038198.617


In [105]:
for i in range(0,9):
    print(X_test_normalized[:,i].min())
    print(X_test_normalized[:,i].max())
    print('--------------------------')

-0.892778123319
0.925000613381
--------------------------
-1.02083227423
0.903520298425
--------------------------
-0.122322514627
1.0
--------------------------
-0.76708183367
0.912774493606
--------------------------
-0.750447581807
0.878339324448
--------------------------
-0.872004712958
0.874473818425
--------------------------
-0.6713302944
0.699040661439
--------------------------
-0.588795295738
0.917562261089
--------------------------
-0.529004385985
0.916726794555
--------------------------


In [106]:
X_train_denormalized = denormalizeX(X_train_normalized, X_normalized_MinMaxScalerTrainTest)

In [107]:
X_test_denormalized = denormalizeX(X_test_normalized, X_normalized_MinMaxScalerTrainTest)

In [108]:
y_train_denormalized = denormalizeY(y_train_normalized, y_normalized_MinMaxScalerTrainTest)

In [109]:
y_test_denormalized = denormalizeY(y_test_normalized, y_normalized_MinMaxScalerTrainTest)

In [110]:
VisualizePredictedYLineWithValues(y.values, y_train_denormalized, targetVariable, 'Denormalized')

In [111]:
VisualizePredictedYLineWithValues(y.values[len(y_train_denormalized):], y_test_denormalized, targetVariable, 'Denormalized')

In [112]:
# this is the same as did it before, when whole dataset was trained

def trainMultiLayerRegressor(X_train_normalized, y_train_normalized, activation, neuronsTrainTest):

    # Train Neural Network
    mlp = MLPRegressor(hidden_layer_sizes=neuronsTrainTest, \
                       max_iter=250, \
                       activation=activation, \
                       solver="lbfgs", \
                       learning_rate="constant", \
                       learning_rate_init=0.01, \
                       alpha=0.01, \
                       verbose=False, \
                       momentum=0.9, \
                       early_stopping=False, \
                       tol=0.00000001, \
                       shuffle=False, \
                       # n_iter_no_change=200, \
                       random_state=1234)

    mlp.fit(X_train_normalized, y_train_normalized)
    
    # ide kéne beilleszteni a modell elmentését
    joblib.dump(mlp, 'models/saved_mlp_model_train_test.pkl')

    return mlp


In [113]:
# Train Neural Network
mlp = trainMultiLayerRegressor(X_train_normalized, y_train_normalized, activation_function, neuronsTrainTest)

In [114]:
def predictMultiLayerRegressor(mlp, X_normalized):
    y_predicted = mlp.predict(X_normalized)

    return y_predicted


In [115]:
# Create prediction
y_train_predicted = predictMultiLayerRegressor(mlp, X_train_normalized)

# Create prediction
y_test_predicted = predictMultiLayerRegressor(mlp, X_test_normalized)

In [116]:
evaluateGoodnessOfPrediction(y_train_normalized, y_train_predicted)
print('---------------------')
evaluateGoodnessOfPrediction(y_test_normalized, y_test_predicted)

Correlation           = 0.652
Explained variance    = 0.424
Mean Absolute Error   = 0.086
Mean Squared Error    = 0.020
R2 Score              = 0.424
---------------------
Correlation           = 0.396
Explained variance    = -0.652
Mean Absolute Error   = 0.121
Mean Squared Error    = 0.072
R2 Score              = -0.746


In [117]:
y_train_denormalized = denormalizeY(y_train_normalized, y_normalized_MinMaxScalerTrainTest)

y_test_denormalized = denormalizeY(y_test_normalized, y_normalized_MinMaxScalerTrainTest)

y_train_predicted_denormalized = denormalizeY(y_train_predicted, y_normalized_MinMaxScalerTrainTest)

y_test_predicted_denormalized = denormalizeY(y_test_predicted, y_normalized_MinMaxScalerTrainTest)

In [118]:
from visualizerlinux import ScatterPlotsTrainTest

In [119]:
ScatterPlotsTrainTest(y_train_denormalized, y_train_predicted_denormalized, \
                      y_test_denormalized, y_test_predicted_denormalized, targetVariable)

In [120]:
ScatterPlotsTrainTest(y_train_normalized, y_train_predicted, \
                      y_test_normalized, y_test_predicted, targetVariable)

In [121]:
VisualizePredictedYLine(y_train_denormalized, y_train_predicted_denormalized, targetVariable, lines = True)

In [122]:
VisualizePredictedYLine(y_train_normalized, y_train_predicted, targetVariable, lines = True)

In [123]:
VisualizePredictedYLine(y_test_denormalized, y_test_predicted_denormalized, targetVariable, lines = True)

In [124]:
VisualizePredictedYLine(y_test_normalized, y_test_predicted, targetVariable, lines = True)

In [125]:
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [126]:
def createBeforeafterDFLags(df, lag):
    beforeafterDFLags = df.copy()
    inputVariables = np.flip(beforeafterDFLags.columns[0:10].ravel(), axis=-1)
    print('Input Variablels : ', inputVariables)

    index = 10
    for i in inputVariables:
        new_column = beforeafterDFLags[i].shift(lag)
        new_column_name = (str('prev') + str(1) + i) # Todo: rename str(lag)
        beforeafterDFLags.insert(loc=index, column=new_column_name, value=new_column)

    beforeafterDFLags = beforeafterDFLags[lag:]             # remove first row as we haven't got data in lag var
    
    return beforeafterDFLags


In [127]:
beforeafterDFLags = createBeforeafterDFLags(preProcessedDF, 1)

Input Variablels :  ['WorkerCount' 'PktOut' 'KBOut' 'PktIn' 'KBIn' 'CTXSW' 'Inter' 'CPU'
 'SUM RR' 'AVG RR']


In [128]:
beforeafterDFLags.columns

Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'prev1AVG RR', 'prev1SUM RR', 'prev1CPU',
       'prev1Inter', 'prev1CTXSW', 'prev1KBIn', 'prev1PktIn', 'prev1KBOut',
       'prev1PktOut', 'prev1WorkerCount', 'avg latency (quantile 0.5)'],
      dtype='object')

In [129]:
def createBeforeafterDFLeads(df, lead = 1):
    beforeafterDFLeads = df.copy()
    inputVariables = np.flip(beforeafterDFLeads.columns[0:10].ravel(), axis=-1)
    print('Input Variablels : ', inputVariables)

    index = 10
    for i in inputVariables:
        new_column = beforeafterDFLeads[i].shift(-lead)
        new_column_name = (str('next') + str(1) + i) # Todo: rename str(lead)
        beforeafterDFLeads.insert(loc=index, column=new_column_name, value=new_column)

    beforeafterDFLeads = beforeafterDFLeads[:-lead]             # remove last row as we haven't got data in lead var
    
    beforeafterDFLeads = beforeafterDFLeads.iloc[:,:-1]     # remove last column - Latency
    
    return beforeafterDFLeads


In [130]:
beforeafterDF = createBeforeafterDFLeads(beforeafterDFLags, lead = lead)

Input Variablels :  ['WorkerCount' 'PktOut' 'KBOut' 'PktIn' 'KBIn' 'CTXSW' 'Inter' 'CPU'
 'SUM RR' 'AVG RR']


In [131]:
beforeafterDF.columns

Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'next1AVG RR', 'next1SUM RR', 'next1CPU',
       'next1Inter', 'next1CTXSW', 'next1KBIn', 'next1PktIn', 'next1KBOut',
       'next1PktOut', 'next1WorkerCount', 'prev1AVG RR', 'prev1SUM RR',
       'prev1CPU', 'prev1Inter', 'prev1CTXSW', 'prev1KBIn', 'prev1PktIn',
       'prev1KBOut', 'prev1PktOut', 'prev1WorkerCount'],
      dtype='object')

In [132]:
# assert

a_colName = beforeafterDF.columns[-1]
a_cols = beforeafterDF.shape[1]

assert a_colName == 'prev1WorkerCount', "This column name is: {0} insted of prev1WorkerCount".format(a_colName)
assert a_cols == 30, "This column number is: {0} insted of 17".format(a_colName)

In [133]:
def calculateWorkerCountDifferences(beforeafterDF):
    new_beforeafterDF = beforeafterDF.copy()
    new_beforeafterDF['addedWorkerCount'] = new_beforeafterDF['next1WorkerCount'].values - new_beforeafterDF['WorkerCount']
    
    return new_beforeafterDF


In [134]:
theBeforeAfterDF = calculateWorkerCountDifferences(beforeafterDF)

In [135]:
def createScalingDF(theBeforeAfterDF):
    new_beforeafterDF = theBeforeAfterDF.copy()
    scalingDF = new_beforeafterDF[new_beforeafterDF.WorkerCount != new_beforeafterDF.next1WorkerCount]
    
    return scalingDF


In [136]:
scalingDF = createScalingDF(theBeforeAfterDF)

In [137]:
beforeafterMetricsDF = scalingDF.copy()

for i in metricNames:
    # print(i)
    changeInMetricAfterScale = beforeafterMetricsDF['next1'+i]-beforeafterMetricsDF[i]
    beforeafterMetricsDF['changed1'+i] = changeInMetricAfterScale

In [138]:
beforeafterMetricsDF[['prev1CPU','CPU','next1CPU','changed1CPU','prev1WorkerCount','WorkerCount','next1WorkerCount']]. \
head(10).style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,prev1CPU,CPU,next1CPU,changed1CPU,prev1WorkerCount,WorkerCount,next1WorkerCount
19,99.9,99.16,81.5,-17.66,2.0,2.0,3.0
28,85.2,90.21,77.8,-12.41,3.0,3.0,4.0
37,100.0,99.94,88.64,-11.3,4.0,4.0,5.0
46,96.09,97.33,97.02,-0.32,5.0,5.0,6.0
53,98.92,97.55,98.71,1.16,6.0,6.0,5.0
59,99.91,99.97,99.98,0.01,5.0,5.0,4.0
68,99.98,100.0,99.72,-0.28,4.0,4.0,5.0
69,100.0,99.72,80.43,-19.29,4.0,5.0,6.0
75,95.79,95.04,94.53,-0.51,6.0,6.0,5.0
82,98.79,99.66,99.63,-0.03,5.0,5.0,4.0


In [139]:
beforeafterMetricsDF[['prev1CPU','CPU','next1CPU','changed1CPU','prev1WorkerCount','WorkerCount','next1WorkerCount']]. \
head(10).style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,prev1CPU,CPU,next1CPU,changed1CPU,prev1WorkerCount,WorkerCount,next1WorkerCount
19,99.9,99.16,81.5,-17.66,2.0,2.0,3.0
28,85.2,90.21,77.8,-12.41,3.0,3.0,4.0
37,100.0,99.94,88.64,-11.3,4.0,4.0,5.0
46,96.09,97.33,97.02,-0.32,5.0,5.0,6.0
53,98.92,97.55,98.71,1.16,6.0,6.0,5.0
59,99.91,99.97,99.98,0.01,5.0,5.0,4.0
68,99.98,100.0,99.72,-0.28,4.0,4.0,5.0
69,100.0,99.72,80.43,-19.29,4.0,5.0,6.0
75,95.79,95.04,94.53,-0.51,6.0,6.0,5.0
82,98.79,99.66,99.63,-0.03,5.0,5.0,4.0


In [140]:
beforeafterMetricsDF[['changed1CPU', 'changed1Inter', 'changed1CTXSW', 'changed1KBIn', \
                      'changed1KBOut', 'changed1PktIn', 'changed1PktOut', 'addedWorkerCount']]. \
groupby(['addedWorkerCount'], as_index=False).mean().style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,addedWorkerCount,changed1CPU,changed1Inter,changed1CTXSW,changed1KBIn,changed1KBOut,changed1PktIn,changed1PktOut
0,-3.0,0.99,223.9,294.84,81.4,11.76,129.57,88.69
1,-2.0,-0.52,-5.38,-40.48,11.53,0.53,8.16,-2.38
2,-1.0,0.39,314.84,219.71,117.0,21.76,216.47,186.32
3,1.0,-6.09,-524.32,-442.57,-36.97,-30.34,-227.28,-261.25
4,2.0,-7.4,-454.79,-1035.7,-91.64,5.3,-128.3,39.61


In [141]:
beforeafterMetricsDF[['changed1CPU', 'changed1Inter', 'changed1CTXSW', 'changed1KBIn', \
                      'changed1KBOut', 'changed1PktIn', 'changed1PktOut', 'addedWorkerCount']]. \
groupby(['addedWorkerCount'], as_index=False).count().style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,addedWorkerCount,changed1CPU,changed1Inter,changed1CTXSW,changed1KBIn,changed1KBOut,changed1PktIn,changed1PktOut
0,-3.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,-2.0,11.0,11.0,11.0,11.0,11.0,11.0,11.0
2,-1.0,34.0,34.0,34.0,34.0,34.0,34.0,34.0
3,1.0,59.0,59.0,59.0,59.0,59.0,59.0,59.0
4,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [142]:
print(theBeforeAfterDF.shape)

print(scalingDF.shape)

(672, 31)
(106, 31)


In [143]:
metricNames

['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut']

In [144]:


def calculateLinearRegressionTerms(metric, dataFrame):
    termDF = dataFrame.copy()
    termDF['metric'] = termDF[metric]
    termDF['term1']  = termDF[metric] * termDF['WorkerCount'] / (termDF['WorkerCount'] + termDF['addedWorkerCount'])
    termDF['term2']  = termDF[metric] * termDF['addedWorkerCount'] / (termDF['WorkerCount'] + termDF['addedWorkerCount'])
    return termDF


def createInputAndTargetToLinearRegression(currentMetric, dataFrameB):
    newDataFrameB = dataFrameB.copy()
    yb = newDataFrameB['next1' + str(currentMetric)]
    featuresDF = newDataFrameB[[str(currentMetric), 'WorkerCount', 'next1WorkerCount', 'addedWorkerCount']]
    
    tmpDF = calculateLinearRegressionTerms(currentMetric, featuresDF)
    
    Xb = tmpDF.iloc[:, [-3, -2, -1]]     # keep last three column - given metric, term1, term2
    
    # These are only for check everything is in order
    # print(y.head(1))
    # print(featuresDF.head(1))
    # print(X.head(2))
    # scalingDF[['CPU', 'next1CPU', 'WorkerCount', 'next1WorkerCount', 'addedWorkerCount']][0:3]
    return Xb, yb


def calculateLinearRegressionModel(currentMetric, dataFrameA):
    newDataFrameA = dataFrameA.copy()
    Xa, ya = createInputAndTargetToLinearRegression(currentMetric, newDataFrameA)
    
    # ToDo : Return and store particular model
    
    lr = LinearRegression(fit_intercept=True, normalize=False)
    lr.fit(Xa, ya)
    # prediction = lr.predict(X)
    
    return lr


def calculateLinearRegressionPrediction(metric, dataFrame, model):
    X, y = createInputAndTargetToLinearRegression(metric, dataFrame)
    
    model.fit(X, y)
    y_predicted = model.predict(X)
    
    # print('Score = ', model.score(X, y))
    # print(metric, 'MAE \t=\t{:0.2f}'.format(metrics.mean_absolute_error(y, y_predicted)))
    
    # todo: refactor
    print(metric)
    evaluateGoodnessOfPrediction(y, y_predicted)
    print('-----------------------------------')
    
    return y_predicted

In [145]:
temporaryScalingDF = scalingDF.copy()

In [146]:
d={}
for i in metricNames:
    d["model{0}".format(i)]="Hello " + i
    # print(d)
    
d.get('modelCPU')

'Hello CPU'

In [147]:

def learningLinearRegression(scalingDF, temporaryScalingDF, metricNames):
    # linearRegressionModels = {}
    # temporaryScalingDF = scalingDF.copy()
    d={}
    
    for i in metricNames:

        # d["model{0}".format(i)]="Hello " + i
        
        model = calculateLinearRegressionModel(i, scalingDF)
        prediction = calculateLinearRegressionPrediction(i, scalingDF, model)
        
        # save model to the file system
        joblib.dump(model, 'models/saved_linearregression_model_' + i + '.pkl')

        # nos van egy ilyen modellünk mit tegyünk vele, tároljuk el mindegyiket különböző néven
        d["model{0}".format(i)] = model

        # el kéne tárolni
        temporaryScalingDF['predictedNext1'+i] = prediction

    
    return temporaryScalingDF, d


In [148]:
temporaryScalingDF, linearRegressionModels = learningLinearRegression(scalingDF, temporaryScalingDF, metricNames)

CPU
Correlation           = 0.652
Explained variance    = 0.425
Mean Absolute Error   = 5.310
Mean Squared Error    = 48.328
R2 Score              = 0.425
-----------------------------------
Inter
Correlation           = 0.647
Explained variance    = 0.419
Mean Absolute Error   = 395.734
Mean Squared Error    = 299776.965
R2 Score              = 0.419
-----------------------------------
CTXSW
Correlation           = 0.547
Explained variance    = 0.300
Mean Absolute Error   = 385.045
Mean Squared Error    = 297667.521
R2 Score              = 0.300
-----------------------------------
KBIn
Correlation           = 0.311
Explained variance    = 0.097
Mean Absolute Error   = 220.380
Mean Squared Error    = 107965.121
R2 Score              = 0.097
-----------------------------------
PktIn
Correlation           = 0.650
Explained variance    = 0.423
Mean Absolute Error   = 203.032
Mean Squared Error    = 80902.089
R2 Score              = 0.423
-----------------------------------
KBOut
Correlati

In [149]:
linearRegressionModelNames = linearRegressionModels.keys()

print(linearRegressionModelNames)

modelCPU = linearRegressionModels.get('modelCPU')

print(type(modelCPU))

dict_keys(['modelCPU', 'modelInter', 'modelCTXSW', 'modelKBIn', 'modelPktIn', 'modelKBOut', 'modelPktOut'])
<class 'sklearn.linear_model.base.LinearRegression'>


In [150]:
temporaryScalingDF.columns

Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'next1AVG RR', 'next1SUM RR', 'next1CPU',
       'next1Inter', 'next1CTXSW', 'next1KBIn', 'next1PktIn', 'next1KBOut',
       'next1PktOut', 'next1WorkerCount', 'prev1AVG RR', 'prev1SUM RR',
       'prev1CPU', 'prev1Inter', 'prev1CTXSW', 'prev1KBIn', 'prev1PktIn',
       'prev1KBOut', 'prev1PktOut', 'prev1WorkerCount', 'addedWorkerCount',
       'predictedNext1CPU', 'predictedNext1Inter', 'predictedNext1CTXSW',
       'predictedNext1KBIn', 'predictedNext1PktIn', 'predictedNext1KBOut',
       'predictedNext1PktOut'],
      dtype='object')

In [151]:
temporaryScalingDF.shape

(106, 38)

In [152]:
temporaryScalingDF.shape

(106, 38)

In [153]:
from visualizerlinux import ipythonPlotMetricsRealAgainstPredicted

In [154]:
metricNames = ['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut']

ipythonPlotMetricsRealAgainstPredicted(temporaryScalingDF, metricNames)

In [155]:
from visualizerlinux import ipythonPlotMetricsRealAgainstPredictedRegression

In [156]:
ipythonPlotMetricsRealAgainstPredictedRegression(temporaryScalingDF, metricNames)

### End of Learning Phase

<a id="test_begin"></a>

# Advice Phase - Production Phase

In [157]:
from utils import loadMinMaxScalerXFull, loadMinMaxScalerYFull
from utils import loadNeuralNetworkModel
from utils import readCSV
from utils import preProcessing, renameVariable, setMetricNames, setExtendedMetricNames, dropFirstCases

from linearregression import calculateLinearRegressionTerms

from visualizerlinux import VisualizePredictedYLine, VisualizePredictedYWithWorkers

from sklearn.externals import joblib

pandas_dataframe_styles = {
    'font-family': 'monospace',
    'white-space': 'pre'
}

In [158]:
X_normalized_MinMaxScaler = loadMinMaxScalerXFull()
y_normalized_MinMaxScaler = loadMinMaxScalerYFull()

modelNeuralNet = loadNeuralNetworkModel()

In [159]:
# Vigyázat ennek azonosnak kell lennie a korábbi értékkel különben para van (ezt valahogy ki kéne vezetni valami külső
# fájlba, vagy csinálni valamilyen osztályt amiben ez el van tárolva)

cutFirstCases = 0                                                      # 10
targetVariable = 'avg latency (quantile 0.5)'
testFileName = 'data/grafana_data_export_long_running_test.csv'        # original data
testFileName = 'data/test_data.csv'                                    # test data
testFileName = 'data/test_data2.csv'                                   # test data
# testFileName = 'data/micado0730715_v2.csv'

maximumNumberIncreasableNode = 6                                       # must be positive
minimumNumberReducibleNode = -4                                        # must be negativ

upperLimit = 4000000                                                   # 6000000
lowerLimit = 3000000                                                   # 1000000

In [160]:
newDF = readCSV(testFileName)

In [161]:
newPreProcessedDF = preProcessing(newDF)

newRenamedDF = renameVariable(newPreProcessedDF, 'Worker count', 'WorkerCount')

metricNames         = setMetricNames(['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut'])
extendedMetricNames = setExtendedMetricNames(['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut', 'WorkerCount'])

newFilteredDF = dropFirstCases(newRenamedDF, cutFirstCases)

>#### Add new workers (increse the nuber of added Worker)

In [162]:
metricNames

['CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut', 'PktOut']

In [163]:
def calculatePredictedLatencyWithVariousWorkers(modelNeuralNet, to):

    newDFForRegression = newFilteredDF.copy()
    nDD = newFilteredDF.copy()
    
    step = 0
    
    if( to == 0 ):
        print("")
        assert to != 0,"This value can not be 0. Error in calculatePredictedLatencyWithVariousWorkers method set maximum number of scalable nodes."
    elif( to > 0 ):
        step = 1
        print('............. up maximum vm = ' + str(to) + ' ...........')
    elif( to < 0 ):
        step = -1
        print('............. down maximum vm = ' + str(to) + ' ...........')

    for j in range(0, to, step):
        
        addedWorkerCount = j

        newDFForRegression['addedWorkerCount'] = addedWorkerCount

        for i in metricNames:

            newDFForRegressionWithTerms = calculateLinearRegressionTerms(i, newDFForRegression)

            print("------------     ", newDFForRegressionWithTerms.CPU.values[1], "     ------------")
            print("------------     ", newDFForRegressionWithTerms.shape, "     ------------")
            print("------------     ", newDFForRegressionWithTerms.columns, "     ------------")

            # keep last three column - given metric, term1, term2
            X = newDFForRegressionWithTerms.iloc[:, [-3, -2, -1]]
            
            print("------------     ", X.shape, "     ------------")
            print("------------     ", X.values[0], "     ------------") # Error ez az érték első eleme fix kéne hogy legyen
            print("------------     ", X.values[-1], "     ------------")# ugyanakkor folyamatosan változik
            
            # load the proper current metric model
            modelForMetric = joblib.load('models/saved_linearregression_model_' + i + '.pkl')

            print("------------     ", modelForMetric.get_params(), "     ------------")

            if( np.isinf(X).any()[1] ):
                X['term1'] = np.where(np.isinf(X['term1'].values), X['metric'], X['term1'])
                X['term2'] = np.where(np.isinf(X['term2'].values), 0, X['term2'])
                # print('-----------')
                # print(X.to_string())
                
            
            # create prediction and store in a new numpy.array object
            predictedMetric = modelForMetric.predict(X)
            
            
            # leave original metric value (just for fun and investigation) and store in a new column
            newDFForRegression['original' + i] = newDFForRegression[i]

            # store predicted value pretend as would be the original. for example predictedCPU will be CPU
            newDFForRegression[i] = predictedMetric
            nDD[i] = predictedMetric

            print("------------     ", newDFForRegression[['CPU']].values[1], "    ------------")
            print("------------     ", nDD[['CPU']].values[1], "    ------------")

            # print out the new data frame
            newDFForRegression.head()


        newDFForNerualNetworkPrediction = newDFForRegression.copy()     
        
        # X must contain exactly the same columns as the model does
        X = newDFForNerualNetworkPrediction.iloc[:, :9]
        
        # X must be normalized based on a previously created MinMaxScaler
        X_normalized_MinMaxScaler # the name of the MinMaxScaler

        X_normalized = X_normalized_MinMaxScaler.transform(X)

        # modelNeuralNet = joblib.load('models/saved_mlp_model.pkl')
        modelNeuralNet = modelNeuralNet

        # create and store predicted values in a numpy.array object
        y_predicted_with_new_metrics = modelNeuralNet.predict(X_normalized)

        # denormalized predicted values
        y_predicted_with_new_metrics_denormalized = y_normalized_MinMaxScaler.inverse_transform(y_predicted_with_new_metrics.reshape(y_predicted_with_new_metrics.shape[0],1))

        newDFForNerualNetworkPrediction['predictedResponseTimeAdded' + str(j) + 'Worker'] = y_predicted_with_new_metrics
        newDFForNerualNetworkPrediction['denormalizedPredictedResponseTimeAdded' + str(j) + 'Worker'] = y_predicted_with_new_metrics_denormalized

        if(j == 0):
            investigationDF = newDFForNerualNetworkPrediction[[targetVariable, 'WorkerCount']]
            investigationDFDeNormalized = newDFForNerualNetworkPrediction[[targetVariable, 'WorkerCount']]
            #investigationDF = newDFForNerualNetworkPrediction[['predictedResponseTimeAdded0Worker']]
            #investigationDFDeNormalized = newDFForNerualNetworkPrediction[['denormalizedPredictedResponseTimeAdded0Worker']]

        investigationDF['predictedResponseTimeAdded' + str(j) + 'Worker'] = newDFForNerualNetworkPrediction[['predictedResponseTimeAdded' + str(j) + 'Worker']]
        investigationDFDeNormalized['denormalizedPredictedResponseTimeAdded' + str(j) + 'Worker'] = newDFForNerualNetworkPrediction[['denormalizedPredictedResponseTimeAdded' + str(j) + 'Worker']]

    print(newDFForNerualNetworkPrediction.columns)
    
    print(investigationDFDeNormalized.columns)
    
    return investigationDF, investigationDFDeNormalized

In [164]:


investigationDFUp, investigationDFDeNormalizedUp = calculatePredictedLatencyWithVariousWorkers(modelNeuralNet, \
                                                                                               maximumNumberIncreasableNode)



............. up maximum vm = 6 ...........
------------      90.205557037      ------------
------------      (659, 15)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 85.2  85.2   0. ]      ------------
------------      [ 84.689  84.689   0.   ]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 87.815]     ------------
------------      [ 87.815]     ------------
------------      87.8146942636      ------------
------------      (659, 16)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantil

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


{'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 87.815]     ------------
------------      [ 87.815]     ------------
------------      87.8146942636      ------------
------------      (659, 22)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'originalCPU', 'originalInter', 'originalCTXSW',
       'originalKBIn', 'originalPktIn', 'originalKBOut', 'originalPktOut',
       'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 84.131  63.098  21.033]      ------------
------------      [ 83.755  71.79   11.965]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 84.49]     ------------
------------    

------------      (659, 22)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'originalCPU', 'originalInter', 'originalCTXSW',
       'originalKBIn', 'originalPktIn', 'originalKBOut', 'originalPktOut',
       'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 1305.402   652.701   652.701]      ------------
------------      [ 1293.492   862.328   431.164]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 78.29]     ------------
------------      [ 78.29]     ------------
------------      78.2902940369      ------------
------------      (659, 22)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',

In [165]:


investigationDFDown, investigationDFDeNormalizedDown = calculatePredictedLatencyWithVariousWorkers(modelNeuralNet, \
                                                                                                   minimumNumberReducibleNode)



............. down maximum vm = -4 ...........
------------      90.205557037      ------------
------------      (659, 15)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 85.2  85.2   0. ]      ------------
------------      [ 84.689  84.689   0.   ]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 87.815]     ------------
------------      [ 87.815]     ------------
------------      87.8146942636      ------------
------------      (659, 16)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'originalCPU', 'originalInter', 'originalCTXSW',
       'originalKBIn', 'originalPktIn', 'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 192.742  192.742    0.   ]      ------------
------------      [ 199.202  199.202    0.   ]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 87.815]     ------------
------------      [ 87.815]     ------------
------------      87.8146942636      ------------
------------      (659, 21)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

      ------------
------------      [ 89.185]     ------------
------------      [ 89.185]     ------------
------------      89.1853995785      ------------
------------      (659, 22)      ------------
------------      Index(['AVG RR', 'SUM RR', 'CPU', 'Inter', 'CTXSW', 'KBIn', 'PktIn', 'KBOut',
       'PktOut', 'WorkerCount', 'avg latency (quantile 0.5)',
       'addedWorkerCount', 'originalCPU', 'originalInter', 'originalCTXSW',
       'originalKBIn', 'originalPktIn', 'originalKBOut', 'originalPktOut',
       'metric', 'term1', 'term2'],
      dtype='object')      ------------
------------      (659, 3)      ------------
------------      [ 6623.409  9935.114 -3311.705]      ------------
------------      [ 6317.511  7581.014 -1263.502]      ------------
------------      {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}      ------------
------------      [ 89.185]     ------------
------------      [ 89.185]     ------------
------------      89.185399578

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

### Merge Up and Down Adviser

In [166]:
investigationDeNormalizedDF = pd.concat([investigationDFDeNormalizedDown, \
                             investigationDFDeNormalizedUp], axis = 1).T.drop_duplicates().T

investigationDeNormalizedDF.values.shape

(659, 11)

In [167]:
investigationDeNormalizedDF.head().style.set_properties(**pandas_dataframe_styles).format("{:0.3f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,denormalizedPredictedResponseTimeAdded0Worker,denormalizedPredictedResponseTimeAdded-1Worker,denormalizedPredictedResponseTimeAdded-2Worker,denormalizedPredictedResponseTimeAdded-3Worker,denormalizedPredictedResponseTimeAdded1Worker,denormalizedPredictedResponseTimeAdded2Worker,denormalizedPredictedResponseTimeAdded3Worker,denormalizedPredictedResponseTimeAdded4Worker,denormalizedPredictedResponseTimeAdded5Worker
1,735675.318,3.0,948173.024,1628245.063,-987294.019,395979.017,364691.351,-311751.779,-747769.847,-943432.159,-1003234.083
2,280211.301,3.0,2301201.004,2479383.084,1475498.878,2189419.779,1593649.788,829065.77,310211.033,-15344.896,-234982.468
3,723342.349,4.0,1687264.182,2662209.067,1958228.255,3954094.053,1340493.777,909550.7,548477.019,273627.8,49872.713
5,661149.152,4.0,1651933.068,1816959.703,564852.576,730605.591,1031857.504,285678.273,-238013.659,-551641.983,-752051.104
6,1369729.792,4.0,2022762.948,2014192.854,917154.365,1413714.897,1285569.936,516006.024,-50546.356,-423286.885,-693538.01


In [168]:
investigationDFUp.head().style.set_properties(**pandas_dataframe_styles).format("{:0.3f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,predictedResponseTimeAdded0Worker,predictedResponseTimeAdded1Worker,predictedResponseTimeAdded2Worker,predictedResponseTimeAdded3Worker,predictedResponseTimeAdded4Worker,predictedResponseTimeAdded5Worker
1,735675.318,3.0,-0.956,-0.984,-1.016,-1.037,-1.046,-1.049
2,280211.301,3.0,-0.892,-0.925,-0.962,-0.986,-1.002,-1.012
3,723342.349,4.0,-0.921,-0.937,-0.958,-0.975,-0.988,-0.999
5,661149.152,4.0,-0.923,-0.952,-0.988,-1.012,-1.027,-1.037
6,1369729.792,4.0,-0.905,-0.94,-0.977,-1.004,-1.021,-1.034


In [169]:
investigationDFDown.head().style.set_properties(**pandas_dataframe_styles).format("{:0.3f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,predictedResponseTimeAdded0Worker,predictedResponseTimeAdded-1Worker,predictedResponseTimeAdded-2Worker,predictedResponseTimeAdded-3Worker
1,735675.318,3.0,-0.956,-0.924,-1.048,-0.982
2,280211.301,3.0,-0.892,-0.883,-0.931,-0.897
3,723342.349,4.0,-0.921,-0.874,-0.908,-0.813
5,661149.152,4.0,-0.923,-0.915,-0.974,-0.966
6,1369729.792,4.0,-0.905,-0.905,-0.957,-0.934


In [170]:
investigationDFDeNormalizedUp.head().style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,denormalizedPredictedResponseTimeAdded0Worker,denormalizedPredictedResponseTimeAdded1Worker,denormalizedPredictedResponseTimeAdded2Worker,denormalizedPredictedResponseTimeAdded3Worker,denormalizedPredictedResponseTimeAdded4Worker,denormalizedPredictedResponseTimeAdded5Worker
1,735675.32,3.0,948173.02,364691.35,-311751.78,-747769.85,-943432.16,-1003234.08
2,280211.3,3.0,2301201.0,1593649.79,829065.77,310211.03,-15344.9,-234982.47
3,723342.35,4.0,1687264.18,1340493.78,909550.7,548477.02,273627.8,49872.71
5,661149.15,4.0,1651933.07,1031857.5,285678.27,-238013.66,-551641.98,-752051.1
6,1369729.79,4.0,2022762.95,1285569.94,516006.02,-50546.36,-423286.88,-693538.01


In [171]:
investigationDFDeNormalizedDown.head().style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,denormalizedPredictedResponseTimeAdded0Worker,denormalizedPredictedResponseTimeAdded-1Worker,denormalizedPredictedResponseTimeAdded-2Worker,denormalizedPredictedResponseTimeAdded-3Worker
1,735675.32,3.0,948173.02,1628245.06,-987294.02,395979.02
2,280211.3,3.0,2301201.0,2479383.08,1475498.88,2189419.78
3,723342.35,4.0,1687264.18,2662209.07,1958228.26,3954094.05
5,661149.15,4.0,1651933.07,1816959.7,564852.58,730605.59
6,1369729.79,4.0,2022762.95,2014192.85,917154.36,1413714.9


In [172]:
VisualizePredictedYWithWorkers(0, investigationDFDown[['predictedResponseTimeAdded0Worker', \
                                                       'predictedResponseTimeAdded-1Worker', \
                                                       'predictedResponseTimeAdded-2Worker', \
                                                       'predictedResponseTimeAdded-3Worker']], targetVariable)

In [173]:
VisualizePredictedYWithWorkers(0, investigationDFUp[['predictedResponseTimeAdded1Worker', \
                                                     'predictedResponseTimeAdded2Worker', \
                                                     'predictedResponseTimeAdded3Worker']], targetVariable)

In [174]:
VisualizePredictedYWithWorkers(0, investigationDFUp[['predictedResponseTimeAdded0Worker', \
                                              'predictedResponseTimeAdded1Worker', \
                                              'predictedResponseTimeAdded2Worker', \
                                              'predictedResponseTimeAdded3Worker', \
                                              'predictedResponseTimeAdded4Worker', \
                                              'predictedResponseTimeAdded5Worker']], targetVariable)

In [175]:
VisualizePredictedYWithWorkers(0, investigationDFDeNormalizedUp[['denormalizedPredictedResponseTimeAdded0Worker', \
                                                                 'denormalizedPredictedResponseTimeAdded1Worker', \
                                                                 'denormalizedPredictedResponseTimeAdded2Worker', \
                                                                 'denormalizedPredictedResponseTimeAdded3Worker', \
                                                                 'denormalizedPredictedResponseTimeAdded4Worker', \
                                                                 'denormalizedPredictedResponseTimeAdded5Worker']], targetVariable)

In [176]:
VisualizePredictedYLine(investigationDFDeNormalizedUp['avg latency (quantile 0.5)'], \
                        investigationDFDeNormalizedUp[['denormalizedPredictedResponseTimeAdded0Worker', \
                                                          'denormalizedPredictedResponseTimeAdded1Worker', \
                                                          'denormalizedPredictedResponseTimeAdded2Worker', \
                                                          'denormalizedPredictedResponseTimeAdded3Worker', \
                                                          'denormalizedPredictedResponseTimeAdded4Worker', \
                                                          'denormalizedPredictedResponseTimeAdded5Worker']], targetVariable)

In [177]:
VisualizePredictedYLine(investigationDFDeNormalizedUp[[targetVariable]], \
                        investigationDFDeNormalizedUp[['denormalizedPredictedResponseTimeAdded0Worker']], targetVariable)

In [178]:
VisualizePredictedYLine(investigationDFDeNormalizedUp[[targetVariable]], \
                        investigationDFDeNormalizedUp[['denormalizedPredictedResponseTimeAdded0Worker']], targetVariable)

### Get Advice

In [179]:
from visualizerlinux import VisualizePredictedXYLine
from visualizerlinux import VisualizePredictedXY2Line

In [180]:
VisualizePredictedXYLine(0, investigationDFDeNormalizedUp[[targetVariable]], \
                         targetVariable, lowerLimit, upperLimit)

In [181]:
advice = 0
countInRange = 0
countViolatedUp = 0
countViolatedDown = 0

advicedDF = investigationDeNormalizedDF.copy()
advicedDF['advice'] = 0
advicedDF['postScaledTargetVariable'] = np.nan

for i in investigationDeNormalizedDF.index:
    distance = 99999999999
    real = investigationDeNormalizedDF[[targetVariable]].get_value(i, targetVariable)
    if( upperLimit > real and lowerLimit < real ):
        advice = 0
        advicedDF.ix[i,'advice'] = 0
        countInRange += 1
        print("ok")
    else:
        print("threshold violation at index " + str(i))
        if( upperLimit < real ):
            countViolatedUp += 1
            print("threshold up violation")
            advice = 0
            postScaledTargetVariable = np.nan # 0
            distance = float('inf')
            for j in range(1, 6):
                print(distance)
                advice = 0
                # két feltételnek kell megfelelnie sorrendben legyen a legkisebb távolsága a felső limittől
                # kettő legyen a felső limit alatt (utóbbi nem biztos, hogy teljesül)
                varName = 'denormalizedPredictedResponseTimeAdded' + str(j) + 'Worker'
                relatedTargetVariable = investigationDeNormalizedDF.get_value(i, varName)
                calculatedDistance = investigationDeNormalizedDF.get_value(i, varName)
                if( calculatedDistance < upperLimit ):
                    distance = calculatedDistance
                    advice = j
                    postScaledTargetVariable = relatedTargetVariable
                    break
                print(calculatedDistance)
            advicedDF.ix[i,'advice'] = advice
            advicedDF.ix[i, 'postScaledTargetVariable'] = postScaledTargetVariable
        elif( lowerLimit > real ):
            countViolatedDown += 1
            print("threshold down violation")
            advice = 0
            postScaledTargetVariable = np.nan # 0
            distance = float('-inf')
            for j in range(-1, -3, -1):
                print(distance)
                advice = 0
                # két feltételnek kell megfelelnie sorrendben legyen a legkisebb távolsága az alsó limittől
                # kettő legyen az alsó limit fölött (utóbbi nem biztos, hogy teljesül)
                varName = 'denormalizedPredictedResponseTimeAdded' + str(j) + 'Worker'
                relatedTargetVariable = investigationDeNormalizedDF.get_value(i, varName)
                calculateDistance = investigationDeNormalizedDF.get_value(i, varName)
                if( calculateDistance > lowerLimit ):
                    distance = calculateDistance
                    advice = j
                    postScaledTargetVariable = relatedTargetVariable
                    if( calculateDistance < upperLimit ):
                        distance = calculateDistance
                        advice = j
                        postScaledTargetVariable = relatedTargetVariable
                        break
                    # break
                print(calculateDistance)
            advicedDF.ix[i, 'advice'] = advice
            advicedDF.ix[i, 'postScaledTargetVariable'] = postScaledTargetVariable


threshold violation at index 1
threshold down violation
-inf
1628245.06284
-inf
-987294.019271
threshold violation at index 2
threshold down violation
-inf
2479383.08449
-inf
1475498.87798
threshold violation at index 3
threshold down violation
-inf
2662209.06749
-inf
1958228.25548
threshold violation at index 5
threshold down violation
-inf
1816959.70265
-inf
564852.57562
threshold violation at index 6
threshold down violation
-inf
2014192.85393
-inf
917154.364544
threshold violation at index 7
threshold down violation
-inf
2287537.65984
-inf
1154879.73454
threshold violation at index 8
threshold down violation
-inf
2351000.7106
-inf
1310821.18468
threshold violation at index 9
threshold down violation
-inf
2840427.61887
-inf
2074440.39286
threshold violation at index 10
threshold down violation
-inf
threshold violation at index 11
threshold down violation
-inf
threshold violation at index 12
threshold down violation
-inf
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
threshold violation at index 25
t

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated



threshold up violation
inf
ok
ok
threshold violation at index 77
threshold down violation
-inf
-3116025.16767
-inf
-2592506.61704
threshold violation at index 78
threshold down violation
-inf
1646179.41654
-inf
-1256542.96522
threshold violation at index 79
threshold down violation
-inf
2886468.39588
-inf
6761190.74374
threshold violation at index 80
threshold down violation
-inf
2473979.85835
-inf
7723185.20849
threshold violation at index 81
threshold down violation
-inf
2899208.05934
-inf
6824502.22529
threshold violation at index 82
threshold down violation
-inf
2463871.20762
-inf
8023143.61999
threshold violation at index 83
threshold down violation
-inf
2703253.80119
-inf
7468962.17033
threshold violation at index 84
threshold down violation
-inf
2078136.15579
-inf
8453757.23059
threshold violation at index 85
threshold down violation
-inf
2389068.26517
-inf
7391480.08523
ok
ok
ok
ok
ok
threshold violation at index 91
threshold down violation
-inf
2417214.10306
-inf
903163.41241

threshold up violation
inf
4182802.61853
inf
ok
threshold violation at index 341
threshold down violation
-inf
2032550.20955
-inf
60518.9486131
threshold violation at index 342
threshold down violation
-inf
431181.094208
-inf
-3411112.65992
threshold violation at index 343
threshold down violation
-inf
-748345.968042
-inf
-4081070.62127
threshold violation at index 344
threshold down violation
-inf
35995.1361081
-inf
-3501805.03149
threshold violation at index 345
threshold down violation
-inf
1058443.96321
-inf
-2074882.58945
threshold violation at index 346
threshold down violation
-inf
2102160.18956
-inf
724412.574036
threshold violation at index 347
threshold down violation
-inf
2668574.59521
-inf
1495184.62872
threshold violation at index 349
threshold down violation
-inf
1693479.13033
-inf
489831.912475
threshold violation at index 350
threshold down violation
-inf
2230233.93885
-inf
972994.816206
threshold violation at index 351
threshold down violation
-inf
2195863.53412
-inf
1

In [182]:
advicedDF.head(10).style.set_properties(**pandas_dataframe_styles).format("{:0.0f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,denormalizedPredictedResponseTimeAdded0Worker,denormalizedPredictedResponseTimeAdded-1Worker,denormalizedPredictedResponseTimeAdded-2Worker,denormalizedPredictedResponseTimeAdded-3Worker,denormalizedPredictedResponseTimeAdded1Worker,denormalizedPredictedResponseTimeAdded2Worker,denormalizedPredictedResponseTimeAdded3Worker,denormalizedPredictedResponseTimeAdded4Worker,denormalizedPredictedResponseTimeAdded5Worker,advice,postScaledTargetVariable
1,735675,3,948173,1628245,-987294,395979,364691,-311752,-747770,-943432,-1003234,0,
2,280211,3,2301201,2479383,1475499,2189420,1593650,829066,310211,-15345,-234982,0,
3,723342,4,1687264,2662209,1958228,3954094,1340494,909551,548477,273628,49873,0,
5,661149,4,1651933,1816960,564853,730606,1031858,285678,-238014,-551642,-752051,0,
6,1369730,4,2022763,2014193,917154,1413715,1285570,516006,-50546,-423287,-693538,0,
7,2173550,4,2422063,2287538,1154880,2386310,1667920,822568,154208,-346422,-733542,0,
8,2497541,4,2398956,2351001,1310821,2091982,1654608,789904,127065,-351926,-712494,0,
9,2885125,4,3177110,2840428,2074440,3132990,2335331,1232270,349981,-311520,-829714,0,
10,2729371,4,3626870,3134213,2509933,4144110,3166150,2129674,1037521,92012,-663680,-1,3134213.0
11,2936877,4,3614206,3178340,2640742,4608287,3102914,2056891,964457,15735,-740566,-1,3178340.0


In [183]:
VisualizePredictedXYLine(advicedDF[['advice']] * 2000000, advicedDF[[targetVariable]], \
                         targetVariable, lowerLimit, upperLimit)

In [184]:
print('countInRange      = ', countInRange)
print('countViolatedDown = ', countViolatedDown)
print('countVilolatedUp  = ', countViolatedUp)

countInRange      =  190
countViolatedDown =  282
countVilolatedUp  =  187


In [185]:
VisualizePredictedXY2Line(advicedDF[[targetVariable]], advicedDF[['advice']], \
                         targetVariable, lowerLimit, upperLimit)

In [186]:
from visualizerlinux import VisualizePredictedXY3Line

In [187]:
VisualizePredictedXY3Line(advicedDF[[targetVariable]], \
                          advicedDF[['postScaledTargetVariable']], \
                          advicedDF[['advice']], \
                          targetVariable, lowerLimit, upperLimit)

In [188]:
advicedDF.style.set_properties(**pandas_dataframe_styles).format("{:0.2f}")

Unnamed: 0,avg latency (quantile 0.5),WorkerCount,denormalizedPredictedResponseTimeAdded0Worker,denormalizedPredictedResponseTimeAdded-1Worker,denormalizedPredictedResponseTimeAdded-2Worker,denormalizedPredictedResponseTimeAdded-3Worker,denormalizedPredictedResponseTimeAdded1Worker,denormalizedPredictedResponseTimeAdded2Worker,denormalizedPredictedResponseTimeAdded3Worker,denormalizedPredictedResponseTimeAdded4Worker,denormalizedPredictedResponseTimeAdded5Worker,advice,postScaledTargetVariable
1,735675.32,3.0,948173.02,1628245.06,-987294.02,395979.02,364691.35,-311751.78,-747769.85,-943432.16,-1003234.08,0.0,
2,280211.3,3.0,2301201.0,2479383.08,1475498.88,2189419.78,1593649.79,829065.77,310211.03,-15344.9,-234982.47,0.0,
3,723342.35,4.0,1687264.18,2662209.07,1958228.26,3954094.05,1340493.78,909550.7,548477.02,273627.8,49872.71,0.0,
5,661149.15,4.0,1651933.07,1816959.7,564852.58,730605.59,1031857.5,285678.27,-238013.66,-551641.98,-752051.1,0.0,
6,1369729.79,4.0,2022762.95,2014192.85,917154.36,1413714.9,1285569.94,516006.02,-50546.36,-423286.88,-693538.01,0.0,
7,2173549.57,4.0,2422063.48,2287537.66,1154879.73,2386310.23,1667920.48,822567.86,154208.03,-346422.15,-733542.06,0.0,
8,2497540.93,4.0,2398955.8,2351000.71,1310821.18,2091981.76,1654607.99,789903.67,127064.85,-351926.49,-712494.42,0.0,
9,2885125.24,4.0,3177109.95,2840427.62,2074440.39,3132990.03,2335330.85,1232270.09,349980.86,-311519.85,-829714.45,0.0,
10,2729371.14,4.0,3626869.6,3134212.96,2509932.64,4144110.12,3166149.56,2129674.05,1037520.92,92012.1,-663680.11,-1.0,3134212.96
11,2936876.79,4.0,3614206.28,3178340.26,2640742.02,4608286.9,3102913.62,2056891.37,964457.34,15735.14,-740566.27,-1.0,3178340.26


In [189]:
advicedDF.to_csv('outputs/adviceDF.csv', sep=';', encoding='utf-8')