In [1]:
##DESCRIPTION
# This notebook calculates the so called "Polygons" to describe how a system under test reacts to a set of performance tests.


In [2]:
#install.packages("RColorBrewer", repos='http://cran.us.r-project.org')
#install.packages("gridExtra")
#install.packages("getPass")
#install.packages("RPostgreSQL")

library("RColorBrewer")
library(ggplot2)
library(gridExtra)
library(getPass)
library(RPostgreSQL)
library(dplyr)

Loading required package: DBI


Attaching package: ‘dplyr’


The following object is masked from ‘package:gridExtra’:

    combine


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
db_connection <- DBI::dbConnect(dbDriver(drvName = "PostgreSQL"), dbname = Sys.getenv("DB_NAME"), host=Sys.getenv("HOST_NAME"), port="5432", user=Sys.getenv("POSTGRES_USERNAME"), password=Sys.getenv("POSTGRES_PASSWORD"))

In [4]:
# This query outputs the list of current projects in the db.
project_id = dbGetQuery(db_connection, "SELECT id::text FROM projects WHERE name='Sockshop'")$id

In [5]:
sql_operational_profile = "
    SELECT users, frequency FROM operational_profile_observations 
        WHERE operational_profile = (SELECT id FROM operational_profiles WHERE project = ?project)"
operational_profile <- dbGetQuery(db_connection, sqlInterpolate(db_connection, sql_operational_profile, project = project_id))

In [6]:
sql_all_data = "
    SELECT tests.id::text AS test_id, test_sets.id::text AS test_set_id, test_properties.value::numeric AS users, metrics.abbreviation AS metric, items.name AS item_name, results.value AS item_value
        FROM results 
        INNER JOIN tests ON results.test = tests.id
        INNER JOIN items ON results.item = items.id
        INNER JOIN test_properties ON (test_properties.test = tests.id AND test_properties.name = 'load')
        INNER JOIN metrics ON results.metric = metrics.id 
        INNER JOIN test_set_tests ON (test_set_tests.test = tests.id)
        INNER JOIN test_sets ON (test_sets.id = test_set_tests.test_set AND test_sets.project = tests.project)
        WHERE tests.project = ?project AND metrics.abbreviation IN ('art', 'sdrt', 'mix')"
all_data = dbGetQuery(db_connection, sqlInterpolate(db_connection, sql_all_data, project = project_id))

list_of_microservices = as.data.frame(unique(all_data[,5]))
no_of_microservices = nrow(list_of_microservices)

test_users_metric<-unique(all_data[,c(1:4)])

In [7]:
test_users_metric[list_of_microservices[,1]]<-NA

In [19]:
#If the tests occur too fast, it might be that some services have no data. This case is not handled, yet.

for (i in 1:nrow(test_users_metric)) {
    search_test_id <- test_users_metric[i,1]
    search_metric <- test_users_metric[i,4]
    
    for (j in 1:no_of_microservices) {
        search_microservice <- list_of_microservices[j,]
        
        row <- filter(all_data, test_id == search_test_id & metric == search_metric & item_name == search_microservice)
        
        if (dim(row)[1] > 0) {
            found_value = row$item_value
            
            if (length(found_value) == 1) {
                test_users_metric[i,j+4] <- found_value
            }
        }
    }
}
raw_data <- test_users_metric
raw_data


Unnamed: 0_level_0,test_id,test_set_id,users,metric,get_cart,get_catalogue1,get_index,add_item_to_cart,get_catalogue2,get_catalogue3,⋯,get_customer_orders,get_detail,get_item,get_orders,get_related,get_tags,login,get_address,get_basket,get_card
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,art,5052.0193805,1734.1451773,20.4331530,,,,⋯,,,,,,,,,,
2,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,sdrt,0.0000000,1890.3703704,24.4444444,,,,⋯,,,,,,,,,,
3,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,mix,0.2500000,0.3750000,0.3750000,,,,⋯,,,,,,,,,,
4,01106da7-f28f-44a1-8c80-e352991d3de4,9750e08c-428b-40f2-9d36-e382eef84002,50,art,386.8301784,556.0919042,282.1821439,683.64814129,4.339986e+02,360.25526731,⋯,2.030548e+02,170.29662710,318.06286258,402.30327050,279.69824609,3.889557e+02,7.532411e+02,,,
5,01106da7-f28f-44a1-8c80-e352991d3de4,9750e08c-428b-40f2-9d36-e382eef84002,50,sdrt,118.5185185,111.1111111,155.5555556,22.22222222,1.333333e+02,125.92592593,⋯,2.222222e+01,118.51851852,29.62962963,14.81481481,44.44444444,1.185185e+02,4.518519e+02,,,
6,01106da7-f28f-44a1-8c80-e352991d3de4,9750e08c-428b-40f2-9d36-e382eef84002,50,mix,0.2392344,0.1578947,0.1607656,0.01339713,3.349282e-02,0.03349282,⋯,2.488038e-02,0.04688995,0.04976077,0.01531100,0.03349282,3.349282e-02,4.306220e-02,,,
7,6c8d71e3-ac11-41bf-bf33-d34c781363e1,9750e08c-428b-40f2-9d36-e382eef84002,100,art,784.9349789,1124.8764631,567.8043484,,8.712128e+02,796.40384952,⋯,4.919069e+02,,,655.57122930,,7.525460e+02,1.441257e+03,,,
8,6c8d71e3-ac11-41bf-bf33-d34c781363e1,9750e08c-428b-40f2-9d36-e382eef84002,100,sdrt,222.2222222,370.3703704,155.5555556,,7.407407e+01,14.81481481,⋯,8.148148e+01,,,51.85185185,,8.148148e+01,7.407407e+01,,,
9,6c8d71e3-ac11-41bf-bf33-d34c781363e1,9750e08c-428b-40f2-9d36-e382eef84002,100,mix,0.2621083,0.2184236,0.2184236,,3.893637e-02,0.03798670,⋯,2.849003e-02,,,0.02849003,,3.798670e-02,5.698006e-02,,,
10,46a2ac83-0844-4617-8c85-7688d9556e44,9750e08c-428b-40f2-9d36-e382eef84002,150,art,1217.4239596,1869.4807542,834.5343658,,1.234161e+03,,⋯,6.016308e+02,,,,,,1.962654e+03,,,


In [9]:
tests <- unique(raw_data[,1:3])

max_no_of_users <- max(raw_data[,3])
min_no_of_users <- min(raw_data[,3])

user_load <- operational_profile[,1]
access_count <- operational_profile[,2]
max_no_of_requests <- max(user_load)
scale_factor <- max_no_of_users/max_no_of_requests
scaled_user_load <- floor(scale_factor * user_load)

In [16]:
##Create aggregate values (by fifty) of the user frequency from "operational_profile" 

calculate_aggregated_values <- function() {    
    access_frequency <- access_count/sum(access_count)
    by_fifty <- which(scaled_user_load %% 50 == 0)
    no_of_aggregated_rows = length(by_fifty)

    binProb <- c()
    for (i in 1:no_of_aggregated_rows) {
        if (i==1) {
            binProb[i] <- sum(access_frequency[1:by_fifty[i]])
        } else {
            binProb[i] <- sum(access_frequency[(by_fifty[i-1]+1):by_fifty[i]])
        }
    }

    matrix(c(scaled_user_load[by_fifty], binProb), ncol=2, nrow=no_of_aggregated_rows, dimnames=list(c(1:no_of_aggregated_rows), c("Workload (number of users)", "Domain metric per workload")))
}

aggregated_values_from_operational_profile <- calculate_aggregated_values()
aggregated_values_from_operational_profile

Unnamed: 0,Workload (number of users),Domain metric per workload
1,50,0.10582011
2,100,0.18518519
3,150,0.22222222
4,200,0.22222222
5,300,0.16137566
6,250,0.04497354


ERROR: Error in eval(expr, envir, enclos): object 'data_of_min_user' not found


In [18]:
#Define the threshold for each service. The threshold is a vector computed as avg+3*SD for the configuration with 
#Users=2, Memory=4, CPU=1, CartReplica=1   

data_of_min_user<-raw_data[raw_data$users==min_no_of_users,]
test_of_min_user<-tests[tests$users==min_no_of_users,]

avg <-data_of_min_user[data_of_min_user$metric=="art",][,-c(1:4)]
sd <- data_of_min_user[data_of_min_user$metric=="sdrt",][,-c(1:4)]
threshold<-data.frame(test_of_min_user,avg+3*sd)

#Check the first line of the dataframe thereshold: it must be one line
#head(threshold)
data_of_min_user

Unnamed: 0_level_0,test_id,test_set_id,users,metric,get_cart,get_catalogue1,get_index,add_item_to_cart,get_catalogue2,get_catalogue3,⋯,get_customer_orders,get_detail,get_item,get_orders,get_related,get_tags,login,get_address,get_basket,get_card
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,art,5052.019,1734.145,20.43315,,,,⋯,,,,,,,,,,
2,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,sdrt,0.0,1890.37,24.44444,,,,⋯,,,,,,,,,,
3,dea746ad-bd81-491b-96f6-f01e21da77a4,e28f82f0-6354-43d4-8201-77fdeb296a47,2,mix,0.25,0.375,0.375,,,,⋯,,,,,,,,,,


In [15]:
#Exclude case with user = 2 from dataFile and check whether each service passes or fail: avg<threshol (Pass). 
#Compute the relative mass for each configuration

tests_without_benchmark<-tests[!tests$users==min_no_of_users,]
raw_data_without_benchmark<-raw_data[!raw_data$users==min_no_of_users,]

avg<-raw_data_without_benchmark[raw_data_without_benchmark$metric=="art",-4]
sd<-raw_data_without_benchmark[raw_data_without_benchmark$metric=="sdrt",-4]
mix<-raw_data_without_benchmark[raw_data_without_benchmark$metric=="mix",-4]

#Check pass/fail for each service. the "mix" value is 0 if fail and mixTemp if pass. Compute the relative mass for each configuration
pass_criteria<-avg

calculate_relative_mass <- function() {    
    relative_mass<-c()

    mix_of_passing_tests<-as.data.frame(matrix(nrow=nrow(tests_without_benchmark), ncol=ncol(raw_data_without_benchmark)-1))

    for(j in 1:nrow(pass_criteria)){
        mix_of_passing_tests[j,]<-mix[j,]
        for(i in 3:(2+no_of_microservices)){
            if(pass_criteria[j,i]>threshold[i]){
                mix_of_passing_tests[j,i]<-0
            }
        }
        relative_mass[j]<-sum(mix_of_passing_tests[j,3:(2+no_of_microservices)])
    }
   
    relative_mass
}

relative_mass <- calculate_relative_mass()

#Show first lines of passCriteria
head(pass_criteria)

[1] 50
  users
1     2
[1] 386.8302
  get_cart
1 5052.019
[1] 556.0919
  get_catalogue1
1       7405.256
[1] 282.1821
  get_index
1  93.76649
[1] 683.6481
  add_item_to_cart
1               NA


ERROR: Error in if (pass_criteria[j, i] > threshold[i]) {: missing value where TRUE/FALSE needed


In [8]:
#Compute the domain metric for each configuration
tests_without_benchmark$relative_mass<-relative_mass

absolute_mass<-c()
for(j in 1:nrow(tests_without_benchmark)) {
    absolute_mass[j]<-tests_without_benchmark[j,"relative_mass"]*aggregated_values_from_operational_profile[match(tests_without_benchmark[j,"users"], aggregated_values_from_operational_profile[,1]),2]
}
tests_without_benchmark$absolute_mass<-absolute_mass

test_sets<-as.data.frame(unique(all_data[,2]))
colnames(test_sets)[1] <- "test_set_id"

set<-list()
domain_metric_list<-list()
for(i in 1:nrow(test_sets)){
    set[[i]]<-tests_without_benchmark[which(tests_without_benchmark[,2] == test_sets[i,1]),]
    domain_metric_list[[i]]<-set[[i]][,c(3,5)][order(set[[i]][,c(3,5)][,1]),]
}

#Uncomment this to show first lines of domain_metric_list
#head(domain_metric_list)
domain_metric_list

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
1540,50,0.10118519
286,100,0.1772037
1,150,0.1082
2053,200,0.04051111
115,250,0.03723704
343,300,0.01108624

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
1483,50,0.10128042
3592,100,0.17716667
571,150,0.07744444
3649,200,0.15666667
3535,250,0.03721667
58,300,0.03414709

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
172,50,0.10121693
2509,100,0.17716667
1027,150,0.10811111
799,200,0.04057778
2794,250,0.03717593
3136,300,0.01109841

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
970,50,0.10122751
514,100,0.13046296
2452,150,0.15664444
628,200,0.15651111
1939,250,0.14361111
229,300,0.01112275

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
2338,50,0.10121693
400,100,0.17718519
3478,150,0.10548889
3193,200,0.04057778
685,250,0.0371963
2110,300,0.01033783

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
457,50,0.101227513
3421,100,0.177111111
2224,150,0.133466667
2908,200,0.040555556
1996,250,0.037216667
1312,300,0.009285185

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
2851,50,0.10122751
1141,100,0.17711111
742,150,0.10826667
2281,200,0.0406
3820,250,0.03721667
1426,300,0.01108624

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
913,50,0.10121693
1084,100,0.17709259
856,150,0.1276
2395,200,0.04064444
2737,250,0.14391667
1369,300,0.03154286

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
3079,50,0.1012381
3706,100,0.17714815
4105,150,0.13335556
1198,200,0.04062222
3022,250,0.0371963
2680,300,0.01111667

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
3364,50,0.10119577
1882,100,0.17712963
2167,150,0.08702222
1255,200,0.04053333
3934,250,0.14357037
2566,300,0.03336825

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
2623,50,0.10120635
1654,100,0.17711111
1597,150,0.10817778
2965,200,0.04068889
1711,250,0.03711481
1825,300,0.01111667

Unnamed: 0_level_0,users,absolute_mass
Unnamed: 0_level_1,<dbl>,<dbl>
1768,50,0.10121693
3307,100,0.13048148
3250,150,0.13331111
4048,200,0.04048889
3991,250,0.03715556
3763,300,0.01030741

users,absolute_mass
<dbl>,<dbl>


In [9]:
#Compute Cumulative Domain metric: summing up absoluteMass over users for each configuration
test_sets$domain_metric<-0
for(i in 1:nrow(test_sets)){
    test_sets[i,2]<-round(sum(tests_without_benchmark[which(tests_without_benchmark[,2] == test_sets[i,1]),"absolute_mass"]),4)
}
domain_metric<-test_sets

domain_metric

test_set_id,domain_metric
<chr>,<dbl>
ed62e924-cb3c-406b-8e89-1009eedc5b1c,0.4754
1d87c9cb-58e4-46ba-9b7f-1511ce637f52,0.5839
567f4fa3-44e0-4618-9940-2c2913b2bed5,0.4753
6492264c-060c-4933-93e8-0004ec63294a,0.6996
4d86eda1-c7d7-41f7-84d1-c8f3f363b9d2,0.472
be1ce813-c312-40ae-ae89-ca94d9b79567,0.4989
f76ddf6c-0950-42ba-821a-918a002cf3ff,0.4755
6275444c-6031-4164-ba33-f934d70ceb87,0.622
3074578d-738d-46ba-bb94-e83332a27abf,0.5007
dcd7dc15-32fe-4730-9fe4-ca54900fe0ac,0.5828


In [24]:
#Plot operational_profile against domain metric for each configuration

plot(aggregated_values_from_operational_profile, xlim=c(50, max_no_of_users), ylim=c(0, 0.3),cex.lab=1.3)
polygon(c(50,aggregated_values_from_operational_profile[,1],max_no_of_users),c(0,aggregated_values_from_operational_profile[,2],0), col="brown", lty = 1, lwd = 2, border = "black")
color=heat.colors(11)
color_transparent <- adjustcolor(color, alpha.f = 0.2) 

sorted_domain_metric<-domain_metric
k<-which(sorted_domain_metric[,2]==max(sorted_domain_metric[,2]))
#Green line whithin the polygon is the best domain matric line. 
#It corresponds to the second line in the final table below

for(i in 1:nrow(test_sets)) {
    lines(domain_metric_list[[i]], type="l", col=heat.colors(11)[i])    
    lines(domain_metric_list[[k]], type="l", col="green")
    polygon(c(50,t(domain_metric_list[[i]][1]),max_no_of_users),c(0,t(domain_metric_list[[i]][2]),0), col=color_transparent[i], lty = 1, lwd = 1 , border = rainbow(11)[i])
}

text(aggregated_values_from_operational_profile,labels = round(aggregated_values_from_operational_profile[,2],3), pos=3, col="black")

graphics.off()

In [11]:
DBI::dbDisconnect(db_connection)