In [24]:
!pip install plotly==5.6.0



In [27]:
import pandas as pd
pd.set_option('display.max_rows', None)

import os
from io import StringIO
import requests as r
from requests.auth import HTTPBasicAuth
from datetime import datetime

In [28]:
# ---------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------
# Jenkins endpoint and job that are queried for measurement results.
JENKINS_URL = 'http://jenkins:8080'
JENKINS_JOB_NAME = 'kubedepend'

# Credentials are read from the environment; each is None when the
# corresponding variable is not set.
JENKINS_USER = os.environ.get('JENKINS_USER')
JENKINS_PASS = os.environ.get('JENKINS_PASS')

In [29]:
def custom_date_parser(x):
    """Parse a timestamp string formatted as ``MM-DD-YYYY_HH-MM-SS.ffffff``.

    Returns the corresponding ``datetime``; ``datetime.strptime`` raises
    ``ValueError`` when the string does not match the format.
    """
    return datetime.strptime(x, "%m-%d-%Y_%H-%M-%S.%f")

In [30]:
#########
# Get results from Jenkins and organize them into one DataFrame
#
# Produces two frames:
#   results          - all rows of every 'results.csv' artifact of successful builds
#   build_parameters - one row per downloaded artifact: the build's boolean/string
#                      parameters plus 'build_number' and the measurement sequence 'id'
#########
session = r.Session()
session.auth = (JENKINS_USER, JENKINS_PASS)

job_url = f'{JENKINS_URL}/job/{JENKINS_JOB_NAME}'

res = session.get(f'{job_url}/api/json')
# Fail with a clear HTTP error (e.g. bad credentials) instead of a JSON decode error
res.raise_for_status()
data = res.json()
builds = data['builds']

# Collect the pieces in plain lists and build the DataFrames once at the end.
# DataFrame.append is deprecated (removed in pandas 2.0) and growing a frame
# row by row inside a loop is quadratic.
result_frames = []
parameter_records = []

for build_summary in builds:
    build_number = build_summary['number']
    res = session.get(f'{job_url}/{build_number}/api/json')
    res.raise_for_status()
    build = res.json()
    artifact_list = build['artifacts']
    if build['result'] != 'SUCCESS' or not artifact_list:
        continue

    # Extract parameter info. Entries of 'actions' can be empty or lack a
    # '_class' key, and a build may have no ParametersAction at all; in that
    # case the build simply contributes no parameter columns.
    parameter_items = next(
        (
            action.get('parameters', [])
            for action in build['actions']
            if action and action.get('_class') == 'hudson.model.ParametersAction'
        ),
        [],
    )
    build_params = {
        parameter['name']: parameter['value']
        for parameter in parameter_items
        if parameter['_class'] in ('hudson.model.BooleanParameterValue', 'hudson.model.StringParameterValue')
    }

    for artifact in artifact_list:
        # Only builds with a 'results.csv' artifact carry measurement results
        if artifact['fileName'] != 'results.csv':
            continue

        res = session.get(f'{job_url}/{build_number}/artifact/{artifact["relativePath"]}')
        res.raise_for_status()
        # NOTE: `date_parser` is deprecated as of pandas 2.0 in favour of
        # `date_format`; it is kept here to stay compatible with older pandas.
        meas_results_df = pd.read_csv(
            StringIO(res.text),
            delimiter=',',
            parse_dates=['measurement_seq_start_time', 'measurement_start_time', 'measurement_end_time'],
            date_parser=custom_date_parser,
        )
        if meas_results_df.empty:
            # Nothing was measured, so there is no sequence id to attach parameters to
            continue
        result_frames.append(meas_results_df)

        # All rows of one results.csv share the measurement sequence id
        record = dict(build_params)
        record['build_number'] = build_number
        record['id'] = meas_results_df['id'].iloc[0]
        parameter_records.append(record)

# pd.concat raises on an empty list, so fall back to an empty frame
results = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()
build_parameters = pd.DataFrame(parameter_records)
                

In [31]:
# Clean build_parameters: parameters missing for a build are filled with
# False, the flag columns are cast to bool and the build number to int.
flag_columns = [
    'CleanEnhancements',
    'DeleteCFStack',
    'CleanWorkSpace',
    'DeployTools',
    'UseKafka',
    'UseHeartbeats',
]
column_types = {name: bool for name in flag_columns}
column_types['build_number'] = int
build_parameters = build_parameters.fillna(False).astype(column_types)

# Move the identifying columns to the front: 'id' first, then 'build_number'
leading_columns = ['id', 'build_number']
remaining_columns = [name for name in build_parameters.columns if name not in leading_columns]
build_parameters = build_parameters[leading_columns + remaining_columns]

build_parameters.head()
# build_parameters.info()

Unnamed: 0,id,build_number,CleanEnhancements,DeleteCFStack,CleanWorkSpace,CFStackName,EKSClusterName,AWSRegion,S3Bucket,DeployTools,...,MEASFaultProfile,MEASMinMeasurementCount,MEASMaxMeasurementCount,MEASTargetStd,MEASLoadDuration,MEASClusterType,MEASLocustUserCount,MEASLocustSpawnRate,MEASComment,MEASMeasurementCount
0,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,390,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,none,5,15,0.1,300,eks,1,1,final,False
1,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,389,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,True,...,none,5,15,0.1,300,eks,1,1,final,False
2,42e912b3-5cc4-47bc-a518-672fd0c0d01f,387,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,True,...,none,5,15,0.1,300,eks,1,1,final,False
3,99471c0f-1c6b-4891-a205-e398534553bc,385,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,pod-failure,5,15,0.1,300,eks,1,1,final,False
4,67e0b885-8e9f-46b6-bab3-28289e78551b,384,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,network-partition,5,15,0.1,300,eks,1,1,final,False


In [32]:
# Keep only the identifiers and the UseKafka / UseHeartbeats flags
relevant_columns = ['id', 'build_number', 'UseKafka', 'UseHeartbeats']
build_parameters = build_parameters[relevant_columns].reset_index(drop=True)
build_parameters.head()

Unnamed: 0,id,build_number,UseKafka,UseHeartbeats
0,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,390,False,False
1,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,389,False,False
2,42e912b3-5cc4-47bc-a518-672fd0c0d01f,387,False,False
3,99471c0f-1c6b-4891-a205-e398534553bc,385,False,False
4,67e0b885-8e9f-46b6-bab3-28289e78551b,384,False,False


In [33]:
# Development filter: keep only the measurements whose comment is 'final'
is_final = results['comment'] == 'final'
results = results.loc[is_final]
# results.head()
results

Unnamed: 0,id,measurement_seq_start_time,availability,mut,mdt,mtbf,measurement_start_time,measurement_end_time,submitted_jobs,finished_jobs,fault_profile,cluster_type,measurement_count,load_duration,locust_user_count,locust_spawn_rate,prev_stack_git_commit_short,comment
0,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:06:07.346348,2022-03-29 15:11:15.576554,60.0,60.0,none,eks,1,300,1,1,52988f3,final
1,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:12:40.396560,2022-03-29 15:17:47.086874,60.0,60.0,none,eks,2,300,1,1,52988f3,final
2,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:19:12.551118,2022-03-29 15:24:19.314107,60.0,60.0,none,eks,3,300,1,1,52988f3,final
3,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:25:42.268273,2022-03-29 15:30:49.067060,60.0,60.0,none,eks,4,300,1,1,52988f3,final
4,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:32:13.372686,2022-03-29 15:37:19.605380,60.0,60.0,none,eks,5,300,1,1,52988f3,final
5,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:24:58.837909,2022-03-29 14:30:05.648062,60.0,60.0,none,eks,1,300,1,1,52988f3,final
6,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:30:59.497451,2022-03-29 14:36:06.199944,60.0,60.0,none,eks,2,300,1,1,52988f3,final
7,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,0.95,0.95,0.05,1.0,2022-03-29 14:38:04.645671,2022-03-29 14:43:12.043132,60.0,60.0,none,eks,3,300,1,1,52988f3,final
8,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:44:37.344645,2022-03-29 14:49:45.276164,60.0,60.0,none,eks,4,300,1,1,52988f3,final
9,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:51:08.916967,2022-03-29 14:56:16.613282,60.0,60.0,none,eks,5,300,1,1,52988f3,final


In [34]:
# Replace every row's measurement count with the maximum measurement count
# found within the same measurement sequence (rows sharing an 'id').

# highest measurement_count per sequence, with 'id' kept as a regular column
max_count_per_sequence = results.groupby('id', as_index=False)['measurement_count'].max()

# swap the per-row counter for the per-sequence maximum; the merged column
# ends up as the last column of the frame
results = results.drop(columns=['measurement_count']).merge(max_count_per_sequence, on='id')
results

Unnamed: 0,id,measurement_seq_start_time,availability,mut,mdt,mtbf,measurement_start_time,measurement_end_time,submitted_jobs,finished_jobs,fault_profile,cluster_type,load_duration,locust_user_count,locust_spawn_rate,prev_stack_git_commit_short,comment,measurement_count
0,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:06:07.346348,2022-03-29 15:11:15.576554,60.0,60.0,none,eks,300,1,1,52988f3,final,5
1,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:12:40.396560,2022-03-29 15:17:47.086874,60.0,60.0,none,eks,300,1,1,52988f3,final,5
2,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:19:12.551118,2022-03-29 15:24:19.314107,60.0,60.0,none,eks,300,1,1,52988f3,final,5
3,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:25:42.268273,2022-03-29 15:30:49.067060,60.0,60.0,none,eks,300,1,1,52988f3,final,5
4,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,2022-03-29 15:05:56.856168,1.0,1.0,0.0,1.0,2022-03-29 15:32:13.372686,2022-03-29 15:37:19.605380,60.0,60.0,none,eks,300,1,1,52988f3,final,5
5,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:24:58.837909,2022-03-29 14:30:05.648062,60.0,60.0,none,eks,300,1,1,52988f3,final,5
6,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:30:59.497451,2022-03-29 14:36:06.199944,60.0,60.0,none,eks,300,1,1,52988f3,final,5
7,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,0.95,0.95,0.05,1.0,2022-03-29 14:38:04.645671,2022-03-29 14:43:12.043132,60.0,60.0,none,eks,300,1,1,52988f3,final,5
8,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:44:37.344645,2022-03-29 14:49:45.276164,60.0,60.0,none,eks,300,1,1,52988f3,final,5
9,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,2022-03-29 14:24:48.691597,1.0,1.0,0.0,1.0,2022-03-29 14:51:08.916967,2022-03-29 14:56:16.613282,60.0,60.0,none,eks,300,1,1,52988f3,final,5


In [11]:
# Group by measurement sequence ID and take the mean of the numerical columns.
# numeric_only=True is passed explicitly: the frame also holds string and
# datetime columns, and pandas >= 2.0 no longer drops such columns silently
# (the call would raise a TypeError instead).
results_avg_by_id = results.groupby(by=['id']).mean(numeric_only=True)
# results_avg_by_id.head()
results_avg_by_id

Unnamed: 0_level_0,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,load_duration,locust_user_count,locust_spawn_rate,measurement_count
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
03631a77-d080-44bf-a582-718e4c39084d,0.325439,0.22963,,,44.777778,44.444444,300.0,1.0,1.0,9.0
42e912b3-5cc4-47bc-a518-672fd0c0d01f,0.98,0.98,0.02,1.0,60.0,60.0,300.0,1.0,1.0,5.0
4991a6f2-16d1-4219-8ca5-a3fee9cb2631,0.445175,0.252257,,,48.416667,48.041667,300.0,1.0,1.0,24.0
67e0b885-8e9f-46b6-bab3-28289e78551b,0.86,0.495,0.095,0.59,54.8,53.4,300.0,1.0,1.0,5.0
8ab0e4f9-6224-4a65-97b1-196b787b0dc4,1.0,1.0,0.0,1.0,60.0,60.0,300.0,1.0,1.0,5.0
95e538ba-6c93-464a-b62b-81d0bb41f903,0.533806,0.325962,0.357692,0.683654,47.846154,47.384615,300.0,1.0,1.0,13.0
99471c0f-1c6b-4891-a205-e398534553bc,0.666667,0.432778,0.188889,0.621667,49.0,39.8,300.0,1.0,1.0,15.0
b5e011c2-9408-454b-b18d-8234d09b630a,0.287368,0.135833,0.573333,0.709167,0.0,0.0,300.0,1.0,1.0,10.0
bc4d2a5d-d896-4ecd-8e24-acfef42c2b3f,0.463534,0.217857,0.356548,0.574405,47.857143,47.571429,300.0,1.0,1.0,14.0
cd3a38d1-4acd-4594-9492-303f32e60b29,0.544912,0.208889,0.201111,0.41,37.666667,37.666667,300.0,1.0,1.0,5.0


In [12]:
# Collect the non-numerical attributes of each measurement sequence:
# one row per distinct combination of the selected columns.
descriptive_columns = ['id', 'fault_profile', 'measurement_seq_start_time', 'cluster_type', 'comment']
results_non_numerical = (
    results[descriptive_columns]
    .drop_duplicates()
    .reset_index(drop=True)
)
# results_non_numerical.head()
results_non_numerical

Unnamed: 0,id,fault_profile,measurement_seq_start_time,cluster_type,comment
0,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,none,2022-03-29 15:05:56.856168,eks,final
1,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,none,2022-03-29 14:24:48.691597,eks,final
2,42e912b3-5cc4-47bc-a518-672fd0c0d01f,none,2022-03-29 12:24:31.624449,eks,final
3,99471c0f-1c6b-4891-a205-e398534553bc,pod-failure,2022-03-28 11:28:43.573199,eks,final
4,67e0b885-8e9f-46b6-bab3-28289e78551b,network-partition,2022-03-28 10:34:52.759123,eks,final
5,b5e011c2-9408-454b-b18d-8234d09b630a,network-delay,2022-03-26 21:19:32.241189,eks,final
6,bc4d2a5d-d896-4ecd-8e24-acfef42c2b3f,network-delay,2022-03-26 22:23:15.709058,eks,final
7,03631a77-d080-44bf-a582-718e4c39084d,network-delay,2022-03-27 07:25:43.904569,eks,final
8,4991a6f2-16d1-4219-8ca5-a3fee9cb2631,network-delay,2022-03-27 10:02:49.338243,eks,final
9,95e538ba-6c93-464a-b62b-81d0bb41f903,network-delay,2022-03-27 14:28:16.215479,eks,final


In [13]:
# Join the per-sequence averages with the non-numerical attributes, then
# attach the build parameters; both joins match rows on 'id'.
results = (
    results_avg_by_id
    .merge(results_non_numerical, on='id')
    .merge(build_parameters, on='id')
)

# NOTE: currently disabled - filter out measurement sequences with fewer than
# MIN_MEASUREMENT_COUNT measurements
# results = results[results['measurement_count'] >= MIN_MEASUREMENT_COUNT]

# results.head()
results

Unnamed: 0,id,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,load_duration,locust_user_count,locust_spawn_rate,measurement_count,fault_profile,measurement_seq_start_time,cluster_type,comment,build_number,UseKafka,UseHeartbeats
0,42e912b3-5cc4-47bc-a518-672fd0c0d01f,0.98,0.98,0.02,1.0,60.0,60.0,300.0,1.0,1.0,5.0,none,2022-03-29 12:24:31.624449,eks,final,387,False,False
1,67e0b885-8e9f-46b6-bab3-28289e78551b,0.86,0.495,0.095,0.59,54.8,53.4,300.0,1.0,1.0,5.0,network-partition,2022-03-28 10:34:52.759123,eks,final,384,False,False
2,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,1.0,1.0,0.0,1.0,60.0,60.0,300.0,1.0,1.0,5.0,none,2022-03-29 15:05:56.856168,eks,final,390,False,False
3,99471c0f-1c6b-4891-a205-e398534553bc,0.666667,0.432778,0.188889,0.621667,49.0,39.8,300.0,1.0,1.0,15.0,pod-failure,2022-03-28 11:28:43.573199,eks,final,385,False,False
4,b5e011c2-9408-454b-b18d-8234d09b630a,0.287368,0.135833,0.573333,0.709167,0.0,0.0,300.0,1.0,1.0,10.0,network-delay,2022-03-26 21:19:32.241189,eks,final,383,False,False
5,cd3a38d1-4acd-4594-9492-303f32e60b29,0.544912,0.208889,0.201111,0.41,37.666667,37.666667,300.0,1.0,1.0,5.0,io,2022-03-23 20:12:07.239328,eks,final,357,False,False
6,f50ee8f9-c3bf-4c87-b40a-923fcccec19e,0.99,0.99,0.01,1.0,60.0,60.0,300.0,1.0,1.0,5.0,none,2022-03-29 14:24:48.691597,eks,final,389,False,False
7,fbba40eb-75ef-4bd5-82ca-6b5015974e13,0.983333,0.983333,0.016667,1.0,107.2,106.8,300.0,1.0,1.0,5.0,none,2022-03-23 13:31:51.563663,eks,final,352,False,False


In [14]:
# drop rows where there are NaN data
results = results.dropna()

# drop rows where the mtbf metric has a value > 1
results = results[results['mtbf'] <= 1]

# drop rows where comment is 'test'
# (the result used to be assigned to a misspelled name, 'resutls', so this
# filter never took effect)
results = results[results['comment'] != 'test']

# group by fault profile and only keep the newest measurement: a row is kept
# when its start time equals the latest start time within its fault profile
time_column = 'measurement_seq_start_time'
latest_idx = results.groupby(['fault_profile'])[time_column].transform('max') == results[time_column]

results = results[latest_idx]
results

Unnamed: 0,id,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,load_duration,locust_user_count,locust_spawn_rate,measurement_count,fault_profile,measurement_seq_start_time,cluster_type,comment,build_number,UseKafka,UseHeartbeats
1,67e0b885-8e9f-46b6-bab3-28289e78551b,0.86,0.495,0.095,0.59,54.8,53.4,300.0,1.0,1.0,5.0,network-partition,2022-03-28 10:34:52.759123,eks,final,384,False,False
2,8ab0e4f9-6224-4a65-97b1-196b787b0dc4,1.0,1.0,0.0,1.0,60.0,60.0,300.0,1.0,1.0,5.0,none,2022-03-29 15:05:56.856168,eks,final,390,False,False
3,99471c0f-1c6b-4891-a205-e398534553bc,0.666667,0.432778,0.188889,0.621667,49.0,39.8,300.0,1.0,1.0,15.0,pod-failure,2022-03-28 11:28:43.573199,eks,final,385,False,False
4,b5e011c2-9408-454b-b18d-8234d09b630a,0.287368,0.135833,0.573333,0.709167,0.0,0.0,300.0,1.0,1.0,10.0,network-delay,2022-03-26 21:19:32.241189,eks,final,383,False,False
5,cd3a38d1-4acd-4594-9492-303f32e60b29,0.544912,0.208889,0.201111,0.41,37.666667,37.666667,300.0,1.0,1.0,5.0,io,2022-03-23 20:12:07.239328,eks,final,357,False,False


In [23]:
# Save results. DataFrame.to_csv does not create missing directories, so make
# sure the target directory exists before writing.
os.makedirs('data', exist_ok=True)
results.to_csv('data/results_cleaned.csv')