In [200]:
!pip install plotly==5.6.0



In [201]:
import plotly.express as px
import pandas as pd
pd.options.plotting.backend = "plotly"

import os
from io import StringIO
import requests as r
from requests.auth import HTTPBasicAuth
from datetime import datetime

In [202]:
###########
# Constants
###########

# Where the Jenkins server lives and which job holds the measurement builds.
JENKINS_URL = 'http://jenkins:8080'
JENKINS_JOB_NAME = 'kubedepend'

# Credentials come from the environment; either is None when the variable is unset.
JENKINS_USER = os.environ.get('JENKINS_USER')
JENKINS_PASS = os.environ.get('JENKINS_PASS')

# Lower bound applied to the `measurement_count` column when filtering results.
MIN_MEASUREMENT_COUNT = 5

In [203]:
def custom_date_parser(x):
    """Parse a timestamp string laid out as ``%m-%d-%Y_%H-%M-%S.%f`` into a ``datetime``."""
    return datetime.strptime(x, "%m-%d-%Y_%H-%M-%S.%f")

In [204]:
#########
# Get results from Jenkins and organize them into one DataFrame
#########
# Outputs of this cell:
#   results          - every row of every build's 'results.csv', concatenated
#   build_parameters - one row per such build: its Boolean/String parameters,
#                      plus 'build_number' and the measurement sequence 'id'

MEASUREMENT_TIME_COLUMNS = ['measurement_seq_start_time', 'measurement_start_time', 'measurement_end_time']
PARAMETER_ACTION_CLASS = 'hudson.model.ParametersAction'
PARAMETER_VALUE_CLASSES = ('hudson.model.BooleanParameterValue', 'hudson.model.StringParameterValue')


def _get(session, url):
    """GET `url` with `session`; raise requests.HTTPError on a 4xx/5xx answer.

    Failing here gives a clear error (e.g. bad credentials) instead of a
    confusing JSON/CSV parse failure further down.
    """
    response = session.get(url)
    response.raise_for_status()
    return response


def _extract_build_parameters(build):
    """Return {name: value} for the Boolean/String parameters of a build's JSON.

    Only the first ParametersAction entry is read. A build without one yields
    an empty dict instead of raising IndexError.
    """
    for action in build.get('actions') or []:
        # Entries may be empty/falsy, hence the truthiness guard.
        if action and action.get('_class') == PARAMETER_ACTION_CLASS:
            return {
                parameter['name']: parameter['value']
                for parameter in action.get('parameters') or []
                if parameter.get('_class') in PARAMETER_VALUE_CLASSES
            }
    return {}


session = r.Session()
session.auth = (JENKINS_USER, JENKINS_PASS)

job_url = f'{JENKINS_URL}/job/{JENKINS_JOB_NAME}'
builds = _get(session, f'{job_url}/api/json').json()['builds']

# Collect pieces in plain lists and build each DataFrame once at the end;
# DataFrame.append in a loop is quadratic and no longer exists in pandas >= 2.0.
result_frames = []
parameter_records = []

for build_summary in builds:
    build_number = build_summary['number']
    build = _get(session, f'{job_url}/{build_number}/api/json').json()

    for artifact in build.get('artifacts') or []:
        # Only builds that archived a 'results.csv' artifact are of interest
        if artifact['fileName'] != 'results.csv':
            continue

        csv_response = _get(session, f'{job_url}/{build_number}/artifact/{artifact["relativePath"]}')
        meas_results_df = pd.read_csv(StringIO(csv_response.text), delimiter=',')
        if meas_results_df.empty:
            # A header-only CSV carries no measurement id to link parameters to.
            continue

        # Parse timestamps after loading (read_csv's date_parser is deprecated).
        for column in MEASUREMENT_TIME_COLUMNS:
            meas_results_df[column] = pd.to_datetime(meas_results_df[column].map(custom_date_parser))
        result_frames.append(meas_results_df)

        # The first row's 'id' is taken as the id of the whole measurement sequence.
        record = _extract_build_parameters(build)
        record['build_number'] = build_number
        record['id'] = meas_results_df['id'].iloc[0]
        parameter_records.append(record)

# pd.concat refuses an empty list, so fall back to an empty frame.
results = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()
build_parameters = pd.DataFrame(parameter_records)
                

In [205]:
# Clean build_parameters
# Missing values are replaced with False before casting (presumably a build that
# lacks a parameter should count as "not set" - TODO confirm).
build_parameters = build_parameters.fillna(False)

# Cast all flag columns to bool and the build number to int in a single call.
bool_flag_columns = ['CleanEnhancements', 'DeleteCFStack', 'CleanWorkSpace', 'DeployTools', 'UseKafka', 'UseHeartbeats']
column_dtypes = dict.fromkeys(bool_flag_columns, bool)
column_dtypes['build_number'] = int
build_parameters = build_parameters.astype(column_dtypes)

# Put the identifying columns first: 'id', then 'build_number', then the rest.
leading_columns = ['id', 'build_number']
remaining_columns = [name for name in build_parameters.columns if name not in leading_columns]
build_parameters = build_parameters[leading_columns + remaining_columns]


build_parameters.head()
# build_parameters.info()

Unnamed: 0,id,build_number,CleanEnhancements,DeleteCFStack,CleanWorkSpace,CFStackName,EKSClusterName,AWSRegion,S3Bucket,DeployTools,...,RunMeasurement,MEASFaultProfile,MEASMeasurementCount,MEASLoadDuration,MEASClusterType,MEASLocustUserCount,MEASLocustSpawnRate,MEASComment,UseHPA,UseKEDA
0,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,318,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,1.0,pod-failure,10,300,eks,1,1,base,False,False
1,4eadf7a3-8438-412f-9902-780a23466668,315,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,True,...,True,pod-kill,10,300,eks,1,1,base,False,False
2,9152f84b-7245-4347-972f-2cb96892e8c3,314,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,True,none,10,300,eks,1,1,base,False,False
3,520f89e5-da6d-41c5-811a-7be604cd1bd5,313,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,True,none,5,180,eks,1,1,base,False,False
4,953e2115-b3c3-4f9f-a0b6-1068e79d86f1,312,False,False,True,morova-eks-cluster,morova-eks-cluster,us-east-2,morova-bucket,False,...,True,none,10,180,eks,1,1,base,False,False


In [206]:
# Select relevant columns
relevant_columns = ['id', 'build_number', 'UseKafka', 'UseHeartbeats', 'UseHPA', 'UseKEDA']
build_parameters = build_parameters[relevant_columns].reset_index(drop=True)
build_parameters.head()

Unnamed: 0,id,build_number,UseKafka,UseHeartbeats,UseHPA,UseKEDA
0,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,318,False,False,False,False
1,4eadf7a3-8438-412f-9902-780a23466668,315,False,False,False,False
2,9152f84b-7245-4347-972f-2cb96892e8c3,314,False,False,False,False
3,520f89e5-da6d-41c5-811a-7be604cd1bd5,313,False,False,False,False
4,953e2115-b3c3-4f9f-a0b6-1068e79d86f1,312,False,False,False,False


In [207]:
# for development, get only base measurements
is_base_measurement = results['comment'].eq('base')
results = results.loc[is_base_measurement]
results.head()

Unnamed: 0,id,measurement_seq_start_time,availability,mut,mdt,mtbf,measurement_start_time,measurement_end_time,submitted_jobs,finished_jobs,fault_profile,cluster_type,measurement_count,load_duration,locust_user_count,locust_spawn_rate,prev_stack_git_commit_short,comment
0,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,2022-03-07 19:13:19.466927,0.55,0.275,0.225,0.5,2022-03-07 19:13:34.050629,2022-03-07 19:18:50.968751,39.0,37.0,pod-failure,eks,10,300,1,1,66f3ded,base
1,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,2022-03-07 19:13:19.466927,0.75,0.375,0.25,0.625,2022-03-07 19:19:13.624507,2022-03-07 19:24:29.407058,46.0,46.0,pod-failure,eks,10,300,1,1,66f3ded,base
2,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,2022-03-07 19:13:19.466927,0.65,0.216667,0.175,0.391667,2022-03-07 19:26:01.180437,2022-03-07 19:31:10.207749,50.0,36.0,pod-failure,eks,10,300,1,1,66f3ded,base
3,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,2022-03-07 19:13:19.466927,0.6,0.2,0.2,0.4,2022-03-07 19:32:36.120793,2022-03-07 19:37:45.074479,36.0,36.0,pod-failure,eks,10,300,1,1,66f3ded,base
4,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,2022-03-07 19:13:19.466927,0.8,0.4,0.2,0.6,2022-03-07 19:39:13.134373,2022-03-07 19:44:21.920235,46.0,46.0,pod-failure,eks,10,300,1,1,66f3ded,base


In [208]:
# TODO clean data
# Print per-column non-null counts and dtypes to see which columns still contain gaps.
results.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 55 entries, 0 to 54
Data columns (total 18 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   id                           55 non-null     object        
 1   measurement_seq_start_time   55 non-null     datetime64[ns]
 2   availability                 55 non-null     float64       
 3   mut                          55 non-null     float64       
 4   mdt                          51 non-null     float64       
 5   mtbf                         44 non-null     float64       
 6   measurement_start_time       55 non-null     datetime64[ns]
 7   measurement_end_time         55 non-null     datetime64[ns]
 8   submitted_jobs               52 non-null     float64       
 9   finished_jobs                52 non-null     float64       
 10  fault_profile                55 non-null     object        
 11  cluster_type                 55 non-null     ob

In [209]:
#  group by measurement ID and get mean of numerical properties
# numeric_only=True states explicitly that string/datetime columns are left out.
# Without it, pandas 1.x drops them with a FutureWarning and pandas >= 2.0
# raises a TypeError on the string columns.
results_avg_by_id = results.groupby(by=['id']).mean(numeric_only=True)
results_avg_by_id.head()

Unnamed: 0_level_0,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,measurement_count,load_duration,locust_user_count,locust_spawn_rate
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
4eadf7a3-8438-412f-9902-780a23466668,0.924444,0.503333,0.055,0.558333,83.857143,78.714286,10.0,300.0,1.0,1.0
520f89e5-da6d-41c5-811a-7be604cd1bd5,0.95,0.95,0.05,1.0,36.0,35.8,5.0,180.0,1.0,1.0
668c3aad-e0e0-4f8c-a76d-7c40839b74aa,0.61,0.284167,0.3,0.584167,39.0,36.4,10.0,300.0,1.0,1.0
9152f84b-7245-4347-972f-2cb96892e8c3,1.0,1.0,0.0,1.0,60.0,60.0,10.0,300.0,1.0,1.0
953e2115-b3c3-4f9f-a0b6-1068e79d86f1,0.975,175.5,4.5,180.0,36.0,36.0,10.0,180.0,1.0,1.0


In [210]:
# get non-numerical data for each measurement sequence
# Duplicate rows are collapsed, so each distinct combination appears once.
non_numerical_columns = ['id', 'fault_profile', 'measurement_seq_start_time', 'cluster_type', 'comment']
results_non_numerical = results[non_numerical_columns].drop_duplicates().reset_index(drop=True)
results_non_numerical.head()

Unnamed: 0,id,fault_profile,measurement_seq_start_time,cluster_type,comment
0,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,pod-failure,2022-03-07 19:13:19.466927,eks,base
1,4eadf7a3-8438-412f-9902-780a23466668,pod-kill,2022-03-06 13:42:10.325503,eks,base
2,9152f84b-7245-4347-972f-2cb96892e8c3,none,2022-03-06 11:26:28.082508,eks,base
3,520f89e5-da6d-41c5-811a-7be604cd1bd5,none,2022-03-06 10:53:31.260856,eks,base
4,953e2115-b3c3-4f9f-a0b6-1068e79d86f1,none,2022-03-05 20:32:20.446613,eks,base


In [211]:
# Join everything on the measurement sequence 'id':
#   per-sequence averages -> non-numerical data -> build parameters
results = (
    results_avg_by_id
    .merge(results_non_numerical, on='id')
    .merge(build_parameters, on='id')
)

# filter out measurement sequence with less than MIN_MEASUREMENT_COUNT measurements
has_enough_measurements = results['measurement_count'].ge(MIN_MEASUREMENT_COUNT)
results = results.loc[has_enough_measurements]

results.head()

Unnamed: 0,id,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,measurement_count,load_duration,locust_user_count,locust_spawn_rate,fault_profile,measurement_seq_start_time,cluster_type,comment,build_number,UseKafka,UseHeartbeats,UseHPA,UseKEDA
0,4eadf7a3-8438-412f-9902-780a23466668,0.924444,0.503333,0.055,0.558333,83.857143,78.714286,10.0,300.0,1.0,1.0,pod-kill,2022-03-06 13:42:10.325503,eks,base,315,False,False,False,False
1,520f89e5-da6d-41c5-811a-7be604cd1bd5,0.95,0.95,0.05,1.0,36.0,35.8,5.0,180.0,1.0,1.0,none,2022-03-06 10:53:31.260856,eks,base,313,False,False,False,False
2,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,0.61,0.284167,0.3,0.584167,39.0,36.4,10.0,300.0,1.0,1.0,pod-failure,2022-03-07 19:13:19.466927,eks,base,318,False,False,False,False
3,9152f84b-7245-4347-972f-2cb96892e8c3,1.0,1.0,0.0,1.0,60.0,60.0,10.0,300.0,1.0,1.0,none,2022-03-06 11:26:28.082508,eks,base,314,False,False,False,False
4,953e2115-b3c3-4f9f-a0b6-1068e79d86f1,0.975,175.5,4.5,180.0,36.0,36.0,10.0,180.0,1.0,1.0,none,2022-03-05 20:32:20.446613,eks,base,312,False,False,False,False


In [212]:
# drop rows where there are NaN data
results = results.dropna()

# keep only rows whose mtbf value is at most 1
results = results[results['mtbf'] <= 1]

# drop rows where comment is 'test'
# (the filtered frame used to be bound to the misspelled name `resutls`,
#  so this filter was silently never applied)
results = results[results['comment'] != 'test']

# group by fault profile and only keep the newest measurement:
# a row survives when its start time equals the maximum start time of its group
time_column = 'measurement_seq_start_time'
latest_idx = results.groupby(['fault_profile'])[time_column].transform('max') == results[time_column]

results = results[latest_idx]
results

Unnamed: 0,id,availability,mut,mdt,mtbf,submitted_jobs,finished_jobs,measurement_count,load_duration,locust_user_count,locust_spawn_rate,fault_profile,measurement_seq_start_time,cluster_type,comment,build_number,UseKafka,UseHeartbeats,UseHPA,UseKEDA
0,4eadf7a3-8438-412f-9902-780a23466668,0.924444,0.503333,0.055,0.558333,83.857143,78.714286,10.0,300.0,1.0,1.0,pod-kill,2022-03-06 13:42:10.325503,eks,base,315,False,False,False,False
2,668c3aad-e0e0-4f8c-a76d-7c40839b74aa,0.61,0.284167,0.3,0.584167,39.0,36.4,10.0,300.0,1.0,1.0,pod-failure,2022-03-07 19:13:19.466927,eks,base,318,False,False,False,False
3,9152f84b-7245-4347-972f-2cb96892e8c3,1.0,1.0,0.0,1.0,60.0,60.0,10.0,300.0,1.0,1.0,none,2022-03-06 11:26:28.082508,eks,base,314,False,False,False,False


In [213]:
# Keep the fault profile label plus the four metrics that get plotted.
metric_columns = ['fault_profile', 'availability', 'mut', 'mdt', 'mtbf']
metrics = results.loc[:, metric_columns]
metrics.head()

Unnamed: 0,fault_profile,availability,mut,mdt,mtbf
0,pod-kill,0.924444,0.503333,0.055,0.558333
2,pod-failure,0.61,0.284167,0.3,0.584167
3,none,1.0,1.0,0.0,1.0


In [214]:
# TEST
# Append two synthetic fault profiles so the plots below have more traces to show.
cols = ['fault_profile', 'availability', 'mut', 'mdt', 'mtbf']
synthetic_rows = pd.DataFrame(
    [
        ['test1', 0.8, 0.6, 0.2, 0.8],
        ['test2', 0.4, 0.3, 0.5, 0.8],
    ],
    columns=cols,
)
metrics = pd.concat([metrics, synthetic_rows], ignore_index=True)
metrics.head()

Unnamed: 0,fault_profile,availability,mut,mdt,mtbf
0,pod-kill,0.924444,0.503333,0.055,0.558333
1,pod-failure,0.61,0.284167,0.3,0.584167
2,none,1.0,1.0,0.0,1.0
3,test1,0.8,0.6,0.2,0.8
4,test2,0.4,0.3,0.5,0.8


In [215]:
# create long data from wide: one row per (fault_profile, metric) pair
metrics_long = pd.melt(metrics, id_vars=['fault_profile'], var_name='metric')
metrics_long.head()

Unnamed: 0,fault_profile,metric,value
0,pod-kill,availability,0.924444
1,pod-failure,availability,0.61
2,none,availability,1.0
3,test1,availability,0.8
4,test2,availability,0.4


In [216]:
# ALL IN ONE Radar
# One closed polar line per fault profile, all drawn on the same axes.
fig = px.line_polar(
    metrics_long,
    r="value",
    theta="metric",
    color="fault_profile",
    line_close=True,
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
# fig.update_traces(fill='toself')
fig.show()

In [217]:
# Inspect the long-format rows of a single fault profile.
metrics_long.loc[metrics_long['fault_profile'].eq('pod-kill')]

Unnamed: 0,fault_profile,metric,value
0,pod-kill,availability,0.924444
5,pod-kill,mut,0.503333
10,pod-kill,mdt,0.055
15,pod-kill,mtbf,0.558333


In [218]:
# 1 Radar PLOT / PROFILE (no subplots)

fault_profiles = metrics['fault_profile'].tolist()

# Draw a separate filled radar figure for every fault profile.
for profile in fault_profiles:
    belongs_to_profile = metrics_long['fault_profile'].eq(profile)
    data = metrics_long.loc[belongs_to_profile]

    fig = px.line_polar(data, r="value", theta='metric', line_close=True, title=f'Fault Profile: {profile}')
    fig.update_traces(fill='toself')
    fig.show()

In [219]:
# GROUPED BARCHART
import plotly.graph_objects as go

fault_profiles = metrics['fault_profile'].tolist()

# One bar trace per fault profile; rows are sorted by metric name first.
bars = []
for profile in fault_profiles:
    belongs_to_profile = metrics_long['fault_profile'].eq(profile)
    data = metrics_long.loc[belongs_to_profile].sort_values(by='metric')
    bars.append(go.Bar(x=data['metric'], y=data['value'], name=profile))

fig = go.Figure(data=bars)
fig.update_layout(barmode='group')
fig.show()


In [220]:
# make_subplots is only needed by the (currently disabled) radar-subplots cell below.
from plotly.subplots import make_subplots

# Show the raw values of the second metrics row. The former bare `len(metrics)`
# statement was dropped: only a cell's last expression is displayed, so its
# value was computed and thrown away.
metrics.iloc[1, :].values.tolist()


['pod-failure', 0.61, 0.2841666666666667, 0.3, 0.5841666666666667]

In [221]:
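# Radar SUBPLOTS
# NOTE: disabled. The grid is fixed at 2x2 (4 slots) but `metrics` holds 5 fault
# profiles, so the 5th trace is sent to row 3 and plotly raises
# "The (row, col) pair sent is out of range". Derive the number of rows from
# len(metrics) before re-enabling this cell.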

# import math
# metrics_cols = ['availability', 'mut', 'mdt', 'mtbf']
# fig = make_subplots(rows=2, cols=2, specs=[[{'type': 'polar'}]*2]*2)
# plot_count = len(metrics)
# for i in range(len(metrics)):
#     row = math.floor(i / 2) + 1
#     col = i % 2 + 1
# #     print(f'row: {row}, col: {col}')
    
#     # fault profile data
#     fp_data = metrics.iloc[i, :]
#     name = fp_data['fault_profile']
#     data = metrics.loc[:, metrics_cols].iloc[i, :]
    
#     fig.add_trace(go.Scatterpolar(
#         name = name,
#         r = data.values.tolist(),
#         theta = data.index.tolist(),
        
#     ), row, col)
    
# fig.show()

Exception: The (row, col) pair sent is out of range. Use Figure.print_grid to view the subplot grid. 