In [1]:
!pip install plotly==5.6.0
!pip install -U kaleido

Collecting plotly==5.6.0
  Downloading plotly-5.6.0-py2.py3-none-any.whl (27.7 MB)
[K     |████████████████████████████████| 27.7 MB 20.2 MB/s eta 0:00:01
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, plotly
Successfully installed plotly-5.6.0 tenacity-8.0.1
Collecting kaleido
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl (79.9 MB)
[K     |████████████████████████████████| 79.9 MB 16.7 MB/s eta 0:00:01
[?25hInstalling collected packages: kaleido
Successfully installed kaleido-0.2.1


In [2]:
import plotly.express as px
import pandas as pd

In [55]:
results = pd.read_csv('data/results_cleaned.csv')

In [57]:
metrics = results[['fault_profile', 'availability', 'mut', 'mdt', 'mtbf', 'finished_jobs', 'submitted_jobs', 'enhancement']]
metrics['finished_jobs_ratio'] = (
    metrics['finished_jobs'] / metrics['submitted_jobs'])
metrics = metrics.drop(columns=['finished_jobs', 'submitted_jobs'])
metrics.head()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,fault_profile,availability,mut,mdt,mtbf,enhancement,finished_jobs_ratio
0,pod-failure,0.524912,0.283333,0.284444,0.567778,Kafka,0.709091
1,network-delay,0.736316,0.395556,0.151111,0.546667,,0.98913
2,pod-kill,0.97,0.685,0.03,0.715,Heartbeats,0.982684
3,none,1.0,1.0,0.0,1.0,,1.0
4,stress-cpu,1.0,1.0,0.0,1.0,,1.0


In [58]:
# rename to relative metrics
metrics = metrics.rename(columns={'mut': 'rel_MUT', 'mdt': 'rel_MDT', 'mtbf': 'rel_MTBF'})
metrics

Unnamed: 0,fault_profile,availability,rel_MUT,rel_MDT,rel_MTBF,enhancement,finished_jobs_ratio
0,pod-failure,0.524912,0.283333,0.284444,0.567778,Kafka,0.709091
1,network-delay,0.736316,0.395556,0.151111,0.546667,,0.98913
2,pod-kill,0.97,0.685,0.03,0.715,Heartbeats,0.982684
3,none,1.0,1.0,0.0,1.0,,1.0
4,stress-cpu,1.0,1.0,0.0,1.0,,1.0
5,pod-failure,0.598947,0.335,0.279444,0.614444,Heartbeats,0.911232
6,io,0.544912,0.208889,0.201111,0.41,,1.0
7,pod-failure,0.634912,0.303889,0.210556,0.514444,,0.79316
8,network-partition,1.0,1.0,0.0,1.0,,0.983333
9,stress-mem,0.97,0.97,0.03,1.0,,0.996644


In [59]:
# Baseline metrics
baseline_metrics = metrics[metrics['enhancement'] == 'None']
baseline_metrics

Unnamed: 0,fault_profile,availability,rel_MUT,rel_MDT,rel_MTBF,enhancement,finished_jobs_ratio
1,network-delay,0.736316,0.395556,0.151111,0.546667,,0.98913
3,none,1.0,1.0,0.0,1.0,,1.0
4,stress-cpu,1.0,1.0,0.0,1.0,,1.0
6,io,0.544912,0.208889,0.201111,0.41,,1.0
7,pod-failure,0.634912,0.303889,0.210556,0.514444,,0.79316
8,network-partition,1.0,1.0,0.0,1.0,,0.983333
9,stress-mem,0.97,0.97,0.03,1.0,,0.996644
10,pod-kill,0.88,0.353333,0.065,0.418333,,0.834559


In [60]:
# create long data from wide
metrics_long = metrics.melt(id_vars=['fault_profile', 'enhancement'], var_name='metric')
metrics_long.head()
metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
0,pod-failure,Kafka,availability,0.524912
1,network-delay,,availability,0.736316
2,pod-kill,Heartbeats,availability,0.97
3,none,,availability,1.0
4,stress-cpu,,availability,1.0
5,pod-failure,Heartbeats,availability,0.598947
6,io,,availability,0.544912
7,pod-failure,,availability,0.634912
8,network-partition,,availability,1.0
9,stress-mem,,availability,0.97


In [61]:
# Baseline metrics long
baseline_metrics_long = metrics_long[metrics_long['enhancement'] == 'None']
baseline_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
1,network-delay,,availability,0.736316
3,none,,availability,1.0
4,stress-cpu,,availability,1.0
6,io,,availability,0.544912
7,pod-failure,,availability,0.634912
8,network-partition,,availability,1.0
9,stress-mem,,availability,0.97
10,pod-kill,,availability,0.88
12,network-delay,,rel_MUT,0.395556
14,none,,rel_MUT,1.0


In [62]:
# ALL BASELINE RESULTS IN ONE Radar
fig = px.line_polar(baseline_metrics_long, r="value", theta="metric", color="fault_profile", line_close=True, color_discrete_sequence=px.colors.qualitative.Dark24)
# fig.update_traces(fill='toself')
fig.show()

In [63]:
# BASELINE METRICS - 1 Radar PLOT / PROFILE (no subplots)

fault_profiles = baseline_metrics['fault_profile'].tolist()

for profile in fault_profiles:
    data = baseline_metrics_long[baseline_metrics_long['fault_profile'] == profile]
    
#     print(data)
    
    fig = px.line_polar(
        data,
        r="value",
        theta='metric',
#         color="fault_profile",
        line_close=True,
        title=f'Fault Profile: {profile}'
    )
    fig.update_traces(fill='toself')
    fig.show()

In [64]:
# BASELINE METRICS - GROUPED BARCHART
import plotly.graph_objects as go

fault_profiles = baseline_metrics['fault_profile'].tolist()
bars = []
for profile in fault_profiles:
    data = baseline_metrics_long[baseline_metrics_long['fault_profile'] == profile].sort_values(by='metric')
#     print(data)
    bars.append(go.Bar(name=profile, x=data['metric'], y=data['value']))

fig = go.Figure(data=bars)
fig.update_layout(barmode='group')

fig.show()

fig.write_image('images/baseline_grouped_barchart.png', engine='kaleido',scale=2)

# Enhancements compared to baseline results

## Kafka

In [65]:
### only use_kafka and baseline metrics (on same fault profiles)
kafka_only_metrics_long = metrics_long[metrics_long['enhancement'] == 'Kafka']
kafka_only_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
0,pod-failure,Kafka,availability,0.524912
11,pod-failure,Kafka,rel_MUT,0.283333
22,pod-failure,Kafka,rel_MDT,0.284444
33,pod-failure,Kafka,rel_MTBF,0.567778
44,pod-failure,Kafka,finished_jobs_ratio,0.709091


In [66]:
kafka_only_profiles = kafka_only_metrics_long['fault_profile'].unique().tolist()
kafka_only_profiles

['pod-failure']

In [67]:
kafka_or_baseline_filter = metrics_long['enhancement'].isin(['None', 'Kafka'])
kafka_with_base_metrics_long = metrics_long[ \
    (kafka_or_baseline_filter & (metrics_long['fault_profile'].isin(kafka_only_profiles)))]
kafka_with_base_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
0,pod-failure,Kafka,availability,0.524912
7,pod-failure,,availability,0.634912
11,pod-failure,Kafka,rel_MUT,0.283333
18,pod-failure,,rel_MUT,0.303889
22,pod-failure,Kafka,rel_MDT,0.284444
29,pod-failure,,rel_MDT,0.210556
33,pod-failure,Kafka,rel_MTBF,0.567778
40,pod-failure,,rel_MTBF,0.514444
44,pod-failure,Kafka,finished_jobs_ratio,0.709091
51,pod-failure,,finished_jobs_ratio,0.79316


In [68]:
for profile in kafka_only_profiles:
#     data = baseline_metrics_long[baseline_metrics_long['fault_profile'] == profile]
    data = kafka_with_base_metrics_long[kafka_with_base_metrics_long['fault_profile'] == profile]
    
    fig = px.line_polar(
        data,
        r="value",
        theta='metric',
        color="enhancement",
        line_close=True,
        title=f'Fault Profile: {profile}'
    )
    fig.update_traces(fill='toself')
    fig.show()

## Heartbeats

In [69]:
heartbeats_only_metrics_long = metrics_long[metrics_long['enhancement'] == 'Heartbeats']
heartbeats_only_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
2,pod-kill,Heartbeats,availability,0.97
5,pod-failure,Heartbeats,availability,0.598947
13,pod-kill,Heartbeats,rel_MUT,0.685
16,pod-failure,Heartbeats,rel_MUT,0.335
24,pod-kill,Heartbeats,rel_MDT,0.03
27,pod-failure,Heartbeats,rel_MDT,0.279444
35,pod-kill,Heartbeats,rel_MTBF,0.715
38,pod-failure,Heartbeats,rel_MTBF,0.614444
46,pod-kill,Heartbeats,finished_jobs_ratio,0.982684
49,pod-failure,Heartbeats,finished_jobs_ratio,0.911232


In [70]:
heartbeats_only_profiles = heartbeats_only_metrics_long['fault_profile'].unique().tolist()
heartbeats_only_profiles

['pod-kill', 'pod-failure']

In [71]:
heartbeats_or_baseline_filter = metrics_long['enhancement'].isin(['None', 'Heartbeats'])
heartbeats_with_base_metrics_long = metrics_long[ \
    (heartbeats_or_baseline_filter & (metrics_long['fault_profile'].isin(heartbeats_only_profiles)))]
heartbeats_with_base_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value
2,pod-kill,Heartbeats,availability,0.97
5,pod-failure,Heartbeats,availability,0.598947
7,pod-failure,,availability,0.634912
10,pod-kill,,availability,0.88
13,pod-kill,Heartbeats,rel_MUT,0.685
16,pod-failure,Heartbeats,rel_MUT,0.335
18,pod-failure,,rel_MUT,0.303889
21,pod-kill,,rel_MUT,0.353333
24,pod-kill,Heartbeats,rel_MDT,0.03
27,pod-failure,Heartbeats,rel_MDT,0.279444


In [72]:
for profile in heartbeats_only_profiles:
#     data = baseline_metrics_long[baseline_metrics_long['fault_profile'] == profile]
    data = heartbeats_with_base_metrics_long[heartbeats_with_base_metrics_long['fault_profile'] == profile]
    
    fig = px.line_polar(
        data,
        r="value",
        theta='metric',
        color="enhancement",
        line_close=True,
        title=f'Fault Profile: {profile}'
    )
    fig.update_traces(fill='toself')
    fig.show()

## Kafka and Heartbeats

In [73]:
kafka_and_hb_metrics_long = metrics_long[metrics_long['enhancement'] == 'Kafka_And_Heartbeats']
kafka_and_hb_metrics_long

Unnamed: 0,fault_profile,enhancement,metric,value


In [74]:
kafka_and_hb_profiles = kafka_and_hb_metrics_long['fault_profile'].unique().tolist()
kafka_and_hb_profiles

[]

In [47]:
kafka_and_hb_or_baseline_filter = ~metrics_long['use_kafka'] & ~metrics_long['use_heartbeats']
kafka_and_hb_filter = metrics_long['use_kafka'] & metrics_long['use_heartbeats']
kafka_and_hb_with_base_metrics_long = metrics_long[ \
    (kafka_and_hb_filter) | (baseline_filter & (metrics_long['fault_profile'].isin(kafka_and_hb_profiles)))]
kafka_and_hb_with_base_metrics_long

Unnamed: 0,fault_profile,use_kafka,use_heartbeats,metric,value
