# Architectural Tradeoff Analysis (Feature Model)

In [2]:
import pandas as pd
import numpy as np
import random as rd
import matplotlib.pyplot as plt
from paretoset import paretoset
import plotly.express as px
from adjustText import adjust_text
from pcatools import *
from tabulate import tabulate
import os
from sklearn.decomposition import PCA
from sklearn import preprocessing
import math
import plotly.graph_objects as go
import plotly.subplots as sp
from matplotlib.text import Text
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from sklearn.cluster import KMeans
from sklearn.tree import export_graphviz
from io import StringIO
from IPython.display import Image  
import pydotplus

# Location of the combined simulation results.
path = "C:/Users/KOQVIST1/architectural-tradeoff-analysis/Simulation data/"
filedir = "April21/combined.csv"
df = pd.read_csv(path + filedir)

# Row count before any cleaning.
full_length = len(df)

# Treat +/-inf as missing, then discard incomplete rows and exact duplicates.
df = (
    df.replace([np.inf, -np.inf], np.nan)
      .dropna()
      .drop_duplicates()
)

# Row count after cleaning.
cleaned_length = len(df)

# Number of columns whose label starts with a digit.
number_of_topologies = sum(1 for column in df.columns if column[0].isdigit())

# Topology identifiers as they appear in the 'Topology' column, with display
# names in the same order.
topology_names = ["Simple_1", "Simple_2", "Simple_3", "Simple_4",
                  "Stream_1", "Stream_2", "Sophisticated_1"]
topologies = [1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 3.0]

# One bound per topology for each quality metric, handed to filterData below.
# NOTE(review): None presumably means "no bound" -- confirm against
# pcatools.filterData.
latency_uplims = [None] * len(topologies)
latency_lowlims = [None] * len(topologies)
cost_uplims = [50] * len(topologies)
cost_lowlims = [None] * len(topologies)
complexity_uplims = [None] * len(topologies)
complexity_lowlims = [None] * len(topologies)
loadsensitivity_uplims = [None] * len(topologies)
loadsensitivity_lowlims = [None] * len(topologies)

df = filterData(
    df, topologies,
    latency_uplims, latency_lowlims,
    cost_uplims, cost_lowlims,
    complexity_uplims, complexity_lowlims,
    loadsensitivity_uplims, loadsensitivity_lowlims,
)

Filtered out 1012 rows of data


In [55]:
# Feature model: row i holds the 0/1 feature flags of topologies[i]
# (rows are looked up by position in the loop below).
# NOTE: the 'ContinerManagement' spelling is kept as-is because the column
# label is part of the resulting frame's schema.
df_fm = pd.DataFrame({
    'MQTT': [1,1,0,0,1,1,1],
    'HTTP': [0,0,1,1,0,0,0],
    'JobBatching': [0,0,0,0,1,1,0],
    'ContinerManagement': [0,0,0,0,0,0,1],
    'LoadOptimization': [0,1,0,1,1,1,0],
    'NoIntermediary': [1,0,1,0,0,0,0],
    'Containers': [0,0,0,0,0,0,1],
    'Serverless': [0,1,0,1,0,1,0],
    'AutoScalingContainers': [1,0,1,0,1,0,0]
})

# Quality-metric columns copied from df into the feature table.
qms = ['Latency', 'Cost', 'Complexity', 'Load Sensitivity']

# Column layout: topology id, caching flags, feature-model flags, quality metrics.
feature_columns = ['Topology', 'Caching', 'NoCaching', *df_fm.columns, *qms]

# Feature flags per topology as plain lists, extracted once instead of once per row.
fm_rows = df_fm.values.tolist()

# Collect all rows first and build the frame in a single call; enlarging a
# DataFrame one row at a time copies the accumulated data on every insert.
records = []
for _, row in df.iterrows():
    # Position of this row's topology in `topologies`
    # (raises ValueError for an unknown topology).
    topology_index = topologies.index(row['Topology'])

    # car_cache == 1 -> NoCaching, car_cache > 1 -> Caching, otherwise neither.
    car_cache = row['car_cache']
    if car_cache == 1:
        caching, no_caching = 0, 1
    elif car_cache > 1:
        caching, no_caching = 1, 0
    else:
        caching, no_caching = 0, 0

    records.append(
        [row['Topology'], caching, no_caching]
        + fm_rows[topology_index]
        + [row[qm] for qm in qms]
    )

# Cast to float so every column has the same numeric dtype.
# NOTE(review): under pandas 2.x the previous row-wise enlargement also ended
# up with float columns -- confirm if an older pandas is in use.
df_feature = pd.DataFrame(records, columns=feature_columns).astype(float)


In [56]:
# Columns to optimise and the direction for each one.
pareto_targets = ['Latency', 'Cost', 'Complexity', 'Load Sensitivity']
pareto_objectives = ["min", "min", "min", "min"]

# Values passed as the fourth argument of paretoOptimize, one per result set.
# NOTE(review): the meaning of this argument is defined in pcatools -- confirm there.
pareto_levels = (0.05, 0.15, 0.5, 0.75)

# Results computed over the whole feature table.
df_5, df_15, df_50, df_75 = (
    paretoOptimize(df_feature, pareto_targets, pareto_objectives, level)
    for level in pareto_levels
)

# Results computed separately for each topology, then stacked per level.
# Frames are collected in lists and concatenated once at the end rather than
# re-concatenating the accumulated frame on every iteration.
fronts_by_level = {level: [] for level in pareto_levels}
for topo in topologies:
    topo_filter = df_feature.Topology == topo
    for level in pareto_levels:
        # The subset is re-sliced for every call so each call gets its own copy.
        fronts_by_level[level].append(
            paretoOptimize(df_feature[topo_filter], pareto_targets, pareto_objectives, level)
        )

# An empty `topologies` list yields empty frames, as the accumulate-from-empty
# approach did.
df_t_5, df_t_15, df_t_50, df_t_75 = (
    pd.concat(fronts_by_level[level]) if fronts_by_level[level] else pd.DataFrame()
    for level in pareto_levels
)

In [68]:
# Show the column labels of the feature table.
df_feature.columns

Index(['Topology', 'Caching', 'NoCaching', 'MQTT', 'HTTP', 'JobBatching',
       'ContinerManagement', 'LoadOptimization', 'NoIntermediary',
       'Containers', 'Serverless', 'AutoScalingContainers', 'Latency', 'Cost',
       'Complexity', 'Load Sensitivity'],
      dtype='object')

In [72]:
# Pairwise Pearson correlation of all columns in the stacked per-topology
# results obtained with level 0.75.
# Disabled alternatives kept for reference: correlating scaleData(df_temp)
# instead of the raw frame, and replacing NaN correlations with 0 via fillna(0).
df_temp = df_t_75
corr = df_temp.corr(method='pearson')
print(corr)

                       Topology   Caching  NoCaching      MQTT      HTTP   
Topology               1.000000 -0.229621   0.229621  0.393210 -0.393210  \
Caching               -0.229621  1.000000  -1.000000 -0.174847  0.174847   
NoCaching              0.229621 -1.000000   1.000000  0.174847 -0.174847   
MQTT                   0.393210 -0.174847   0.174847  1.000000 -1.000000   
HTTP                  -0.393210  0.174847  -0.174847 -1.000000  1.000000   
JobBatching            0.900345 -0.251870   0.251870  0.539614 -0.539614   
ContinerManagement     0.330894 -0.013463   0.013463  0.083176 -0.083176   
LoadOptimization       0.237381 -0.092630   0.092630  0.368284 -0.368284   
NoIntermediary        -0.327031  0.097991  -0.097991 -0.397191  0.397191   
Containers             0.330894 -0.013463   0.013463  0.083176 -0.083176   
Serverless            -0.055391  0.024693  -0.024693  0.143307 -0.143307   
AutoScalingContainers -0.016924 -0.021992   0.021992 -0.163168  0.163168   
Latency     